Package org.jsoup.parser
Class Tokeniser
java.lang.Object
org.jsoup.parser.Tokeniser
Reads the input stream into tokens.
-
Field Summary
Fields
Modifier and Type | Field | Description
(package private) final Token.Character
private final StringBuilder
private String
private int
private final int[]
(package private) final Token.Comment
(package private) final StringBuilder
(package private) final Token.Doctype
private Token
(package private) final Token.EndTag
private final ParseErrorList
private boolean
private String
private String
private int
private final int[]
private static final char[]
private final CharacterReader
(package private) static final char
(package private) final Token.StartTag
private TokeniserState
(package private) Token.Tag
private static final int
(package private) static final int[]
(package private) static final int
-
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
(package private) void
advanceTransition
(TokeniserState newState) (package private) String
(package private) String
Returns the closer sequence </lastStart
private void
characterReferenceError
(String message, Object... args) (package private) int[]
consumeCharacterReference
(Character additionalAllowedCharacter, boolean inAttribute) (package private) void
(package private) void
(package private) void
(package private) Token.Tag
createTagPending
(boolean start) (package private) void
(package private) static boolean
(package private) void
emit
(char c) (package private) void
emit
(char[] chars) (package private) void
emit
(int[] codepoints) (package private) void
(package private) void
emit
(StringBuilder str) (package private) void
(package private) void
(package private) void
(package private) void
(package private) void
eofError
(TokeniserState state) (package private) void
(package private) void
(package private) void
error
(TokeniserState state) (package private) TokeniserState
getState()
(package private) boolean
(package private) Token
read()
(package private) void
transition
(TokeniserState newState) (package private) String
unescapeEntities
(boolean inAttribute) Utility method to consume reader and unescape entities found within.
-
Field Details
-
replacementChar
static final char replacementChar
- See Also:
-
notCharRefCharsSorted
private static final char[] notCharRefCharsSorted -
win1252ExtensionsStart
static final int win1252ExtensionsStart
- See Also:
-
win1252Extensions
static final int[] win1252Extensions -
reader
-
errors
-
state
-
emitPending
-
isEmitPending
private boolean isEmitPending -
charsString
-
charsBuilder
-
dataBuffer
-
startPending
-
endPending
-
tagPending
Token.Tag tagPending -
charPending
-
doctypePending
-
commentPending
-
lastStartTag
-
lastStartCloseSeq
-
Unset
private static final int Unset
- See Also:
-
markupStartPos
private int markupStartPos -
charStartPos
private int charStartPos -
codepointHolder
private final int[] codepointHolder -
multipointHolder
private final int[] multipointHolder
-
-
Constructor Details
-
Tokeniser
Tokeniser(TreeBuilder treeBuilder)
-
-
Method Details
-
read
Token read() -
emit
-
emit
-
emit
-
emit
void emit(char c) -
emit
void emit(char[] chars) -
emit
void emit(int[] codepoints) -
getState
TokeniserState getState() -
transition
-
advanceTransition
-
consumeCharacterReference
-
createTagPending
-
emitTagPending
void emitTagPending() -
createCommentPending
void createCommentPending() -
emitCommentPending
void emitCommentPending() -
createBogusCommentPending
void createBogusCommentPending() -
createDoctypePending
void createDoctypePending() -
emitDoctypePending
void emitDoctypePending() -
createTempBuffer
void createTempBuffer() -
isAppropriateEndTagToken
boolean isAppropriateEndTagToken() -
appropriateEndTagName
String appropriateEndTagName() -
appropriateEndTagSeq
String appropriateEndTagSeq()
Returns the closer sequence </lastStart
-
error
-
eofError
-
characterReferenceError
-
error
-
error
-
currentNodeInHtmlNS
static boolean currentNodeInHtmlNS() -
unescapeEntities
Utility method to consume the reader and unescape entities found within.
- Parameters:
inAttribute - if the text to be unescaped is in an attribute
- Returns:
unescaped string from reader
-