public class Tokenizer extends Object implements Locator
This class implements the Locator interface. This is not an
incidental implementation detail: Users of this class are encouraged to make
use of the Locator nature.
By default, the tokenizer may report data that XML 1.0 bans. The tokenizer
can be configured to treat these conditions as fatal or to coerce the infoset
to something that XML 1.0 allows.

| Constructor and Description |
|---|
Tokenizer(TokenHandler tokenHandler)
The constructor.
|
Tokenizer(TokenHandler tokenHandler,
boolean newAttributesEachTime) |
| Modifier and Type | Method and Description |
|---|---|
void |
becomeConfident() |
protected char |
checkChar(char[] buf,
int pos) |
void |
end() |
void |
eof() |
void |
err(String message)
Reports a Parse Error.
|
protected void |
errAstralNonCharacter(int ch) |
protected void |
errAttributeValueMissing() |
protected void |
errBadCharAfterLt(char c) |
protected void |
errBadCharBeforeAttributeNameOrNull(char c) |
protected void |
errBogusComment() |
protected void |
errBogusDoctype() |
protected void |
errCharRefLacksSemicolon() |
protected void |
errConsecutiveHyphens() |
protected void |
errDuplicateAttribute() |
protected void |
errEofAfterLt() |
protected void |
errEofInAttributeName() |
protected void |
errEofInAttributeValue() |
protected void |
errEofInComment() |
protected void |
errEofInDoctype() |
protected void |
errEofInEndTag() |
protected void |
errEofInPublicId() |
protected void |
errEofInSystemId() |
protected void |
errEofInTagName() |
protected void |
errEofWithoutGt() |
protected void |
errEqualsSignBeforeAttributeName() |
protected void |
errExpectedPublicId() |
protected void |
errExpectedSystemId() |
protected void |
errGarbageAfterLtSlash() |
protected void |
errGtInPublicId() |
protected void |
errGtInSystemId() |
protected void |
errHtml4LtSlashInRcdata(char folded) |
protected void |
errHtml4NonNameInUnquotedAttribute(char c) |
protected void |
errHtml4XmlVoidSyntax() |
protected void |
errHyphenHyphenBang() |
protected void |
errLtGt() |
protected void |
errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c) |
protected void |
errLtSlashGt() |
protected void |
errMissingSpaceBeforeDoctypeName() |
protected void |
errNamelessDoctype() |
protected void |
errNcrControlChar() |
protected char |
errNcrControlChar(char ch) |
protected void |
errNcrCr() |
protected void |
errNcrInC1Range() |
protected char |
errNcrNonCharacter(char ch) |
protected void |
errNcrOutOfRange() |
protected void |
errNcrSurrogate() |
protected void |
errNcrUnassigned() |
protected void |
errNcrZero() |
protected void |
errNoDigitsInNCR() |
protected void |
errNoNamedCharacterMatch() |
protected void |
errNoSpaceBetweenAttributes() |
protected void |
errNoSpaceBetweenDoctypePublicKeywordAndQuote() |
protected void |
errNoSpaceBetweenDoctypeSystemKeywordAndQuote() |
protected void |
errNoSpaceBetweenPublicAndSystemIds() |
protected void |
errNotSemicolonTerminated() |
protected void |
errPrematureEndOfComment() |
protected void |
errProcessingInstruction() |
protected void |
errQuoteBeforeAttributeName(char c) |
protected void |
errQuoteOrLtInAttributeNameOrNull(char c) |
protected void |
errSlashNotFollowedByGt() |
void |
errTreeBuilder(String message) |
protected void |
errUnescapedAmpersandInterpretedAsCharacterReference() |
protected void |
errUnquotedAttributeValOrNull(char c) |
protected void |
errWarnLtSlashInRcdata() |
void |
fatal(String message)
Reports a condition that would make the infoset incompatible with XML
1.0 as fatal.
|
protected void |
flushChars(char[] buf,
int pos)
Flushes coalesced character tokens.
|
int |
getCol()
Returns the col.
|
int |
getColumnNumber() |
ErrorHandler |
getErrorHandler() |
int |
getLine()
Returns the line.
|
int |
getLineNumber() |
String |
getPublicId() |
String |
getSystemId() |
void |
initializeWithoutStarting() |
void |
initLocation(String newPublicId,
String newSystemId) |
boolean |
internalEncodingDeclaration(String internalCharset) |
boolean |
isAlreadyComplainedAboutNonAscii()
Returns the alreadyComplainedAboutNonAscii.
|
boolean |
isInDataState() |
boolean |
isMappingLangToXmlLang()
Returns the mappingLangToXmlLang.
|
boolean |
isNextCharOnNewLine()
Returns the nextCharOnNewLine.
|
boolean |
isPrevCR() |
void |
loadState(Tokenizer other) |
protected void |
maybeErrAttributesOnEndTag(HtmlAttributes attrs) |
protected void |
maybeErrSlashInEndTag(boolean selfClosing) |
protected void |
maybeWarnPrivateUse(char ch) |
protected void |
maybeWarnPrivateUseAstral() |
protected void |
noteAttributeWithoutValue() |
protected void |
noteUnquotedAttributeValue() |
void |
notifyAboutMetaBoundary() |
void |
requestSuspension() |
void |
resetToDataState() |
void |
setCommentPolicy(XmlViolationPolicy commentPolicy)
Sets the commentPolicy.
|
void |
setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
Sets the contentNonXmlCharPolicy.
|
void |
setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
Sets the contentSpacePolicy.
|
void |
setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
Sets the encodingDeclarationHandler.
|
void |
setErrorHandler(ErrorHandler eh)
Sets the error handler.
|
void |
setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
Sets the html4ModeCompatibleWithXhtml1Schemata.
|
void |
setInterner(Interner interner) |
void |
setLineNumber(int line)
For C++ use only.
|
void |
setMappingLangToXmlLang(boolean mappingLangToXmlLang)
Sets the mappingLangToXmlLang.
|
void |
setNamePolicy(XmlViolationPolicy namePolicy) |
void |
setStateAndEndTagExpectation(int specialTokenizerState,
ElementName endTagExpectation)
Sets the tokenizer state and the associated element name.
|
void |
setStateAndEndTagExpectation(int specialTokenizerState,
String endTagExpectation)
Sets the tokenizer state and the associated element name.
|
void |
setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to
TransitionHandler. |
void |
setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
Sets the xmlnsPolicy.
|
protected void |
silentCarriageReturn() |
protected void |
silentLineFeed() |
void |
start() |
protected void |
startErrorReporting() |
protected String |
strBufToString()
The smaller buffer as a String.
|
boolean |
tokenizeBuffer(UTF16Buffer buffer) |
protected int |
transition(int from,
int to,
boolean reconsume,
int pos) |
void |
warn(String message)
Reports a warning.
|
public static final int DATA
public static final int RCDATA
public static final int SCRIPT_DATA
public static final int RAWTEXT
public static final int SCRIPT_DATA_ESCAPED
public static final int ATTRIBUTE_VALUE_DOUBLE_QUOTED
public static final int ATTRIBUTE_VALUE_SINGLE_QUOTED
public static final int ATTRIBUTE_VALUE_UNQUOTED
public static final int PLAINTEXT
public static final int TAG_OPEN
public static final int CLOSE_TAG_OPEN
public static final int TAG_NAME
public static final int BEFORE_ATTRIBUTE_NAME
public static final int ATTRIBUTE_NAME
public static final int AFTER_ATTRIBUTE_NAME
public static final int BEFORE_ATTRIBUTE_VALUE
public static final int AFTER_ATTRIBUTE_VALUE_QUOTED
public static final int BOGUS_COMMENT
public static final int MARKUP_DECLARATION_OPEN
public static final int DOCTYPE
public static final int BEFORE_DOCTYPE_NAME
public static final int DOCTYPE_NAME
public static final int AFTER_DOCTYPE_NAME
public static final int BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
public static final int DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
public static final int AFTER_DOCTYPE_PUBLIC_IDENTIFIER
public static final int BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
public static final int DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
public static final int DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
public static final int AFTER_DOCTYPE_SYSTEM_IDENTIFIER
public static final int BOGUS_DOCTYPE
public static final int COMMENT_START
public static final int COMMENT_START_DASH
public static final int COMMENT
public static final int COMMENT_END_DASH
public static final int COMMENT_END
public static final int COMMENT_END_BANG
public static final int NON_DATA_END_TAG_NAME
public static final int MARKUP_DECLARATION_HYPHEN
public static final int MARKUP_DECLARATION_OCTYPE
public static final int DOCTYPE_UBLIC
public static final int DOCTYPE_YSTEM
public static final int AFTER_DOCTYPE_PUBLIC_KEYWORD
public static final int BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
public static final int AFTER_DOCTYPE_SYSTEM_KEYWORD
public static final int CONSUME_CHARACTER_REFERENCE
public static final int CONSUME_NCR
public static final int CHARACTER_REFERENCE_TAIL
public static final int HEX_NCR_LOOP
public static final int DECIMAL_NRC_LOOP
public static final int HANDLE_NCR_VALUE
public static final int HANDLE_NCR_VALUE_RECONSUME
public static final int CHARACTER_REFERENCE_HILO_LOOKUP
public static final int SELF_CLOSING_START_TAG
public static final int CDATA_START
public static final int CDATA_SECTION
public static final int CDATA_RSQB
public static final int CDATA_RSQB_RSQB
public static final int SCRIPT_DATA_LESS_THAN_SIGN
public static final int SCRIPT_DATA_ESCAPE_START
public static final int SCRIPT_DATA_ESCAPE_START_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH
public static final int SCRIPT_DATA_ESCAPED_DASH_DASH
public static final int BOGUS_COMMENT_HYPHEN
public static final int RAWTEXT_RCDATA_LESS_THAN_SIGN
public static final int SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_START
public static final int SCRIPT_DATA_DOUBLE_ESCAPED
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH
public static final int SCRIPT_DATA_DOUBLE_ESCAPE_END
protected final TokenHandler tokenHandler
protected EncodingDeclarationHandler encodingDeclarationHandler
protected ErrorHandler errorHandler
protected boolean lastCR
protected int stateSave
protected int index
protected int value
protected int cstart
protected ElementName endTagExpectation
protected boolean endTag
true if tokenizing an end tag
protected AttributeName attributeName
protected boolean html4
true when HTML4-specific additional errors are requested.
protected boolean confident
protected int currentBufferGlobalOffset
protected LocatorImpl ampersandLocation
public Tokenizer(TokenHandler tokenHandler, boolean newAttributesEachTime)
public Tokenizer(TokenHandler tokenHandler)
tokenHandler - the handler for receiving tokens
public void setInterner(Interner interner)
public boolean isMappingLangToXmlLang()
public void setMappingLangToXmlLang(boolean mappingLangToXmlLang)
mappingLangToXmlLang - the mappingLangToXmlLang to set
public void setErrorHandler(ErrorHandler eh)
public ErrorHandler getErrorHandler()
public void setCommentPolicy(XmlViolationPolicy commentPolicy)
commentPolicy - the commentPolicy to set
public void setContentNonXmlCharPolicy(XmlViolationPolicy contentNonXmlCharPolicy)
contentNonXmlCharPolicy - the contentNonXmlCharPolicy to set
public void setContentSpacePolicy(XmlViolationPolicy contentSpacePolicy)
contentSpacePolicy - the contentSpacePolicy to set
public void setXmlnsPolicy(XmlViolationPolicy xmlnsPolicy)
xmlnsPolicy - the xmlnsPolicy to set
public void setNamePolicy(XmlViolationPolicy namePolicy)
public void setHtml4ModeCompatibleWithXhtml1Schemata(boolean html4ModeCompatibleWithXhtml1Schemata)
html4ModeCompatibleWithXhtml1Schemata - the html4ModeCompatibleWithXhtml1Schemata to set
public void setStateAndEndTagExpectation(int specialTokenizerState,
String endTagExpectation)
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal
public void setStateAndEndTagExpectation(int specialTokenizerState,
ElementName endTagExpectation)
specialTokenizerState - the tokenizer state to set
endTagExpectation - the expected end tag for transitioning back to normal
public void setLineNumber(int line)
public int getLineNumber()
Specified by: getLineNumber in interface Locator
See also: Locator.getLineNumber()
public int getColumnNumber()
Specified by: getColumnNumber in interface Locator
See also: Locator.getColumnNumber()
public String getPublicId()
Specified by: getPublicId in interface Locator
See also: Locator.getPublicId()
public String getSystemId()
Specified by: getSystemId in interface Locator
See also: Locator.getSystemId()
public void notifyAboutMetaBoundary()
protected String strBufToString()
C++ memory note: The return value must be released.
protected void flushChars(char[] buf,
int pos)
throws SAXException
buf - TODO
pos - TODO
Throws: SAXException
public void fatal(String message) throws SAXException
message - the message
Throws: SAXException
Throws: SAXParseException
public void err(String message) throws SAXException
message - the message
Throws: SAXException
public void errTreeBuilder(String message) throws SAXException
Throws: SAXException
public void warn(String message) throws SAXException
message - the message
Throws: SAXException
protected void startErrorReporting()
throws SAXException
Throws: SAXException
public void start()
throws SAXException
Throws: SAXException
public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException
Throws: SAXException
protected int transition(int from,
int to,
boolean reconsume,
int pos)
throws SAXException
Throws: SAXException
protected void silentCarriageReturn()
protected void silentLineFeed()
public void eof()
throws SAXException
Throws: SAXException
protected char checkChar(char[] buf,
int pos)
throws SAXException
Throws: SAXException
public boolean isAlreadyComplainedAboutNonAscii()
public boolean internalEncodingDeclaration(String internalCharset) throws SAXException
Throws: SAXException
public void end()
throws SAXException
Throws: SAXException
public void requestSuspension()
public void becomeConfident()
public boolean isNextCharOnNewLine()
public boolean isPrevCR()
public int getLine()
public int getCol()
public boolean isInDataState()
public void resetToDataState()
public void loadState(Tokenizer other) throws SAXException
Throws: SAXException
public void initializeWithoutStarting()
throws SAXException
Throws: SAXException
protected void errGarbageAfterLtSlash()
throws SAXException
Throws: SAXException
protected void errLtSlashGt()
throws SAXException
Throws: SAXException
protected void errWarnLtSlashInRcdata()
throws SAXException
Throws: SAXException
protected void errHtml4LtSlashInRcdata(char folded)
throws SAXException
Throws: SAXException
protected void errCharRefLacksSemicolon()
throws SAXException
Throws: SAXException
protected void errNoDigitsInNCR()
throws SAXException
Throws: SAXException
protected void errGtInSystemId()
throws SAXException
Throws: SAXException
protected void errGtInPublicId()
throws SAXException
Throws: SAXException
protected void errNamelessDoctype()
throws SAXException
Throws: SAXException
protected void errConsecutiveHyphens()
throws SAXException
Throws: SAXException
protected void errPrematureEndOfComment()
throws SAXException
Throws: SAXException
protected void errBogusComment()
throws SAXException
Throws: SAXException
protected void errUnquotedAttributeValOrNull(char c)
throws SAXException
Throws: SAXException
protected void errSlashNotFollowedByGt()
throws SAXException
Throws: SAXException
protected void errHtml4XmlVoidSyntax()
throws SAXException
Throws: SAXException
protected void errNoSpaceBetweenAttributes()
throws SAXException
Throws: SAXException
protected void errHtml4NonNameInUnquotedAttribute(char c)
throws SAXException
Throws: SAXException
protected void errLtOrEqualsOrGraveInUnquotedAttributeOrNull(char c)
throws SAXException
Throws: SAXException
protected void errAttributeValueMissing()
throws SAXException
Throws: SAXException
protected void errBadCharBeforeAttributeNameOrNull(char c)
throws SAXException
Throws: SAXException
protected void errEqualsSignBeforeAttributeName()
throws SAXException
Throws: SAXException
protected void errBadCharAfterLt(char c)
throws SAXException
Throws: SAXException
protected void errLtGt()
throws SAXException
Throws: SAXException
protected void errProcessingInstruction()
throws SAXException
Throws: SAXException
protected void errUnescapedAmpersandInterpretedAsCharacterReference()
throws SAXException
Throws: SAXException
protected void errNotSemicolonTerminated()
throws SAXException
Throws: SAXException
protected void errNoNamedCharacterMatch()
throws SAXException
Throws: SAXException
protected void errQuoteBeforeAttributeName(char c)
throws SAXException
Throws: SAXException
protected void errQuoteOrLtInAttributeNameOrNull(char c)
throws SAXException
Throws: SAXException
protected void errExpectedPublicId()
throws SAXException
Throws: SAXException
protected void errBogusDoctype()
throws SAXException
Throws: SAXException
protected void maybeWarnPrivateUseAstral()
throws SAXException
Throws: SAXException
protected void maybeWarnPrivateUse(char ch)
throws SAXException
Throws: SAXException
protected void maybeErrAttributesOnEndTag(HtmlAttributes attrs) throws SAXException
Throws: SAXException
protected void maybeErrSlashInEndTag(boolean selfClosing)
throws SAXException
Throws: SAXException
protected char errNcrNonCharacter(char ch)
throws SAXException
Throws: SAXException
protected void errAstralNonCharacter(int ch)
throws SAXException
Throws: SAXException
protected void errNcrSurrogate()
throws SAXException
Throws: SAXException
protected char errNcrControlChar(char ch)
throws SAXException
Throws: SAXException
protected void errNcrCr()
throws SAXException
Throws: SAXException
protected void errNcrInC1Range()
throws SAXException
Throws: SAXException
protected void errEofInPublicId()
throws SAXException
Throws: SAXException
protected void errEofInComment()
throws SAXException
Throws: SAXException
protected void errEofInDoctype()
throws SAXException
Throws: SAXException
protected void errEofInAttributeValue()
throws SAXException
Throws: SAXException
protected void errEofInAttributeName()
throws SAXException
Throws: SAXException
protected void errEofWithoutGt()
throws SAXException
Throws: SAXException
protected void errEofInTagName()
throws SAXException
Throws: SAXException
protected void errEofInEndTag()
throws SAXException
Throws: SAXException
protected void errEofAfterLt()
throws SAXException
Throws: SAXException
protected void errNcrOutOfRange()
throws SAXException
Throws: SAXException
protected void errNcrUnassigned()
throws SAXException
Throws: SAXException
protected void errDuplicateAttribute()
throws SAXException
Throws: SAXException
protected void errEofInSystemId()
throws SAXException
Throws: SAXException
protected void errExpectedSystemId()
throws SAXException
Throws: SAXException
protected void errMissingSpaceBeforeDoctypeName()
throws SAXException
Throws: SAXException
protected void errHyphenHyphenBang()
throws SAXException
Throws: SAXException
protected void errNcrControlChar()
throws SAXException
Throws: SAXException
protected void errNcrZero()
throws SAXException
Throws: SAXException
protected void errNoSpaceBetweenDoctypeSystemKeywordAndQuote()
throws SAXException
Throws: SAXException
protected void errNoSpaceBetweenPublicAndSystemIds()
throws SAXException
Throws: SAXException
protected void errNoSpaceBetweenDoctypePublicKeywordAndQuote()
throws SAXException
Throws: SAXException
protected void noteAttributeWithoutValue()
throws SAXException
Throws: SAXException
protected void noteUnquotedAttributeValue()
throws SAXException
Throws: SAXException
public void setEncodingDeclarationHandler(EncodingDeclarationHandler encodingDeclarationHandler)
encodingDeclarationHandler - the encodingDeclarationHandler to set
public void setTransitionBaseOffset(int offset)
Sets an offset to be added to the position reported to TransitionHandler.
offset - the offset

Copyright © 2016. All rights reserved.