Orbits
1
|
Public Attributes | |
stream | |
parser | |
lowercaseElementName | |
lowercaseAttrName | |
escapeFlag | |
lastFourChars | |
state | |
escape | |
currentToken | |
tokenQueue | |
temporaryBuffer | |
This class takes care of tokenizing HTML.
* self.currentToken: holds the token that is currently being processed.
* self.state: holds a reference to the method to be invoked... XXX
* self.stream: points to the HTMLInputStream object.
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.__init__(self, stream, encoding=None, parseMeta=True, useChardet=True, lowercaseElementName=True, lowercaseAttrName=True, parser=None)
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.__iter__ | ( | self | ) |
This is where the magic happens. We do our usual processing through the states, and when we have a token to return we yield it, which pauses processing until the next token is requested.
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterAttributeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterAttributeValueState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypePublicIdentifierState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypePublicKeywordState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeSystemIdentifierState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeSystemKeywordState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueDoubleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueSingleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueUnQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeAttributeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeAttributeValueState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypePublicIdentifierState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypeSystemIdentifierState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.bogusCommentState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.bogusDoctypeState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.cdataSectionState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.characterReferenceInRcdata | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.closeTagOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndBangState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentStartDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentStartState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.consumeEntity(self, allowedChar=None, fromAttribute=False)
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.consumeNumberEntity(self, isHex)
This function returns either U+FFFD or the character based on the decimal or hexadecimal representation. It also discards ";" if present. If ";" is not present, self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.dataState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypePublicIdentifierDoubleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypePublicIdentifierSingleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeSystemIdentifierSingleQuotedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.emitCurrentToken | ( | self | ) |
This method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted.
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.entityDataState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.markupDeclarationOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.plaintextState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.processEntityInAttribute(self, allowedChar)
This method replaces the need for "entityInAttributeValueState".
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextEndTagNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextEndTagOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextLessThanSignState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataEndTagNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataEndTagOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataLessThanSignState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedDashDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedLessThanSignState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapeEndState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapeStartState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEndTagNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEndTagOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedDashDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedEndTagNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedEndTagOpenState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedLessThanSignState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapeStartDashState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapeStartState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataLessThanSignState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.selfClosingStartTagState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.tagNameState | ( | self | ) |
def pip._vendor.html5lib.tokenizer.HTMLTokenizer.tagOpenState | ( | self | ) |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.currentToken |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.escape |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.escapeFlag |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.lastFourChars |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.lowercaseAttrName |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.lowercaseElementName |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.parser |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.state |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.stream |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.temporaryBuffer |
pip._vendor.html5lib.tokenizer.HTMLTokenizer.tokenQueue |
Copyright 2014 Google Inc. All rights reserved.