|
Orbits
1
|
Public Attributes | |
| stream | |
| parser | |
| lowercaseElementName | |
| lowercaseAttrName | |
| escapeFlag | |
| lastFourChars | |
| state | |
| escape | |
| currentToken | |
| tokenQueue | |
| temporaryBuffer | |
This class takes care of tokenizing HTML.
* self.currentToken: Holds the token that is currently being processed.
* self.state: Holds a reference to the method to be invoked... XXX
* self.stream: Points to the HTMLInputStream object.
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.__init__ | ( | self, | |
| stream, | |||
encoding = None, |
|||
parseMeta = True, |
|||
useChardet = True, |
|||
lowercaseElementName = True, |
|||
lowercaseAttrName = True, |
|||
parser = None |
|||
| ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.__iter__ | ( | self | ) |
This is where the magic happens. We do our usual processing through the states, and when we have a token to return we yield the token, which pauses processing until the next token is requested.
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterAttributeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterAttributeValueState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypePublicIdentifierState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypePublicKeywordState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeSystemIdentifierState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.afterDoctypeSystemKeywordState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueDoubleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueSingleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.attributeValueUnQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeAttributeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeAttributeValueState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypePublicIdentifierState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.beforeDoctypeSystemIdentifierState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.betweenDoctypePublicAndSystemIdentifiersState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.bogusCommentState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.bogusDoctypeState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.cdataSectionState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.characterReferenceInRcdata | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.closeTagOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndBangState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentEndState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentStartDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentStartState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.commentState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.consumeEntity | ( | self, | |
allowedChar = None, |
|||
fromAttribute = False |
|||
| ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.consumeNumberEntity | ( | self, | |
| isHex | |||
| ) |
This function returns either U+FFFD or the character based on the
decimal or hexadecimal representation. It also discards ";" if present.
If ";" is not present, self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked.
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.dataState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypePublicIdentifierDoubleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypePublicIdentifierSingleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeSystemIdentifierDoubleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.doctypeSystemIdentifierSingleQuotedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.emitCurrentToken | ( | self | ) |
This method is a generic handler for emitting the tags. It also sets the state to "data" because that's what's needed after a token has been emitted.
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.entityDataState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.markupDeclarationOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.plaintextState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.processEntityInAttribute | ( | self, | |
| allowedChar | |||
| ) |
This method replaces the need for "entityInAttributeValueState".
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextEndTagNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextEndTagOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextLessThanSignState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rawtextState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataEndTagNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataEndTagOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataLessThanSignState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.rcdataState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedDashDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedLessThanSignState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapeEndState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataDoubleEscapeStartState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEndTagNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEndTagOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedDashDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedEndTagNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedEndTagOpenState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedLessThanSignState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapedState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapeStartDashState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataEscapeStartState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataLessThanSignState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.scriptDataState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.selfClosingStartTagState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.tagNameState | ( | self | ) |
| def pip._vendor.html5lib.tokenizer.HTMLTokenizer.tagOpenState | ( | self | ) |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.currentToken |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.escape |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.escapeFlag |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.lastFourChars |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.lowercaseAttrName |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.lowercaseElementName |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.parser |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.state |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.stream |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.temporaryBuffer |
| pip._vendor.html5lib.tokenizer.HTMLTokenizer.tokenQueue |
1.8.3.1
Copyright 2014 Google Inc. All rights reserved.