Orbits
1
|
Public Member Functions | |
def | __init__ |
def | reset |
def | openStream |
def | position |
def | char |
def | readChunk |
def | characterErrorsUCS4 |
def | characterErrorsUCS2 |
def | charsUntil |
def | unget |
Public Attributes | |
reportCharacterErrors | |
replaceCharactersRegexp | |
newLines | |
charEncoding | |
dataStream | |
chunk | |
chunkSize | |
chunkOffset | |
errors | |
prevNumLines | |
prevNumCols | |
Private Member Functions | |
def | _position |
Private Attributes | |
_bufferedCharacter | |
Static Private Attributes | |
int | _defaultChunkSize = 10240 |
Provides a Unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding, removes or replaces incorrect byte sequences, and also provides column and line tracking.
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.__init__ | ( | self, | |
source | |||
) |
Initialises the HTMLInputStream. HTMLInputStream(source, [encoding]) -> Normalized stream from source for use by html5lib. source can be either a file object, a local filename, or a string. The optional encoding parameter must be a string that indicates the encoding. If specified, that encoding will be used, regardless of any BOM or later declaration (such as in a meta element). parseMeta - Look for a <meta> element containing encoding information.
|
private |
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.char | ( | self | ) |
Read one character from the stream or queue if available. Return EOF when EOF is reached.
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.characterErrorsUCS2 | ( | self, | |
data | |||
) |
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.characterErrorsUCS4 | ( | self, | |
data | |||
) |
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.charsUntil | ( | self, | |
characters, | |||
opposite = False |
|||
) |
Returns a string of characters from the stream up to, but not including, any character in 'characters' or EOF. 'characters' must be a container that supports the 'in' operator and iteration over its characters.
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.openStream | ( | self, | |
source | |||
) |
Produces a file object from source. source can be either a file object, local filename or a string.
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.position | ( | self | ) |
Returns (line, col) of the current position in the stream.
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.readChunk | ( | self, | |
chunkSize = None |
|||
) |
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.reset | ( | self | ) |
def pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.unget | ( | self, | |
char | |||
) |
|
private |
|
staticprivate |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.charEncoding |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.chunk |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.chunkOffset |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.chunkSize |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.dataStream |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.errors |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.newLines |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.prevNumCols |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.prevNumLines |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.replaceCharactersRegexp |
pip._vendor.html5lib.inputstream.HTMLUnicodeInputStream.reportCharacterErrors |
Copyright 2014 Google Inc. All rights reserved.