1 #ifndef Py_UNICODEOBJECT_H
2 #define Py_UNICODEOBJECT_H
/* Build without Unicode support (Py_USING_UNICODE not defined): the two
   type-check macros discard their argument and always evaluate to 0. */
63 #ifndef Py_USING_UNICODE
65 #define PyUnicode_Check(op) 0
66 #define PyUnicode_CheckExact(op) 0
/* Py_UNICODE_SIZE has to be defined before this point because the
   conditionals that follow test its value; otherwise stop the build with an
   explicit diagnostic. */
74 #ifndef Py_UNICODE_SIZE
75 #error Must define Py_UNICODE_SIZE
/* Py_UNICODE_WIDE marks builds whose Py_UNICODE_SIZE is 4 or more. Later
   conditionals in this header test it to pick Py_UCS4 as PY_UNICODE_TYPE and
   to decide whether the PyUnicodeUCS2_* name table applies. */
81 #if Py_UNICODE_SIZE >= 4
82 #define Py_UNICODE_WIDE
/* Choose PY_UNICODE_TYPE unless it has already been defined.
   NOTE(review): the #else/#endif lines of these conditionals are not visible
   in this excerpt, so the exact nesting should be confirmed against the full
   header. */
91 #ifndef PY_UNICODE_TYPE
/* MS_WIN32 with a 2-byte Py_UNICODE: use wchar_t and record that wchar_t is
   usable by defining HAVE_USABLE_WCHAR_T. */
94 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
95 # define HAVE_USABLE_WCHAR_T
96 # define PY_UNICODE_TYPE wchar_t
/* Wide builds define PY_UNICODE_TYPE as Py_UCS4. */
99 # if defined(Py_UNICODE_WIDE)
100 # define PY_UNICODE_TYPE Py_UCS4
/* When wchar_t is usable (HAVE_USABLE_WCHAR_T), make sure HAVE_WCHAR_H is
   defined too, without redefining it if it already is. */
109 #ifdef HAVE_USABLE_WCHAR_T
110 # ifndef HAVE_WCHAR_H
111 # define HAVE_WCHAR_H
/* Py_UCS4 is an unsigned integer typedef: unsigned int in the first branch
   (its #if condition is not visible in this excerpt), otherwise unsigned long
   when SIZEOF_LONG >= 4.
   NOTE(review): presumably the hidden condition checks that int is at least
   4 bytes wide -- confirm against the full header. */
128 typedef unsigned int Py_UCS4;
129 #elif SIZEOF_LONG >= 4
130 typedef unsigned long Py_UCS4;
/* Narrow builds (Py_UNICODE_WIDE not defined): map every public PyUnicode_*
   and _PyUnicode_* name listed here onto a PyUnicodeUCS2_* / _PyUnicodeUCS2_*
   symbol. Any use of the public name after this point, in this header or in
   code that includes it, therefore refers to the UCS2-named symbol.
   Note that PyUnicode_ClearFreeList maps to ...UCS2_ClearFreelist (lower-case
   "l" in the target name).
   NOTE(review): presumably the renaming exists so that code compiled for a
   different Py_UNICODE width fails at link time -- confirm. */
146 #ifndef Py_UNICODE_WIDE
148 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
149 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
150 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
151 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
152 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
153 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
154 # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
155 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
156 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
157 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
158 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
159 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
160 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
161 # define PyUnicode_Compare PyUnicodeUCS2_Compare
162 # define PyUnicode_Concat PyUnicodeUCS2_Concat
163 # define PyUnicode_Contains PyUnicodeUCS2_Contains
164 # define PyUnicode_Count PyUnicodeUCS2_Count
165 # define PyUnicode_Decode PyUnicodeUCS2_Decode
166 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
167 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
168 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
169 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
170 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
171 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
172 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
173 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
174 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
175 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
176 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
177 # define PyUnicode_Encode PyUnicodeUCS2_Encode
178 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
179 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
180 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
181 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
182 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
183 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
184 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
185 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
186 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
187 # define PyUnicode_Find PyUnicodeUCS2_Find
188 # define PyUnicode_Format PyUnicodeUCS2_Format
189 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
190 # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
191 # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
192 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
193 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
194 # define PyUnicode_FromString PyUnicodeUCS2_FromString
195 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
196 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
197 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
198 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
199 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
200 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
201 # define PyUnicode_Join PyUnicodeUCS2_Join
202 # define PyUnicode_Partition PyUnicodeUCS2_Partition
203 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
204 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
205 # define PyUnicode_Replace PyUnicodeUCS2_Replace
206 # define PyUnicode_Resize PyUnicodeUCS2_Resize
207 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
208 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
209 # define PyUnicode_Split PyUnicodeUCS2_Split
210 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
211 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
212 # define PyUnicode_Translate PyUnicodeUCS2_Translate
213 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
214 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
215 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
216 # define _PyUnicode_Init _PyUnicodeUCS2_Init
217 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
218 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
219 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
220 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
221 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
222 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
223 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
224 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
225 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
226 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
227 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
228 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
229 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
230 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
231 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
/* Counterpart of the PyUnicodeUCS2_* table: the same public PyUnicode_* and
   _PyUnicode_* names, here mapped onto PyUnicodeUCS4_* / _PyUnicodeUCS4_*
   symbols. As in the UCS2 table, PyUnicode_ClearFreeList maps to a target
   spelled ...ClearFreelist (lower-case "l").
   NOTE(review): presumably this is the #else branch of the Py_UNICODE_WIDE
   test (i.e. used by wide builds); the #else line itself is not visible in
   this excerpt -- confirm against the full header. */
235 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
236 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
237 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
238 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
239 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
240 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
241 # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
242 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
243 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
244 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
245 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
246 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
247 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
248 # define PyUnicode_Compare PyUnicodeUCS4_Compare
249 # define PyUnicode_Concat PyUnicodeUCS4_Concat
250 # define PyUnicode_Contains PyUnicodeUCS4_Contains
251 # define PyUnicode_Count PyUnicodeUCS4_Count
252 # define PyUnicode_Decode PyUnicodeUCS4_Decode
253 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
254 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
255 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
256 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
257 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
258 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
259 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
260 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
261 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
262 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
263 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
264 # define PyUnicode_Encode PyUnicodeUCS4_Encode
265 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
266 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
267 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
268 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
269 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
270 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
271 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
272 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
273 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
274 # define PyUnicode_Find PyUnicodeUCS4_Find
275 # define PyUnicode_Format PyUnicodeUCS4_Format
276 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
277 # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
278 # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
279 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
280 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
281 # define PyUnicode_FromString PyUnicodeUCS4_FromString
282 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
283 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
284 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
285 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
286 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
287 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
288 # define PyUnicode_Join PyUnicodeUCS4_Join
289 # define PyUnicode_Partition PyUnicodeUCS4_Partition
290 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
291 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
292 # define PyUnicode_Replace PyUnicodeUCS4_Replace
293 # define PyUnicode_Resize PyUnicodeUCS4_Resize
294 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
295 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
296 # define PyUnicode_Split PyUnicodeUCS4_Split
297 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
298 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
299 # define PyUnicode_Translate PyUnicodeUCS4_Translate
300 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
301 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
302 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
303 # define _PyUnicode_Init _PyUnicodeUCS4_Init
304 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
305 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
306 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
307 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
308 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
309 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
310 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
311 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
312 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
313 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
314 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
315 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
316 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
317 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
318 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
/* Character classification/conversion macros for builds where wchar_t is
   usable and WANT_WCTYPE_FUNCTIONS is defined. In this variant the space,
   lower-case, upper-case and alpha tests and the lower/upper conversions go
   to the C library's wide-character functions (iswspace, iswlower, iswupper,
   iswalpha, towlower, towupper). The title-case, line-break and
   decimal/digit/numeric macros still call the _PyUnicode_* functions. */
330 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
334 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
336 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
337 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
338 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
339 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
341 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
342 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
343 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
345 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
346 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
347 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
349 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
350 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
351 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
353 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
/* Character classification/conversion macros implemented entirely with the
   _PyUnicode_* functions.
   NOTE(review): presumably the #else branch of the
   HAVE_USABLE_WCHAR_T && WANT_WCTYPE_FUNCTIONS test; the #else line is not
   visible in this excerpt -- confirm.

   Py_UNICODE_ISSPACE: values below 128 are answered from the
   _Py_ascii_whitespace lookup table, everything else goes through
   _PyUnicode_IsWhitespace(). The expansion evaluates ch twice, so do not pass
   an expression with side effects. */
363 #define Py_UNICODE_ISSPACE(ch) \
364 ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
/* The remaining macros are one-to-one wrappers around the function named in
   their expansion. */
366 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
367 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
368 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
369 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
371 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
372 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
373 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
375 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
376 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
377 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
379 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
380 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
381 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
383 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
/* Py_UNICODE_ISALNUM(ch): non-zero when ch passes any of the alpha, decimal,
   digit or numeric tests, tried in that order with short-circuit ||.
   ch appears once per test, so it can be evaluated up to four times (more if
   the individual test macros repeat it); do not pass an expression with side
   effects. */
387 #define Py_UNICODE_ISALNUM(ch) \
388 (Py_UNICODE_ISALPHA(ch) || \
389 Py_UNICODE_ISDECIMAL(ch) || \
390 Py_UNICODE_ISDIGIT(ch) || \
391 Py_UNICODE_ISNUMERIC(ch))
/* Py_UNICODE_COPY(target, source, length): copy `length` Py_UNICODE elements
   from source to target. `length` is an element count; the macro multiplies
   it by sizeof(Py_UNICODE) to get the byte count handed to Py_MEMCPY.
   NOTE(review): Py_MEMCPY is defined elsewhere; presumably it has memcpy
   semantics, so the two buffers should not overlap -- confirm. */
393 #define Py_UNICODE_COPY(target, source, length) \
394 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
396 #define Py_UNICODE_FILL(target, value, length) \
397 do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
398 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
/* Py_UNICODE_MATCH(string, offset, substring): non-zero when the contents of
   `substring` equal the contents of `string` starting at element `offset`.
   Both object arguments are used as pointers to something with `str` and
   `length` members.
   The test compares the first element, then the last element, and only then
   runs memcmp over all substring->length elements.
   Preconditions that follow from the expansion and are NOT checked here:
     - substring->length must be at least 1, otherwise the last-element
       comparison indexes element length-1 == -1 of both buffers;
     - offset + substring->length must stay within string's buffer.
   Every argument is expanded several times, so pass side-effect-free
   expressions only. */
404 #define Py_UNICODE_MATCH(string, offset, substring) \
405 ((*((string)->str + (offset)) == *((substring)->str)) && \
406 ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
407 !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
/* PyUnicode_Check(op): tests the Py_TPFLAGS_UNICODE_SUBCLASS flag on op's
   type through PyType_FastSubclass (presumably true for the unicode type and
   its subclasses -- the flag itself is defined elsewhere).
   PyUnicode_CheckExact(op): true only when op's type is exactly
   PyUnicode_Type (pointer comparison). */
427 #define PyUnicode_Check(op) \
428 PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
429 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
/* Direct field accessors. Each one casts op to PyUnicodeObject * without any
   type check, so the caller must already know op is a unicode object.
     PyUnicode_GET_SIZE       the `length` field (an element count)
     PyUnicode_GET_DATA_SIZE  length * sizeof(Py_UNICODE), i.e. size in bytes
     PyUnicode_AS_UNICODE     the `str` field
     PyUnicode_AS_DATA        the same `str` pointer viewed as const char * */
432 #define PyUnicode_GET_SIZE(op) \
433 (((PyUnicodeObject *)(op))->length)
434 #define PyUnicode_GET_DATA_SIZE(op) \
435 (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
436 #define PyUnicode_AS_UNICODE(op) \
437 (((PyUnicodeObject *)(op))->str)
438 #define PyUnicode_AS_DATA(op) \
439 ((const char *)((PyUnicodeObject *)(op))->str)
/* Code point 0xFFFD (the Unicode REPLACEMENT CHARACTER) as a Py_UNICODE
   value. */
448 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
/* PyUnicode_GetMax takes no arguments and returns a Py_UNICODE value.
   Because of the renaming tables earlier in this header, the symbol actually
   declared here is PyUnicodeUCS2_GetMax or PyUnicodeUCS4_GetMax. */
495 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(
void);
575 register const wchar_t *w,
592 PyUnicodeObject *unicode,
/* PyUnicode_ClearFreeList takes no arguments and returns an int.
   Because of the renaming tables earlier in this header, the symbol actually
   declared here is PyUnicodeUCS2_ClearFreelist or PyUnicodeUCS4_ClearFreelist
   (note the lower-case "l" in the mangled name). */
620 PyAPI_FUNC(
int) PyUnicode_ClearFreeList(
void);
/* PyUnicode_GetDefaultEncoding takes no arguments and returns a
   const char *.
   Because of the renaming tables earlier in this header, the symbol actually
   declared here is PyUnicodeUCS2_GetDefaultEncoding or
   PyUnicodeUCS4_GetDefaultEncoding. */
668 PyAPI_FUNC(
const char*) PyUnicode_GetDefaultEncoding(
void);
688 const char *encoding,
698 const char *encoding,
707 const char *encoding,
716 const char *encoding,
741 const Py_UNICODE *data,
744 int base64WhiteSpace,
768 const Py_UNICODE *data,
841 const Py_UNICODE *data,
919 const Py_UNICODE *data,
938 const Py_UNICODE *data,
955 const Py_UNICODE *data,
963 PyObject *_PyUnicode_DecodeUnicodeInternal(
986 const Py_UNICODE *data,
1008 const Py_UNICODE *data,
1050 const Py_UNICODE *data,
1071 const Py_UNICODE *data,
1099 const Py_UNICODE *data,
1331 PyUnicodeObject *
self,
/* Table of const unsigned char indexed by character value. The non-wctype
   definition of Py_UNICODE_ISSPACE in this header reads it for values below
   128, so it needs at least 128 entries (the array size is not given in this
   declaration). */
1340 PyAPI_DATA(
const unsigned char) _Py_ascii_whitespace[];
1369 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
1373 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
1377 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(