gh-139353: Add Objects/unicode_writer.c file (#139911)
Move the public PyUnicodeWriter API and the private _PyUnicodeWriter API to a new Objects/unicode_writer.c file. Rename a few helper functions to share them between unicodeobject.c and unicode_writer.c, such as resize_compact() or unicode_result().
This commit is contained in:
@@ -17,6 +17,46 @@ extern "C" {
|
|||||||
|
|
||||||
|
|
||||||
extern int _PyUnicode_IsModifiable(PyObject *unicode);
|
extern int _PyUnicode_IsModifiable(PyObject *unicode);
|
||||||
|
extern void _PyUnicodeWriter_InitWithBuffer(
|
||||||
|
_PyUnicodeWriter *writer,
|
||||||
|
PyObject *buffer);
|
||||||
|
extern PyObject* _PyUnicode_Result(PyObject *unicode);
|
||||||
|
extern int _PyUnicode_DecodeUTF8Writer(
|
||||||
|
_PyUnicodeWriter *writer,
|
||||||
|
const char *s,
|
||||||
|
Py_ssize_t size,
|
||||||
|
_Py_error_handler error_handler,
|
||||||
|
const char *errors,
|
||||||
|
Py_ssize_t *consumed);
|
||||||
|
extern PyObject* _PyUnicode_ResizeCompact(
|
||||||
|
PyObject *unicode,
|
||||||
|
Py_ssize_t length);
|
||||||
|
extern PyObject* _PyUnicode_GetEmpty(void);
|
||||||
|
|
||||||
|
|
||||||
|
/* Generic helper macro to convert characters of different types.
|
||||||
|
from_type and to_type have to be valid type names, begin and end
|
||||||
|
are pointers to the source characters which should be of type
|
||||||
|
"from_type *". to is a pointer of type "to_type *" and points to the
|
||||||
|
buffer where the result characters are written to. */
|
||||||
|
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
|
||||||
|
do { \
|
||||||
|
to_type *_to = (to_type *)(to); \
|
||||||
|
const from_type *_iter = (const from_type *)(begin);\
|
||||||
|
const from_type *_end = (const from_type *)(end);\
|
||||||
|
Py_ssize_t n = (_end) - (_iter); \
|
||||||
|
const from_type *_unrolled_end = \
|
||||||
|
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
|
||||||
|
while (_iter < (_unrolled_end)) { \
|
||||||
|
_to[0] = (to_type) _iter[0]; \
|
||||||
|
_to[1] = (to_type) _iter[1]; \
|
||||||
|
_to[2] = (to_type) _iter[2]; \
|
||||||
|
_to[3] = (to_type) _iter[3]; \
|
||||||
|
_iter += 4; _to += 4; \
|
||||||
|
} \
|
||||||
|
while (_iter < (_end)) \
|
||||||
|
*_to++ = (to_type) *_iter++; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
|||||||
@@ -559,6 +559,7 @@ OBJECT_OBJS= \
|
|||||||
Objects/typevarobject.o \
|
Objects/typevarobject.o \
|
||||||
Objects/unicode_format.o \
|
Objects/unicode_format.o \
|
||||||
Objects/unicode_formatter.o \
|
Objects/unicode_formatter.o \
|
||||||
|
Objects/unicode_writer.o \
|
||||||
Objects/unicodectype.o \
|
Objects/unicodectype.o \
|
||||||
Objects/unicodeobject.o \
|
Objects/unicodeobject.o \
|
||||||
Objects/unionobject.o \
|
Objects/unionobject.o \
|
||||||
|
|||||||
639
Objects/unicode_writer.c
Normal file
639
Objects/unicode_writer.c
Normal file
@@ -0,0 +1,639 @@
|
|||||||
|
/*
|
||||||
|
|
||||||
|
Unicode implementation based on original code by Fredrik Lundh,
|
||||||
|
modified by Marc-Andre Lemburg <mal@lemburg.com>.
|
||||||
|
|
||||||
|
Major speed upgrades to the method implementations at the Reykjavik
|
||||||
|
NeedForSpeed sprint, by Fredrik Lundh and Andrew Dalke.
|
||||||
|
|
||||||
|
Copyright (c) Corporation for National Research Initiatives.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
The original string type implementation is:
|
||||||
|
|
||||||
|
Copyright (c) 1999 by Secret Labs AB
|
||||||
|
Copyright (c) 1999 by Fredrik Lundh
|
||||||
|
|
||||||
|
By obtaining, using, and/or copying this software and/or its
|
||||||
|
associated documentation, you agree that you have read, understood,
|
||||||
|
and will comply with the following terms and conditions:
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and distribute this software and its
|
||||||
|
associated documentation for any purpose and without fee is hereby
|
||||||
|
granted, provided that the above copyright notice appears in all
|
||||||
|
copies, and that both that copyright notice and this permission notice
|
||||||
|
appear in supporting documentation, and that the name of Secret Labs
|
||||||
|
AB or the author not be used in advertising or publicity pertaining to
|
||||||
|
distribution of the software without specific, written prior
|
||||||
|
permission.
|
||||||
|
|
||||||
|
SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
|
||||||
|
THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||||
|
FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
|
||||||
|
ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
|
||||||
|
OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
--------------------------------------------------------------------
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "Python.h"
|
||||||
|
#include "pycore_freelist.h" // _Py_FREELIST_FREE()
|
||||||
|
#include "pycore_long.h" // _PyLong_FormatWriter()
|
||||||
|
#include "pycore_unicodeobject.h" // _PyUnicode_Result()
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef MS_WINDOWS
|
||||||
|
/* On Windows, overallocate by 50% is the best factor */
|
||||||
|
# define OVERALLOCATE_FACTOR 2
|
||||||
|
#else
|
||||||
|
/* On Linux, overallocate by 25% is the best factor */
|
||||||
|
# define OVERALLOCATE_FACTOR 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
/* Compilation of templated routines */
|
||||||
|
|
||||||
|
#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
|
||||||
|
|
||||||
|
#include "stringlib/ucs1lib.h"
|
||||||
|
#include "stringlib/find_max_char.h"
|
||||||
|
#include "stringlib/undef.h"
|
||||||
|
|
||||||
|
|
||||||
|
/* Copy an ASCII or latin1 char* string into a Python Unicode string.
|
||||||
|
|
||||||
|
WARNING: The function doesn't copy the terminating null character and
|
||||||
|
doesn't check the maximum character (may write a latin1 character in an
|
||||||
|
ASCII string). */
|
||||||
|
static void
|
||||||
|
unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
|
||||||
|
const char *str, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
int kind = PyUnicode_KIND(unicode);
|
||||||
|
const void *data = PyUnicode_DATA(unicode);
|
||||||
|
const char *end = str + len;
|
||||||
|
|
||||||
|
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
|
||||||
|
switch (kind) {
|
||||||
|
case PyUnicode_1BYTE_KIND: {
|
||||||
|
#ifdef Py_DEBUG
|
||||||
|
if (PyUnicode_IS_ASCII(unicode)) {
|
||||||
|
Py_UCS4 maxchar = ucs1lib_find_max_char(
|
||||||
|
(const Py_UCS1*)str,
|
||||||
|
(const Py_UCS1*)str + len);
|
||||||
|
assert(maxchar < 128);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
memcpy((char *) data + index, str, len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_2BYTE_KIND: {
|
||||||
|
Py_UCS2 *start = (Py_UCS2 *)data + index;
|
||||||
|
Py_UCS2 *ucs2 = start;
|
||||||
|
|
||||||
|
for (; str < end; ++ucs2, ++str)
|
||||||
|
*ucs2 = (Py_UCS2)*str;
|
||||||
|
|
||||||
|
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_4BYTE_KIND: {
|
||||||
|
Py_UCS4 *start = (Py_UCS4 *)data + index;
|
||||||
|
Py_UCS4 *ucs4 = start;
|
||||||
|
|
||||||
|
for (; str < end; ++ucs4, ++str)
|
||||||
|
*ucs4 = (Py_UCS4)*str;
|
||||||
|
|
||||||
|
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
Py_UNREACHABLE();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
|
||||||
|
writer->data = PyUnicode_DATA(writer->buffer);
|
||||||
|
|
||||||
|
if (!writer->readonly) {
|
||||||
|
writer->kind = PyUnicode_KIND(writer->buffer);
|
||||||
|
writer->size = PyUnicode_GET_LENGTH(writer->buffer);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* use a value smaller than PyUnicode_1BYTE_KIND() so
|
||||||
|
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
||||||
|
writer->kind = 0;
|
||||||
|
assert(writer->kind <= PyUnicode_1BYTE_KIND);
|
||||||
|
|
||||||
|
/* Copy-on-write mode: set buffer size to 0 so
|
||||||
|
* _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
|
||||||
|
* next write. */
|
||||||
|
writer->size = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
memset(writer, 0, sizeof(*writer));
|
||||||
|
|
||||||
|
/* ASCII is the bare minimum */
|
||||||
|
writer->min_char = 127;
|
||||||
|
|
||||||
|
/* use a kind value smaller than PyUnicode_1BYTE_KIND so
|
||||||
|
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
||||||
|
assert(writer->kind == 0);
|
||||||
|
assert(writer->kind < PyUnicode_1BYTE_KIND);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyUnicodeWriter*
|
||||||
|
PyUnicodeWriter_Create(Py_ssize_t length)
|
||||||
|
{
|
||||||
|
if (length < 0) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"length must be positive");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
const size_t size = sizeof(_PyUnicodeWriter);
|
||||||
|
PyUnicodeWriter *pub_writer;
|
||||||
|
pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
|
||||||
|
if (pub_writer == NULL) {
|
||||||
|
pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
|
||||||
|
if (pub_writer == NULL) {
|
||||||
|
return (PyUnicodeWriter *)PyErr_NoMemory();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
|
||||||
|
|
||||||
|
_PyUnicodeWriter_Init(writer);
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
|
||||||
|
PyUnicodeWriter_Discard(pub_writer);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
writer->overallocate = 1;
|
||||||
|
|
||||||
|
return pub_writer;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
if (writer == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
_PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
|
||||||
|
_Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Initialize _PyUnicodeWriter with initial buffer
|
||||||
|
void
|
||||||
|
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
|
||||||
|
{
|
||||||
|
memset(writer, 0, sizeof(*writer));
|
||||||
|
writer->buffer = buffer;
|
||||||
|
_PyUnicodeWriter_Update(writer);
|
||||||
|
writer->min_length = writer->size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
||||||
|
Py_ssize_t length, Py_UCS4 maxchar)
|
||||||
|
{
|
||||||
|
Py_ssize_t newlen;
|
||||||
|
PyObject *newbuffer;
|
||||||
|
|
||||||
|
assert(length >= 0);
|
||||||
|
assert(maxchar <= _Py_MAX_UNICODE);
|
||||||
|
|
||||||
|
/* ensure that the _PyUnicodeWriter_Prepare macro was used */
|
||||||
|
assert((maxchar > writer->maxchar && length >= 0)
|
||||||
|
|| length > 0);
|
||||||
|
|
||||||
|
if (length > PY_SSIZE_T_MAX - writer->pos) {
|
||||||
|
PyErr_NoMemory();
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
newlen = writer->pos + length;
|
||||||
|
|
||||||
|
maxchar = Py_MAX(maxchar, writer->min_char);
|
||||||
|
|
||||||
|
if (writer->buffer == NULL) {
|
||||||
|
assert(!writer->readonly);
|
||||||
|
if (writer->overallocate
|
||||||
|
&& newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
|
||||||
|
/* overallocate to limit the number of realloc() */
|
||||||
|
newlen += newlen / OVERALLOCATE_FACTOR;
|
||||||
|
}
|
||||||
|
if (newlen < writer->min_length)
|
||||||
|
newlen = writer->min_length;
|
||||||
|
|
||||||
|
writer->buffer = PyUnicode_New(newlen, maxchar);
|
||||||
|
if (writer->buffer == NULL)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if (newlen > writer->size) {
|
||||||
|
if (writer->overallocate
|
||||||
|
&& newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
|
||||||
|
/* overallocate to limit the number of realloc() */
|
||||||
|
newlen += newlen / OVERALLOCATE_FACTOR;
|
||||||
|
}
|
||||||
|
if (newlen < writer->min_length)
|
||||||
|
newlen = writer->min_length;
|
||||||
|
|
||||||
|
if (maxchar > writer->maxchar || writer->readonly) {
|
||||||
|
/* resize + widen */
|
||||||
|
maxchar = Py_MAX(maxchar, writer->maxchar);
|
||||||
|
newbuffer = PyUnicode_New(newlen, maxchar);
|
||||||
|
if (newbuffer == NULL)
|
||||||
|
return -1;
|
||||||
|
_PyUnicode_FastCopyCharacters(newbuffer, 0,
|
||||||
|
writer->buffer, 0, writer->pos);
|
||||||
|
Py_DECREF(writer->buffer);
|
||||||
|
writer->readonly = 0;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
newbuffer = _PyUnicode_ResizeCompact(writer->buffer, newlen);
|
||||||
|
if (newbuffer == NULL)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
writer->buffer = newbuffer;
|
||||||
|
}
|
||||||
|
else if (maxchar > writer->maxchar) {
|
||||||
|
assert(!writer->readonly);
|
||||||
|
newbuffer = PyUnicode_New(writer->size, maxchar);
|
||||||
|
if (newbuffer == NULL)
|
||||||
|
return -1;
|
||||||
|
_PyUnicode_FastCopyCharacters(newbuffer, 0,
|
||||||
|
writer->buffer, 0, writer->pos);
|
||||||
|
Py_SETREF(writer->buffer, newbuffer);
|
||||||
|
}
|
||||||
|
_PyUnicodeWriter_Update(writer);
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#undef OVERALLOCATE_FACTOR
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
|
||||||
|
int kind)
|
||||||
|
{
|
||||||
|
Py_UCS4 maxchar;
|
||||||
|
|
||||||
|
/* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
|
||||||
|
assert(writer->kind < kind);
|
||||||
|
|
||||||
|
switch (kind)
|
||||||
|
{
|
||||||
|
case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
|
||||||
|
case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
|
||||||
|
case PyUnicode_4BYTE_KIND: maxchar = _Py_MAX_UNICODE; break;
|
||||||
|
default:
|
||||||
|
Py_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||||
|
{
|
||||||
|
return _PyUnicodeWriter_WriteCharInline(writer, ch);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
|
||||||
|
{
|
||||||
|
if (ch > _Py_MAX_UNICODE) {
|
||||||
|
PyErr_SetString(PyExc_ValueError,
|
||||||
|
"character must be in range(0x110000)");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
||||||
|
{
|
||||||
|
assert(PyUnicode_Check(str));
|
||||||
|
|
||||||
|
Py_UCS4 maxchar;
|
||||||
|
Py_ssize_t len;
|
||||||
|
|
||||||
|
len = PyUnicode_GET_LENGTH(str);
|
||||||
|
if (len == 0)
|
||||||
|
return 0;
|
||||||
|
maxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
||||||
|
if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
|
||||||
|
if (writer->buffer == NULL && !writer->overallocate) {
|
||||||
|
assert(_PyUnicode_CheckConsistency(str, 1));
|
||||||
|
writer->readonly = 1;
|
||||||
|
writer->buffer = Py_NewRef(str);
|
||||||
|
_PyUnicodeWriter_Update(writer);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
||||||
|
str, 0, len);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
{
|
||||||
|
PyTypeObject *type = Py_TYPE(obj);
|
||||||
|
if (type == &PyUnicode_Type) {
|
||||||
|
return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == &PyLong_Type) {
|
||||||
|
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *str = PyObject_Str(obj);
|
||||||
|
if (str == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
|
||||||
|
Py_DECREF(str);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
|
||||||
|
{
|
||||||
|
if (Py_TYPE(obj) == &PyLong_Type) {
|
||||||
|
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *repr = PyObject_Repr(obj);
|
||||||
|
if (repr == NULL) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
|
||||||
|
Py_DECREF(repr);
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
||||||
|
Py_ssize_t start, Py_ssize_t end)
|
||||||
|
{
|
||||||
|
assert(0 <= start);
|
||||||
|
assert(end <= PyUnicode_GET_LENGTH(str));
|
||||||
|
assert(start <= end);
|
||||||
|
|
||||||
|
if (start == 0 && end == PyUnicode_GET_LENGTH(str))
|
||||||
|
return _PyUnicodeWriter_WriteStr(writer, str);
|
||||||
|
|
||||||
|
Py_ssize_t len = end - start;
|
||||||
|
if (len == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_UCS4 maxchar;
|
||||||
|
if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
|
||||||
|
maxchar = _PyUnicode_FindMaxChar(str, start, end);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
maxchar = writer->maxchar;
|
||||||
|
}
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
||||||
|
str, start, len);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
|
||||||
|
Py_ssize_t start, Py_ssize_t end)
|
||||||
|
{
|
||||||
|
if (!PyUnicode_Check(str)) {
|
||||||
|
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (start < 0 || start > end) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "invalid start argument");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (end > PyUnicode_GET_LENGTH(str)) {
|
||||||
|
PyErr_Format(PyExc_ValueError, "invalid end argument");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
|
||||||
|
start, end);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
||||||
|
const char *ascii, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
if (len == -1)
|
||||||
|
len = strlen(ascii);
|
||||||
|
|
||||||
|
assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
|
||||||
|
|
||||||
|
if (writer->buffer == NULL && !writer->overallocate) {
|
||||||
|
PyObject *str;
|
||||||
|
|
||||||
|
str = _PyUnicode_FromASCII(ascii, len);
|
||||||
|
if (str == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
writer->readonly = 1;
|
||||||
|
writer->buffer = str;
|
||||||
|
_PyUnicodeWriter_Update(writer);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
switch (writer->kind)
|
||||||
|
{
|
||||||
|
case PyUnicode_1BYTE_KIND:
|
||||||
|
{
|
||||||
|
const Py_UCS1 *str = (const Py_UCS1 *)ascii;
|
||||||
|
Py_UCS1 *data = writer->data;
|
||||||
|
|
||||||
|
memcpy(data + writer->pos, str, len);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_2BYTE_KIND:
|
||||||
|
{
|
||||||
|
_PyUnicode_CONVERT_BYTES(
|
||||||
|
Py_UCS1, Py_UCS2,
|
||||||
|
ascii, ascii + len,
|
||||||
|
(Py_UCS2 *)writer->data + writer->pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case PyUnicode_4BYTE_KIND:
|
||||||
|
{
|
||||||
|
_PyUnicode_CONVERT_BYTES(
|
||||||
|
Py_UCS1, Py_UCS4,
|
||||||
|
ascii, ascii + len,
|
||||||
|
(Py_UCS4 *)writer->data + writer->pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
Py_UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
|
||||||
|
const char *str,
|
||||||
|
Py_ssize_t size)
|
||||||
|
{
|
||||||
|
assert(writer != NULL);
|
||||||
|
_Py_AssertHoldsTstate();
|
||||||
|
|
||||||
|
_PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
|
||||||
|
return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
|
||||||
|
const char *str,
|
||||||
|
Py_ssize_t size)
|
||||||
|
{
|
||||||
|
if (size < 0) {
|
||||||
|
size = strlen(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
||||||
|
Py_ssize_t old_pos = _writer->pos;
|
||||||
|
int res = _PyUnicode_DecodeUTF8Writer(_writer, str, size,
|
||||||
|
_Py_ERROR_STRICT, NULL, NULL);
|
||||||
|
if (res < 0) {
|
||||||
|
_writer->pos = old_pos;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
|
||||||
|
const char *string,
|
||||||
|
Py_ssize_t length,
|
||||||
|
const char *errors,
|
||||||
|
Py_ssize_t *consumed)
|
||||||
|
{
|
||||||
|
if (length < 0) {
|
||||||
|
length = strlen(string);
|
||||||
|
}
|
||||||
|
|
||||||
|
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
||||||
|
Py_ssize_t old_pos = _writer->pos;
|
||||||
|
int res = _PyUnicode_DecodeUTF8Writer(_writer, string, length,
|
||||||
|
_Py_ERROR_UNKNOWN, errors,
|
||||||
|
consumed);
|
||||||
|
if (res < 0) {
|
||||||
|
_writer->pos = old_pos;
|
||||||
|
if (consumed) {
|
||||||
|
*consumed = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int
|
||||||
|
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
||||||
|
const char *str, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
Py_UCS4 maxchar;
|
||||||
|
|
||||||
|
maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
|
||||||
|
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
|
||||||
|
return -1;
|
||||||
|
unicode_write_cstr(writer->buffer, writer->pos, str, len);
|
||||||
|
writer->pos += len;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyObject *
|
||||||
|
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
PyObject *str;
|
||||||
|
|
||||||
|
if (writer->pos == 0) {
|
||||||
|
Py_CLEAR(writer->buffer);
|
||||||
|
return _PyUnicode_GetEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
str = writer->buffer;
|
||||||
|
writer->buffer = NULL;
|
||||||
|
|
||||||
|
if (writer->readonly) {
|
||||||
|
assert(PyUnicode_GET_LENGTH(str) == writer->pos);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (PyUnicode_GET_LENGTH(str) != writer->pos) {
|
||||||
|
PyObject *str2;
|
||||||
|
str2 = _PyUnicode_ResizeCompact(str, writer->pos);
|
||||||
|
if (str2 == NULL) {
|
||||||
|
Py_DECREF(str);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
str = str2;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(_PyUnicode_CheckConsistency(str, 1));
|
||||||
|
return _PyUnicode_Result(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
PyObject*
|
||||||
|
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
|
||||||
|
assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
|
||||||
|
_Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
|
||||||
|
return str;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void
|
||||||
|
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
|
||||||
|
{
|
||||||
|
Py_CLEAR(writer->buffer);
|
||||||
|
}
|
||||||
@@ -46,7 +46,6 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|||||||
#include "pycore_codecs.h" // _PyCodec_Lookup()
|
#include "pycore_codecs.h" // _PyCodec_Lookup()
|
||||||
#include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST
|
#include "pycore_critical_section.h" // Py_*_CRITICAL_SECTION_SEQUENCE_FAST
|
||||||
#include "pycore_format.h" // F_LJUST
|
#include "pycore_format.h" // F_LJUST
|
||||||
#include "pycore_freelist.h" // _Py_FREELIST_FREE(), _Py_FREELIST_POP()
|
|
||||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||||
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
#include "pycore_interp.h" // PyInterpreterState.fs_codec
|
||||||
#include "pycore_long.h" // _PyLong_FormatWriter()
|
#include "pycore_long.h" // _PyLong_FormatWriter()
|
||||||
@@ -184,45 +183,9 @@ static inline int _PyUnicode_HAS_UTF8_MEMORY(PyObject *op)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Generic helper macro to convert characters of different types.
|
|
||||||
from_type and to_type have to be valid type names, begin and end
|
|
||||||
are pointers to the source characters which should be of type
|
|
||||||
"from_type *". to is a pointer of type "to_type *" and points to the
|
|
||||||
buffer where the result characters are written to. */
|
|
||||||
#define _PyUnicode_CONVERT_BYTES(from_type, to_type, begin, end, to) \
|
|
||||||
do { \
|
|
||||||
to_type *_to = (to_type *)(to); \
|
|
||||||
const from_type *_iter = (const from_type *)(begin);\
|
|
||||||
const from_type *_end = (const from_type *)(end);\
|
|
||||||
Py_ssize_t n = (_end) - (_iter); \
|
|
||||||
const from_type *_unrolled_end = \
|
|
||||||
_iter + _Py_SIZE_ROUND_DOWN(n, 4); \
|
|
||||||
while (_iter < (_unrolled_end)) { \
|
|
||||||
_to[0] = (to_type) _iter[0]; \
|
|
||||||
_to[1] = (to_type) _iter[1]; \
|
|
||||||
_to[2] = (to_type) _iter[2]; \
|
|
||||||
_to[3] = (to_type) _iter[3]; \
|
|
||||||
_iter += 4; _to += 4; \
|
|
||||||
} \
|
|
||||||
while (_iter < (_end)) \
|
|
||||||
*_to++ = (to_type) *_iter++; \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
#define LATIN1 _Py_LATIN1_CHR
|
#define LATIN1 _Py_LATIN1_CHR
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
|
||||||
/* On Windows, overallocate by 50% is the best factor */
|
|
||||||
# define OVERALLOCATE_FACTOR 2
|
|
||||||
#else
|
|
||||||
/* On Linux, overallocate by 25% is the best factor */
|
|
||||||
# define OVERALLOCATE_FACTOR 4
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
static inline int
|
|
||||||
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
|
|
||||||
static inline void
|
|
||||||
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer);
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
unicode_encode_utf8(PyObject *unicode, _Py_error_handler error_handler,
|
||||||
const char *errors);
|
const char *errors);
|
||||||
@@ -230,11 +193,6 @@ static PyObject *
|
|||||||
unicode_decode_utf8(const char *s, Py_ssize_t size,
|
unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||||
_Py_error_handler error_handler, const char *errors,
|
_Py_error_handler error_handler, const char *errors,
|
||||||
Py_ssize_t *consumed);
|
Py_ssize_t *consumed);
|
||||||
static int
|
|
||||||
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
|
|
||||||
const char *s, Py_ssize_t size,
|
|
||||||
_Py_error_handler error_handler, const char *errors,
|
|
||||||
Py_ssize_t *consumed);
|
|
||||||
#ifdef Py_DEBUG
|
#ifdef Py_DEBUG
|
||||||
static inline int unicode_is_finalizing(void);
|
static inline int unicode_is_finalizing(void);
|
||||||
static int unicode_is_singleton(PyObject *unicode);
|
static int unicode_is_singleton(PyObject *unicode);
|
||||||
@@ -242,7 +200,8 @@ static int unicode_is_singleton(PyObject *unicode);
|
|||||||
|
|
||||||
|
|
||||||
// Return a reference to the immortal empty string singleton.
|
// Return a reference to the immortal empty string singleton.
|
||||||
static inline PyObject* unicode_get_empty(void)
|
PyObject*
|
||||||
|
_PyUnicode_GetEmpty(void)
|
||||||
{
|
{
|
||||||
_Py_DECLARE_STR(empty, "");
|
_Py_DECLARE_STR(empty, "");
|
||||||
return &_Py_STR(empty);
|
return &_Py_STR(empty);
|
||||||
@@ -416,7 +375,7 @@ static void clear_global_interned_strings(void)
|
|||||||
|
|
||||||
#define _Py_RETURN_UNICODE_EMPTY() \
|
#define _Py_RETURN_UNICODE_EMPTY() \
|
||||||
do { \
|
do { \
|
||||||
return unicode_get_empty(); \
|
return _PyUnicode_GetEmpty();\
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
@@ -748,14 +707,14 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||||||
#undef CHECK
|
#undef CHECK
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
PyObject*
|
||||||
unicode_result(PyObject *unicode)
|
_PyUnicode_Result(PyObject *unicode)
|
||||||
{
|
{
|
||||||
assert(_PyUnicode_CHECK(unicode));
|
assert(_PyUnicode_CHECK(unicode));
|
||||||
|
|
||||||
Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
|
Py_ssize_t length = PyUnicode_GET_LENGTH(unicode);
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
PyObject *empty = unicode_get_empty();
|
PyObject *empty = _PyUnicode_GetEmpty();
|
||||||
if (unicode != empty) {
|
if (unicode != empty) {
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
}
|
}
|
||||||
@@ -778,6 +737,7 @@ unicode_result(PyObject *unicode)
|
|||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
return unicode;
|
return unicode;
|
||||||
}
|
}
|
||||||
|
#define unicode_result _PyUnicode_Result
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
unicode_result_unchanged(PyObject *unicode)
|
unicode_result_unchanged(PyObject *unicode)
|
||||||
@@ -985,7 +945,7 @@ make_bloom_mask(int kind, const void* ptr, Py_ssize_t len)
|
|||||||
|
|
||||||
/* Compilation of templated routines */
|
/* Compilation of templated routines */
|
||||||
|
|
||||||
#define STRINGLIB_GET_EMPTY() unicode_get_empty()
|
#define STRINGLIB_GET_EMPTY() _PyUnicode_GetEmpty()
|
||||||
|
|
||||||
#include "stringlib/asciilib.h"
|
#include "stringlib/asciilib.h"
|
||||||
#include "stringlib/fastsearch.h"
|
#include "stringlib/fastsearch.h"
|
||||||
@@ -1097,8 +1057,8 @@ resize_copy(PyObject *unicode, Py_ssize_t length)
|
|||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
PyObject*
|
||||||
resize_compact(PyObject *unicode, Py_ssize_t length)
|
_PyUnicode_ResizeCompact(PyObject *unicode, Py_ssize_t length)
|
||||||
{
|
{
|
||||||
Py_ssize_t char_size;
|
Py_ssize_t char_size;
|
||||||
Py_ssize_t struct_size;
|
Py_ssize_t struct_size;
|
||||||
@@ -1306,7 +1266,7 @@ PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar)
|
|||||||
{
|
{
|
||||||
/* Optimization for empty strings */
|
/* Optimization for empty strings */
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
return unicode_get_empty();
|
return _PyUnicode_GetEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *obj;
|
PyObject *obj;
|
||||||
@@ -1799,7 +1759,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
if (length == 0) {
|
if (length == 0) {
|
||||||
PyObject *empty = unicode_get_empty();
|
PyObject *empty = _PyUnicode_GetEmpty();
|
||||||
Py_SETREF(*p_unicode, empty);
|
Py_SETREF(*p_unicode, empty);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -1813,7 +1773,7 @@ unicode_resize(PyObject **p_unicode, Py_ssize_t length)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (PyUnicode_IS_COMPACT(unicode)) {
|
if (PyUnicode_IS_COMPACT(unicode)) {
|
||||||
PyObject *new_unicode = resize_compact(unicode, length);
|
PyObject *new_unicode = _PyUnicode_ResizeCompact(unicode, length);
|
||||||
if (new_unicode == NULL)
|
if (new_unicode == NULL)
|
||||||
return -1;
|
return -1;
|
||||||
*p_unicode = new_unicode;
|
*p_unicode = new_unicode;
|
||||||
@@ -1839,58 +1799,6 @@ PyUnicode_Resize(PyObject **p_unicode, Py_ssize_t length)
|
|||||||
return unicode_resize(p_unicode, length);
|
return unicode_resize(p_unicode, length);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy an ASCII or latin1 char* string into a Python Unicode string.
|
|
||||||
|
|
||||||
WARNING: The function doesn't copy the terminating null character and
|
|
||||||
doesn't check the maximum character (may write a latin1 character in an
|
|
||||||
ASCII string). */
|
|
||||||
static void
|
|
||||||
unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
|
|
||||||
const char *str, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
int kind = PyUnicode_KIND(unicode);
|
|
||||||
const void *data = PyUnicode_DATA(unicode);
|
|
||||||
const char *end = str + len;
|
|
||||||
|
|
||||||
assert(index + len <= PyUnicode_GET_LENGTH(unicode));
|
|
||||||
switch (kind) {
|
|
||||||
case PyUnicode_1BYTE_KIND: {
|
|
||||||
#ifdef Py_DEBUG
|
|
||||||
if (PyUnicode_IS_ASCII(unicode)) {
|
|
||||||
Py_UCS4 maxchar = ucs1lib_find_max_char(
|
|
||||||
(const Py_UCS1*)str,
|
|
||||||
(const Py_UCS1*)str + len);
|
|
||||||
assert(maxchar < 128);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
memcpy((char *) data + index, str, len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PyUnicode_2BYTE_KIND: {
|
|
||||||
Py_UCS2 *start = (Py_UCS2 *)data + index;
|
|
||||||
Py_UCS2 *ucs2 = start;
|
|
||||||
|
|
||||||
for (; str < end; ++ucs2, ++str)
|
|
||||||
*ucs2 = (Py_UCS2)*str;
|
|
||||||
|
|
||||||
assert((ucs2 - start) <= PyUnicode_GET_LENGTH(unicode));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PyUnicode_4BYTE_KIND: {
|
|
||||||
Py_UCS4 *start = (Py_UCS4 *)data + index;
|
|
||||||
Py_UCS4 *ucs4 = start;
|
|
||||||
|
|
||||||
for (; str < end; ++ucs4, ++str)
|
|
||||||
*ucs4 = (Py_UCS4)*str;
|
|
||||||
|
|
||||||
assert((ucs4 - start) <= PyUnicode_GET_LENGTH(unicode));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
Py_UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
get_latin1_char(Py_UCS1 ch)
|
get_latin1_char(Py_UCS1 ch)
|
||||||
{
|
{
|
||||||
@@ -2105,7 +2013,7 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size)
|
|||||||
"NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize");
|
"NULL string with positive size with NULL passed to PyUnicode_FromStringAndSize");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return unicode_get_empty();
|
return _PyUnicode_GetEmpty();
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *
|
PyObject *
|
||||||
@@ -2672,8 +2580,8 @@ unicode_fromformat_write_utf8(_PyUnicodeWriter *writer, const char *str,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (width < 0) {
|
if (width < 0) {
|
||||||
return unicode_decode_utf8_writer(writer, str, length,
|
return _PyUnicode_DecodeUTF8Writer(writer, str, length,
|
||||||
_Py_ERROR_REPLACE, "replace", pconsumed);
|
_Py_ERROR_REPLACE, "replace", pconsumed);
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
|
PyObject *unicode = PyUnicode_DecodeUTF8Stateful(str, length,
|
||||||
@@ -5424,11 +5332,11 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
|
|||||||
|
|
||||||
|
|
||||||
// Used by PyUnicodeWriter_WriteUTF8() implementation
|
// Used by PyUnicodeWriter_WriteUTF8() implementation
|
||||||
static int
|
int
|
||||||
unicode_decode_utf8_writer(_PyUnicodeWriter *writer,
|
_PyUnicode_DecodeUTF8Writer(_PyUnicodeWriter *writer,
|
||||||
const char *s, Py_ssize_t size,
|
const char *s, Py_ssize_t size,
|
||||||
_Py_error_handler error_handler, const char *errors,
|
_Py_error_handler error_handler, const char *errors,
|
||||||
Py_ssize_t *consumed)
|
Py_ssize_t *consumed)
|
||||||
{
|
{
|
||||||
if (size == 0) {
|
if (size == 0) {
|
||||||
if (consumed) {
|
if (consumed) {
|
||||||
@@ -10766,7 +10674,7 @@ replace(PyObject *self, PyObject *str1,
|
|||||||
}
|
}
|
||||||
new_size = slen + n * (len2 - len1);
|
new_size = slen + n * (len2 - len1);
|
||||||
if (new_size == 0) {
|
if (new_size == 0) {
|
||||||
u = unicode_get_empty();
|
u = _PyUnicode_GetEmpty();
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
|
if (new_size > (PY_SSIZE_T_MAX / rkind)) {
|
||||||
@@ -11439,7 +11347,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Shortcuts */
|
/* Shortcuts */
|
||||||
PyObject *empty = unicode_get_empty(); // Borrowed reference
|
PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference
|
||||||
if (left == empty) {
|
if (left == empty) {
|
||||||
return PyUnicode_FromObject(right);
|
return PyUnicode_FromObject(right);
|
||||||
}
|
}
|
||||||
@@ -11491,7 +11399,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Shortcuts */
|
/* Shortcuts */
|
||||||
PyObject *empty = unicode_get_empty(); // Borrowed reference
|
PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference
|
||||||
if (left == empty) {
|
if (left == empty) {
|
||||||
Py_DECREF(left);
|
Py_DECREF(left);
|
||||||
*p_left = Py_NewRef(right);
|
*p_left = Py_NewRef(right);
|
||||||
@@ -12987,7 +12895,7 @@ PyUnicode_Partition(PyObject *str_obj, PyObject *sep_obj)
|
|||||||
len1 = PyUnicode_GET_LENGTH(str_obj);
|
len1 = PyUnicode_GET_LENGTH(str_obj);
|
||||||
len2 = PyUnicode_GET_LENGTH(sep_obj);
|
len2 = PyUnicode_GET_LENGTH(sep_obj);
|
||||||
if (kind1 < kind2 || len1 < len2) {
|
if (kind1 < kind2 || len1 < len2) {
|
||||||
PyObject *empty = unicode_get_empty(); // Borrowed reference
|
PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference
|
||||||
return PyTuple_Pack(3, str_obj, empty, empty);
|
return PyTuple_Pack(3, str_obj, empty, empty);
|
||||||
}
|
}
|
||||||
buf1 = PyUnicode_DATA(str_obj);
|
buf1 = PyUnicode_DATA(str_obj);
|
||||||
@@ -13039,7 +12947,7 @@ PyUnicode_RPartition(PyObject *str_obj, PyObject *sep_obj)
|
|||||||
len1 = PyUnicode_GET_LENGTH(str_obj);
|
len1 = PyUnicode_GET_LENGTH(str_obj);
|
||||||
len2 = PyUnicode_GET_LENGTH(sep_obj);
|
len2 = PyUnicode_GET_LENGTH(sep_obj);
|
||||||
if (kind1 < kind2 || len1 < len2) {
|
if (kind1 < kind2 || len1 < len2) {
|
||||||
PyObject *empty = unicode_get_empty(); // Borrowed reference
|
PyObject *empty = _PyUnicode_GetEmpty(); // Borrowed reference
|
||||||
return PyTuple_Pack(3, empty, empty, str_obj);
|
return PyTuple_Pack(3, empty, empty, str_obj);
|
||||||
}
|
}
|
||||||
buf1 = PyUnicode_DATA(str_obj);
|
buf1 = PyUnicode_DATA(str_obj);
|
||||||
@@ -13518,523 +13426,6 @@ unicode_endswith_impl(PyObject *self, PyObject *subobj, Py_ssize_t start,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline void
|
|
||||||
_PyUnicodeWriter_Update(_PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
writer->maxchar = PyUnicode_MAX_CHAR_VALUE(writer->buffer);
|
|
||||||
writer->data = PyUnicode_DATA(writer->buffer);
|
|
||||||
|
|
||||||
if (!writer->readonly) {
|
|
||||||
writer->kind = PyUnicode_KIND(writer->buffer);
|
|
||||||
writer->size = PyUnicode_GET_LENGTH(writer->buffer);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* use a value smaller than PyUnicode_1BYTE_KIND() so
|
|
||||||
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
|
||||||
writer->kind = 0;
|
|
||||||
assert(writer->kind <= PyUnicode_1BYTE_KIND);
|
|
||||||
|
|
||||||
/* Copy-on-write mode: set buffer size to 0 so
|
|
||||||
* _PyUnicodeWriter_Prepare() will copy (and enlarge) the buffer on
|
|
||||||
* next write. */
|
|
||||||
writer->size = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
_PyUnicodeWriter_Init(_PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
memset(writer, 0, sizeof(*writer));
|
|
||||||
|
|
||||||
/* ASCII is the bare minimum */
|
|
||||||
writer->min_char = 127;
|
|
||||||
|
|
||||||
/* use a kind value smaller than PyUnicode_1BYTE_KIND so
|
|
||||||
_PyUnicodeWriter_PrepareKind() will copy the buffer. */
|
|
||||||
assert(writer->kind == 0);
|
|
||||||
assert(writer->kind < PyUnicode_1BYTE_KIND);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
PyUnicodeWriter*
|
|
||||||
PyUnicodeWriter_Create(Py_ssize_t length)
|
|
||||||
{
|
|
||||||
if (length < 0) {
|
|
||||||
PyErr_SetString(PyExc_ValueError,
|
|
||||||
"length must be positive");
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
const size_t size = sizeof(_PyUnicodeWriter);
|
|
||||||
PyUnicodeWriter *pub_writer;
|
|
||||||
pub_writer = _Py_FREELIST_POP_MEM(unicode_writers);
|
|
||||||
if (pub_writer == NULL) {
|
|
||||||
pub_writer = (PyUnicodeWriter *)PyMem_Malloc(size);
|
|
||||||
if (pub_writer == NULL) {
|
|
||||||
return (PyUnicodeWriter *)PyErr_NoMemory();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_PyUnicodeWriter *writer = (_PyUnicodeWriter *)pub_writer;
|
|
||||||
|
|
||||||
_PyUnicodeWriter_Init(writer);
|
|
||||||
if (_PyUnicodeWriter_Prepare(writer, length, 127) < 0) {
|
|
||||||
PyUnicodeWriter_Discard(pub_writer);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
writer->overallocate = 1;
|
|
||||||
|
|
||||||
return pub_writer;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void PyUnicodeWriter_Discard(PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
if (writer == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
_PyUnicodeWriter_Dealloc((_PyUnicodeWriter*)writer);
|
|
||||||
_Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Initialize _PyUnicodeWriter with initial buffer
|
|
||||||
static inline void
|
|
||||||
_PyUnicodeWriter_InitWithBuffer(_PyUnicodeWriter *writer, PyObject *buffer)
|
|
||||||
{
|
|
||||||
memset(writer, 0, sizeof(*writer));
|
|
||||||
writer->buffer = buffer;
|
|
||||||
_PyUnicodeWriter_Update(writer);
|
|
||||||
writer->min_length = writer->size;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
|
|
||||||
Py_ssize_t length, Py_UCS4 maxchar)
|
|
||||||
{
|
|
||||||
Py_ssize_t newlen;
|
|
||||||
PyObject *newbuffer;
|
|
||||||
|
|
||||||
assert(length >= 0);
|
|
||||||
assert(maxchar <= MAX_UNICODE);
|
|
||||||
|
|
||||||
/* ensure that the _PyUnicodeWriter_Prepare macro was used */
|
|
||||||
assert((maxchar > writer->maxchar && length >= 0)
|
|
||||||
|| length > 0);
|
|
||||||
|
|
||||||
if (length > PY_SSIZE_T_MAX - writer->pos) {
|
|
||||||
PyErr_NoMemory();
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
newlen = writer->pos + length;
|
|
||||||
|
|
||||||
maxchar = Py_MAX(maxchar, writer->min_char);
|
|
||||||
|
|
||||||
if (writer->buffer == NULL) {
|
|
||||||
assert(!writer->readonly);
|
|
||||||
if (writer->overallocate
|
|
||||||
&& newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
|
|
||||||
/* overallocate to limit the number of realloc() */
|
|
||||||
newlen += newlen / OVERALLOCATE_FACTOR;
|
|
||||||
}
|
|
||||||
if (newlen < writer->min_length)
|
|
||||||
newlen = writer->min_length;
|
|
||||||
|
|
||||||
writer->buffer = PyUnicode_New(newlen, maxchar);
|
|
||||||
if (writer->buffer == NULL)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
else if (newlen > writer->size) {
|
|
||||||
if (writer->overallocate
|
|
||||||
&& newlen <= (PY_SSIZE_T_MAX - newlen / OVERALLOCATE_FACTOR)) {
|
|
||||||
/* overallocate to limit the number of realloc() */
|
|
||||||
newlen += newlen / OVERALLOCATE_FACTOR;
|
|
||||||
}
|
|
||||||
if (newlen < writer->min_length)
|
|
||||||
newlen = writer->min_length;
|
|
||||||
|
|
||||||
if (maxchar > writer->maxchar || writer->readonly) {
|
|
||||||
/* resize + widen */
|
|
||||||
maxchar = Py_MAX(maxchar, writer->maxchar);
|
|
||||||
newbuffer = PyUnicode_New(newlen, maxchar);
|
|
||||||
if (newbuffer == NULL)
|
|
||||||
return -1;
|
|
||||||
_PyUnicode_FastCopyCharacters(newbuffer, 0,
|
|
||||||
writer->buffer, 0, writer->pos);
|
|
||||||
Py_DECREF(writer->buffer);
|
|
||||||
writer->readonly = 0;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
newbuffer = resize_compact(writer->buffer, newlen);
|
|
||||||
if (newbuffer == NULL)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
writer->buffer = newbuffer;
|
|
||||||
}
|
|
||||||
else if (maxchar > writer->maxchar) {
|
|
||||||
assert(!writer->readonly);
|
|
||||||
newbuffer = PyUnicode_New(writer->size, maxchar);
|
|
||||||
if (newbuffer == NULL)
|
|
||||||
return -1;
|
|
||||||
_PyUnicode_FastCopyCharacters(newbuffer, 0,
|
|
||||||
writer->buffer, 0, writer->pos);
|
|
||||||
Py_SETREF(writer->buffer, newbuffer);
|
|
||||||
}
|
|
||||||
_PyUnicodeWriter_Update(writer);
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
#undef OVERALLOCATE_FACTOR
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
|
|
||||||
int kind)
|
|
||||||
{
|
|
||||||
Py_UCS4 maxchar;
|
|
||||||
|
|
||||||
/* ensure that the _PyUnicodeWriter_PrepareKind macro was used */
|
|
||||||
assert(writer->kind < kind);
|
|
||||||
|
|
||||||
switch (kind)
|
|
||||||
{
|
|
||||||
case PyUnicode_1BYTE_KIND: maxchar = 0xff; break;
|
|
||||||
case PyUnicode_2BYTE_KIND: maxchar = 0xffff; break;
|
|
||||||
case PyUnicode_4BYTE_KIND: maxchar = MAX_UNICODE; break;
|
|
||||||
default:
|
|
||||||
Py_UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
return _PyUnicodeWriter_PrepareInternal(writer, 0, maxchar);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, Py_UCS4 ch)
|
|
||||||
{
|
|
||||||
return _PyUnicodeWriter_WriteCharInline(writer, ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteChar(PyUnicodeWriter *writer, Py_UCS4 ch)
|
|
||||||
{
|
|
||||||
if (ch > MAX_UNICODE) {
|
|
||||||
PyErr_SetString(PyExc_ValueError,
|
|
||||||
"character must be in range(0x110000)");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return _PyUnicodeWriter_WriteChar((_PyUnicodeWriter*)writer, ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
|
|
||||||
{
|
|
||||||
assert(PyUnicode_Check(str));
|
|
||||||
|
|
||||||
Py_UCS4 maxchar;
|
|
||||||
Py_ssize_t len;
|
|
||||||
|
|
||||||
len = PyUnicode_GET_LENGTH(str);
|
|
||||||
if (len == 0)
|
|
||||||
return 0;
|
|
||||||
maxchar = PyUnicode_MAX_CHAR_VALUE(str);
|
|
||||||
if (maxchar > writer->maxchar || len > writer->size - writer->pos) {
|
|
||||||
if (writer->buffer == NULL && !writer->overallocate) {
|
|
||||||
assert(_PyUnicode_CheckConsistency(str, 1));
|
|
||||||
writer->readonly = 1;
|
|
||||||
writer->buffer = Py_NewRef(str);
|
|
||||||
_PyUnicodeWriter_Update(writer);
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (_PyUnicodeWriter_PrepareInternal(writer, len, maxchar) == -1)
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
|
||||||
str, 0, len);
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteStr(PyUnicodeWriter *writer, PyObject *obj)
|
|
||||||
{
|
|
||||||
PyTypeObject *type = Py_TYPE(obj);
|
|
||||||
if (type == &PyUnicode_Type) {
|
|
||||||
return _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, obj);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (type == &PyLong_Type) {
|
|
||||||
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *str = PyObject_Str(obj);
|
|
||||||
if (str == NULL) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, str);
|
|
||||||
Py_DECREF(str);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteRepr(PyUnicodeWriter *writer, PyObject *obj)
|
|
||||||
{
|
|
||||||
if (Py_TYPE(obj) == &PyLong_Type) {
|
|
||||||
return _PyLong_FormatWriter((_PyUnicodeWriter*)writer, obj, 10, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *repr = PyObject_Repr(obj);
|
|
||||||
if (repr == NULL) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
int res = _PyUnicodeWriter_WriteStr((_PyUnicodeWriter*)writer, repr);
|
|
||||||
Py_DECREF(repr);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
|
|
||||||
Py_ssize_t start, Py_ssize_t end)
|
|
||||||
{
|
|
||||||
assert(0 <= start);
|
|
||||||
assert(end <= PyUnicode_GET_LENGTH(str));
|
|
||||||
assert(start <= end);
|
|
||||||
|
|
||||||
if (start == 0 && end == PyUnicode_GET_LENGTH(str))
|
|
||||||
return _PyUnicodeWriter_WriteStr(writer, str);
|
|
||||||
|
|
||||||
Py_ssize_t len = end - start;
|
|
||||||
if (len == 0) {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
Py_UCS4 maxchar;
|
|
||||||
if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar) {
|
|
||||||
maxchar = _PyUnicode_FindMaxChar(str, start, end);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
maxchar = writer->maxchar;
|
|
||||||
}
|
|
||||||
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
|
|
||||||
str, start, len);
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteSubstring(PyUnicodeWriter *writer, PyObject *str,
|
|
||||||
Py_ssize_t start, Py_ssize_t end)
|
|
||||||
{
|
|
||||||
if (!PyUnicode_Check(str)) {
|
|
||||||
PyErr_Format(PyExc_TypeError, "expect str, not %T", str);
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (start < 0 || start > end) {
|
|
||||||
PyErr_Format(PyExc_ValueError, "invalid start argument");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (end > PyUnicode_GET_LENGTH(str)) {
|
|
||||||
PyErr_Format(PyExc_ValueError, "invalid end argument");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return _PyUnicodeWriter_WriteSubstring((_PyUnicodeWriter*)writer, str,
|
|
||||||
start, end);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
|
|
||||||
const char *ascii, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
if (len == -1)
|
|
||||||
len = strlen(ascii);
|
|
||||||
|
|
||||||
assert(ucs1lib_find_max_char((const Py_UCS1*)ascii, (const Py_UCS1*)ascii + len) < 128);
|
|
||||||
|
|
||||||
if (writer->buffer == NULL && !writer->overallocate) {
|
|
||||||
PyObject *str;
|
|
||||||
|
|
||||||
str = _PyUnicode_FromASCII(ascii, len);
|
|
||||||
if (str == NULL)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
writer->readonly = 1;
|
|
||||||
writer->buffer = str;
|
|
||||||
_PyUnicodeWriter_Update(writer);
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (_PyUnicodeWriter_Prepare(writer, len, 127) == -1)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
switch (writer->kind)
|
|
||||||
{
|
|
||||||
case PyUnicode_1BYTE_KIND:
|
|
||||||
{
|
|
||||||
const Py_UCS1 *str = (const Py_UCS1 *)ascii;
|
|
||||||
Py_UCS1 *data = writer->data;
|
|
||||||
|
|
||||||
memcpy(data + writer->pos, str, len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PyUnicode_2BYTE_KIND:
|
|
||||||
{
|
|
||||||
_PyUnicode_CONVERT_BYTES(
|
|
||||||
Py_UCS1, Py_UCS2,
|
|
||||||
ascii, ascii + len,
|
|
||||||
(Py_UCS2 *)writer->data + writer->pos);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case PyUnicode_4BYTE_KIND:
|
|
||||||
{
|
|
||||||
_PyUnicode_CONVERT_BYTES(
|
|
||||||
Py_UCS1, Py_UCS4,
|
|
||||||
ascii, ascii + len,
|
|
||||||
(Py_UCS4 *)writer->data + writer->pos);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
Py_UNREACHABLE();
|
|
||||||
}
|
|
||||||
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteASCII(PyUnicodeWriter *writer,
|
|
||||||
const char *str,
|
|
||||||
Py_ssize_t size)
|
|
||||||
{
|
|
||||||
assert(writer != NULL);
|
|
||||||
_Py_AssertHoldsTstate();
|
|
||||||
|
|
||||||
_PyUnicodeWriter *priv_writer = (_PyUnicodeWriter*)writer;
|
|
||||||
return _PyUnicodeWriter_WriteASCIIString(priv_writer, str, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_WriteUTF8(PyUnicodeWriter *writer,
|
|
||||||
const char *str,
|
|
||||||
Py_ssize_t size)
|
|
||||||
{
|
|
||||||
if (size < 0) {
|
|
||||||
size = strlen(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
|
||||||
Py_ssize_t old_pos = _writer->pos;
|
|
||||||
int res = unicode_decode_utf8_writer(_writer, str, size,
|
|
||||||
_Py_ERROR_STRICT, NULL, NULL);
|
|
||||||
if (res < 0) {
|
|
||||||
_writer->pos = old_pos;
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
PyUnicodeWriter_DecodeUTF8Stateful(PyUnicodeWriter *writer,
|
|
||||||
const char *string,
|
|
||||||
Py_ssize_t length,
|
|
||||||
const char *errors,
|
|
||||||
Py_ssize_t *consumed)
|
|
||||||
{
|
|
||||||
if (length < 0) {
|
|
||||||
length = strlen(string);
|
|
||||||
}
|
|
||||||
|
|
||||||
_PyUnicodeWriter *_writer = (_PyUnicodeWriter*)writer;
|
|
||||||
Py_ssize_t old_pos = _writer->pos;
|
|
||||||
int res = unicode_decode_utf8_writer(_writer, string, length,
|
|
||||||
_Py_ERROR_UNKNOWN, errors, consumed);
|
|
||||||
if (res < 0) {
|
|
||||||
_writer->pos = old_pos;
|
|
||||||
if (consumed) {
|
|
||||||
*consumed = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int
|
|
||||||
_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
|
|
||||||
const char *str, Py_ssize_t len)
|
|
||||||
{
|
|
||||||
Py_UCS4 maxchar;
|
|
||||||
|
|
||||||
maxchar = ucs1lib_find_max_char((const Py_UCS1*)str, (const Py_UCS1*)str + len);
|
|
||||||
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) == -1)
|
|
||||||
return -1;
|
|
||||||
unicode_write_cstr(writer->buffer, writer->pos, str, len);
|
|
||||||
writer->pos += len;
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject *
|
|
||||||
_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
PyObject *str;
|
|
||||||
|
|
||||||
if (writer->pos == 0) {
|
|
||||||
Py_CLEAR(writer->buffer);
|
|
||||||
_Py_RETURN_UNICODE_EMPTY();
|
|
||||||
}
|
|
||||||
|
|
||||||
str = writer->buffer;
|
|
||||||
writer->buffer = NULL;
|
|
||||||
|
|
||||||
if (writer->readonly) {
|
|
||||||
assert(PyUnicode_GET_LENGTH(str) == writer->pos);
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (PyUnicode_GET_LENGTH(str) != writer->pos) {
|
|
||||||
PyObject *str2;
|
|
||||||
str2 = resize_compact(str, writer->pos);
|
|
||||||
if (str2 == NULL) {
|
|
||||||
Py_DECREF(str);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
str = str2;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(_PyUnicode_CheckConsistency(str, 1));
|
|
||||||
return unicode_result(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
PyObject*
|
|
||||||
PyUnicodeWriter_Finish(PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
PyObject *str = _PyUnicodeWriter_Finish((_PyUnicodeWriter*)writer);
|
|
||||||
assert(((_PyUnicodeWriter*)writer)->buffer == NULL);
|
|
||||||
_Py_FREELIST_FREE(unicode_writers, writer, PyMem_Free);
|
|
||||||
return str;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
|
||||||
_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer)
|
|
||||||
{
|
|
||||||
Py_CLEAR(writer->buffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
#include "stringlib/unicode_format.h"
|
#include "stringlib/unicode_format.h"
|
||||||
|
|
||||||
PyDoc_STRVAR(format__doc__,
|
PyDoc_STRVAR(format__doc__,
|
||||||
@@ -14456,7 +13847,7 @@ unicode_new_impl(PyTypeObject *type, PyObject *x, const char *encoding,
|
|||||||
{
|
{
|
||||||
PyObject *unicode;
|
PyObject *unicode;
|
||||||
if (x == NULL) {
|
if (x == NULL) {
|
||||||
unicode = unicode_get_empty();
|
unicode = _PyUnicode_GetEmpty();
|
||||||
}
|
}
|
||||||
else if (encoding == NULL && errors == NULL) {
|
else if (encoding == NULL && errors == NULL) {
|
||||||
unicode = PyObject_Str(x);
|
unicode = PyObject_Str(x);
|
||||||
@@ -14510,7 +13901,7 @@ unicode_vectorcall(PyObject *type, PyObject *const *args,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
if (nargs == 0) {
|
if (nargs == 0) {
|
||||||
return unicode_get_empty();
|
return _PyUnicode_GetEmpty();
|
||||||
}
|
}
|
||||||
PyObject *object = args[0];
|
PyObject *object = args[0];
|
||||||
if (nargs == 1) {
|
if (nargs == 1) {
|
||||||
@@ -15186,7 +14577,7 @@ unicodeiter_reduce(PyObject *op, PyObject *Py_UNUSED(ignored))
|
|||||||
if (it->it_seq != NULL) {
|
if (it->it_seq != NULL) {
|
||||||
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
|
return Py_BuildValue("N(O)n", iter, it->it_seq, it->it_index);
|
||||||
} else {
|
} else {
|
||||||
PyObject *u = unicode_get_empty();
|
PyObject *u = _PyUnicode_GetEmpty();
|
||||||
if (u == NULL) {
|
if (u == NULL) {
|
||||||
Py_XDECREF(iter);
|
Py_XDECREF(iter);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
@@ -167,6 +167,7 @@
|
|||||||
<ClCompile Include="..\Objects\unicode_format.c" />
|
<ClCompile Include="..\Objects\unicode_format.c" />
|
||||||
<ClCompile Include="..\Objects\unicodectype.c" />
|
<ClCompile Include="..\Objects\unicodectype.c" />
|
||||||
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
||||||
|
<ClCompile Include="..\Objects\unicode_writer.c" />
|
||||||
<ClCompile Include="..\Objects\unicodeobject.c" />
|
<ClCompile Include="..\Objects\unicodeobject.c" />
|
||||||
<ClCompile Include="..\Objects\unionobject.c" />
|
<ClCompile Include="..\Objects\unionobject.c" />
|
||||||
<ClCompile Include="..\Objects\weakrefobject.c" />
|
<ClCompile Include="..\Objects\weakrefobject.c" />
|
||||||
|
|||||||
@@ -490,6 +490,9 @@
|
|||||||
<ClCompile Include="..\Objects\unicode_formatter.c">
|
<ClCompile Include="..\Objects\unicode_formatter.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\Objects\unicode_writer.c">
|
||||||
|
<Filter>Source Files</Filter>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="..\Objects\unicodeobject.c">
|
<ClCompile Include="..\Objects\unicodeobject.c">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|||||||
@@ -562,6 +562,7 @@
|
|||||||
<ClCompile Include="..\Objects\unicode_format.c" />
|
<ClCompile Include="..\Objects\unicode_format.c" />
|
||||||
<ClCompile Include="..\Objects\unicodectype.c" />
|
<ClCompile Include="..\Objects\unicodectype.c" />
|
||||||
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
<ClCompile Include="..\Objects\unicode_formatter.c" />
|
||||||
|
<ClCompile Include="..\Objects\unicode_writer.c" />
|
||||||
<ClCompile Include="..\Objects\unicodeobject.c" />
|
<ClCompile Include="..\Objects\unicodeobject.c" />
|
||||||
<ClCompile Include="..\Objects\unionobject.c" />
|
<ClCompile Include="..\Objects\unionobject.c" />
|
||||||
<ClCompile Include="..\Objects\weakrefobject.c" />
|
<ClCompile Include="..\Objects\weakrefobject.c" />
|
||||||
|
|||||||
@@ -1283,6 +1283,9 @@
|
|||||||
<ClCompile Include="..\Objects\unicode_formatter.c">
|
<ClCompile Include="..\Objects\unicode_formatter.c">
|
||||||
<Filter>Objects</Filter>
|
<Filter>Objects</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="..\Objects\unicode_writer.c">
|
||||||
|
<Filter>Objects</Filter>
|
||||||
|
</ClCompile>
|
||||||
<ClCompile Include="..\Objects\unicodeobject.c">
|
<ClCompile Include="..\Objects\unicodeobject.c">
|
||||||
<Filter>Objects</Filter>
|
<Filter>Objects</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
|||||||
Reference in New Issue
Block a user