Files
cpython/Include/internal/pycore_bytesobject.h
Cody Maloney 732224e113 gh-139871: Add bytearray.take_bytes([n]) to efficiently extract bytes (GH-140128)
Update `bytearray` to contain a `bytes` and provide a zero-copy path to
"extract" the `bytes`. This allows making several code paths more efficient.

This does not move any codepaths to make use of this new API. The documentation
changes include common code patterns which can be made more efficient with
this API.

---

When just changing `bytearray` to contain `bytes` I ran pyperformance on a
`--with-lto --enable-optimizations --with-static-libpython` build and don't see
any major speedups or slowdowns with this; all seems to be in the noise of
my machine (Generally changes under 5% or benchmarks that don't touch
bytes/bytearray).


Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Maurycy Pawłowski-Wieroński <5383+maurycy@users.noreply.github.com>
2025-11-13 13:19:44 +00:00

108 lines
3.0 KiB
C

#ifndef Py_INTERNAL_BYTESOBJECT_H
#define Py_INTERNAL_BYTESOBJECT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif
extern PyObject* _PyBytes_FormatEx(
const char *format,
Py_ssize_t format_len,
PyObject *args,
int use_bytearray);
extern PyObject* _PyBytes_FromHex(
PyObject *string,
int use_bytearray);
// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
// Export for test_peg_generator.
PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
const char *,
int *, const char **);
// Substring Search.
//
// Returns the index of the first occurrence of
// a substring ("needle") in a larger text ("haystack").
// If the needle is not found, return -1.
// If the needle is found, add offset to the index.
//
// Export for 'mmap' shared extension.
PyAPI_FUNC(Py_ssize_t)
_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);
// Same as above, but search right-to-left.
// Export for 'mmap' shared extension.
PyAPI_FUNC(Py_ssize_t)
_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
const char *needle, Py_ssize_t len_needle,
Py_ssize_t offset);
// Helper function to implement the repeat and inplace repeat methods on a
// buffer.
//
// len_dest is assumed to be an integer multiple of len_src.
// If src equals dest, then assume the operation is inplace.
//
// This method repeately doubles the number of bytes copied to reduce
// the number of invocations of memcpy.
//
// Export for 'array' shared extension.
PyAPI_FUNC(void)
_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
const char* src, Py_ssize_t len_src);
/* _PyBytesObject_SIZE gives the basic size of a bytes object; any memory allocation
for a bytes object of length n should request PyBytesObject_SIZE + n bytes.
Using _PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
3 or 7 bytes per bytes object allocation on a typical system.
*/
#define _PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
/* --- PyBytesWriter ------------------------------------------------------ */
struct PyBytesWriter {
char small_buffer[256];
PyObject *obj;
Py_ssize_t size;
int use_bytearray;
int overallocate;
};
// Export for '_testcapi' shared extension
PyAPI_FUNC(PyBytesWriter*) _PyBytesWriter_CreateByteArray(Py_ssize_t size);
static inline Py_ssize_t
_PyBytesWriter_GetSize(PyBytesWriter *writer)
{
return writer->size;
}
static inline char*
_PyBytesWriter_GetData(PyBytesWriter *writer)
{
if (writer->obj == NULL) {
return writer->small_buffer;
}
else if (writer->use_bytearray) {
return PyByteArray_AS_STRING(writer->obj);
}
else {
return PyBytes_AS_STRING(writer->obj);
}
}
#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_BYTESOBJECT_H */