gh-101659: Isolate "obmalloc" State to Each Interpreter (gh-101660)
This is strictly about moving the "obmalloc" runtime state from `_PyRuntimeState` to `PyInterpreterState`. Doing so improves isolation between interpreters, specifically most of the memory (incl. objects) allocated for each interpreter's use. This is important for a per-interpreter GIL, but such isolation is valuable even without it. FWIW, a per-interpreter obmalloc is the proverbial canary-in-the-coalmine when it comes to the isolation of objects between interpreters. Any object that leaks (unintentionally) to another interpreter is highly likely to cause a crash (on debug builds at least). That's a useful thing to know, relative to interpreter isolation.
This commit is contained in:
@@ -725,20 +725,51 @@ PyObject_Free(void *ptr)
|
||||
static int running_on_valgrind = -1;
|
||||
#endif
|
||||
|
||||
typedef struct _obmalloc_state OMState;
|
||||
|
||||
#define allarenas (_PyRuntime.obmalloc.mgmt.arenas)
|
||||
#define maxarenas (_PyRuntime.obmalloc.mgmt.maxarenas)
|
||||
#define unused_arena_objects (_PyRuntime.obmalloc.mgmt.unused_arena_objects)
|
||||
#define usable_arenas (_PyRuntime.obmalloc.mgmt.usable_arenas)
|
||||
#define nfp2lasta (_PyRuntime.obmalloc.mgmt.nfp2lasta)
|
||||
#define narenas_currently_allocated (_PyRuntime.obmalloc.mgmt.narenas_currently_allocated)
|
||||
#define ntimes_arena_allocated (_PyRuntime.obmalloc.mgmt.ntimes_arena_allocated)
|
||||
#define narenas_highwater (_PyRuntime.obmalloc.mgmt.narenas_highwater)
|
||||
#define raw_allocated_blocks (_PyRuntime.obmalloc.mgmt.raw_allocated_blocks)
|
||||
static inline int
|
||||
has_own_state(PyInterpreterState *interp)
|
||||
{
|
||||
return (_Py_IsMainInterpreter(interp) ||
|
||||
!(interp->feature_flags & Py_RTFLAGS_USE_MAIN_OBMALLOC) ||
|
||||
_Py_IsMainInterpreterFinalizing(interp));
|
||||
}
|
||||
|
||||
static inline OMState *
|
||||
get_state(void)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
if (!has_own_state(interp)) {
|
||||
interp = _PyInterpreterState_Main();
|
||||
}
|
||||
return &interp->obmalloc;
|
||||
}
|
||||
|
||||
// These macros all rely on a local "state" variable.
|
||||
#define usedpools (state->pools.used)
|
||||
#define allarenas (state->mgmt.arenas)
|
||||
#define maxarenas (state->mgmt.maxarenas)
|
||||
#define unused_arena_objects (state->mgmt.unused_arena_objects)
|
||||
#define usable_arenas (state->mgmt.usable_arenas)
|
||||
#define nfp2lasta (state->mgmt.nfp2lasta)
|
||||
#define narenas_currently_allocated (state->mgmt.narenas_currently_allocated)
|
||||
#define ntimes_arena_allocated (state->mgmt.ntimes_arena_allocated)
|
||||
#define narenas_highwater (state->mgmt.narenas_highwater)
|
||||
#define raw_allocated_blocks (state->mgmt.raw_allocated_blocks)
|
||||
|
||||
Py_ssize_t
|
||||
_Py_GetAllocatedBlocks(void)
|
||||
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *interp)
|
||||
{
|
||||
#ifdef Py_DEBUG
|
||||
assert(has_own_state(interp));
|
||||
#else
|
||||
if (!has_own_state(interp)) {
|
||||
_Py_FatalErrorFunc(__func__,
|
||||
"the interpreter doesn't have its own allocator");
|
||||
}
|
||||
#endif
|
||||
OMState *state = &interp->obmalloc;
|
||||
|
||||
Py_ssize_t n = raw_allocated_blocks;
|
||||
/* add up allocated blocks for used pools */
|
||||
for (uint i = 0; i < maxarenas; ++i) {
|
||||
@@ -759,20 +790,100 @@ _Py_GetAllocatedBlocks(void)
|
||||
return n;
|
||||
}
|
||||
|
||||
void
|
||||
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *interp)
|
||||
{
|
||||
if (has_own_state(interp)) {
|
||||
Py_ssize_t leaked = _PyInterpreterState_GetAllocatedBlocks(interp);
|
||||
assert(has_own_state(interp) || leaked == 0);
|
||||
interp->runtime->obmalloc.interpreter_leaks += leaked;
|
||||
}
|
||||
}
|
||||
|
||||
static Py_ssize_t get_num_global_allocated_blocks(_PyRuntimeState *);
|
||||
|
||||
/* We preserve the number of blockss leaked during runtime finalization,
|
||||
so they can be reported if the runtime is initialized again. */
|
||||
// XXX We don't lose any information by dropping this,
|
||||
// so we should consider doing so.
|
||||
static Py_ssize_t last_final_leaks = 0;
|
||||
|
||||
void
|
||||
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *runtime)
|
||||
{
|
||||
last_final_leaks = get_num_global_allocated_blocks(runtime);
|
||||
runtime->obmalloc.interpreter_leaks = 0;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
get_num_global_allocated_blocks(_PyRuntimeState *runtime)
|
||||
{
|
||||
Py_ssize_t total = 0;
|
||||
if (_PyRuntimeState_GetFinalizing(runtime) != NULL) {
|
||||
PyInterpreterState *interp = _PyInterpreterState_Main();
|
||||
if (interp == NULL) {
|
||||
/* We are at the very end of runtime finalization.
|
||||
We can't rely on finalizing->interp since that thread
|
||||
state is probably already freed, so we don't worry
|
||||
about it. */
|
||||
assert(PyInterpreterState_Head() == NULL);
|
||||
}
|
||||
else {
|
||||
assert(interp != NULL);
|
||||
/* It is probably the last interpreter but not necessarily. */
|
||||
assert(PyInterpreterState_Next(interp) == NULL);
|
||||
total += _PyInterpreterState_GetAllocatedBlocks(interp);
|
||||
}
|
||||
}
|
||||
else {
|
||||
HEAD_LOCK(runtime);
|
||||
PyInterpreterState *interp = PyInterpreterState_Head();
|
||||
assert(interp != NULL);
|
||||
#ifdef Py_DEBUG
|
||||
int got_main = 0;
|
||||
#endif
|
||||
for (; interp != NULL; interp = PyInterpreterState_Next(interp)) {
|
||||
#ifdef Py_DEBUG
|
||||
if (_Py_IsMainInterpreter(interp)) {
|
||||
assert(!got_main);
|
||||
got_main = 1;
|
||||
assert(has_own_state(interp));
|
||||
}
|
||||
#endif
|
||||
if (has_own_state(interp)) {
|
||||
total += _PyInterpreterState_GetAllocatedBlocks(interp);
|
||||
}
|
||||
}
|
||||
HEAD_UNLOCK(runtime);
|
||||
#ifdef Py_DEBUG
|
||||
assert(got_main);
|
||||
#endif
|
||||
}
|
||||
total += runtime->obmalloc.interpreter_leaks;
|
||||
total += last_final_leaks;
|
||||
return total;
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
_Py_GetGlobalAllocatedBlocks(void)
|
||||
{
|
||||
return get_num_global_allocated_blocks(&_PyRuntime);
|
||||
}
|
||||
|
||||
#if WITH_PYMALLOC_RADIX_TREE
|
||||
/*==========================================================================*/
|
||||
/* radix tree for tracking arena usage. */
|
||||
|
||||
#define arena_map_root (_PyRuntime.obmalloc.usage.arena_map_root)
|
||||
#define arena_map_root (state->usage.arena_map_root)
|
||||
#ifdef USE_INTERIOR_NODES
|
||||
#define arena_map_mid_count (_PyRuntime.obmalloc.usage.arena_map_mid_count)
|
||||
#define arena_map_bot_count (_PyRuntime.obmalloc.usage.arena_map_bot_count)
|
||||
#define arena_map_mid_count (state->usage.arena_map_mid_count)
|
||||
#define arena_map_bot_count (state->usage.arena_map_bot_count)
|
||||
#endif
|
||||
|
||||
/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or
|
||||
* it cannot be created */
|
||||
static Py_ALWAYS_INLINE arena_map_bot_t *
|
||||
arena_map_get(pymem_block *p, int create)
|
||||
arena_map_get(OMState *state, pymem_block *p, int create)
|
||||
{
|
||||
#ifdef USE_INTERIOR_NODES
|
||||
/* sanity check that IGNORE_BITS is correct */
|
||||
@@ -833,11 +944,12 @@ arena_map_get(pymem_block *p, int create)
|
||||
|
||||
/* mark or unmark addresses covered by arena */
|
||||
static int
|
||||
arena_map_mark_used(uintptr_t arena_base, int is_used)
|
||||
arena_map_mark_used(OMState *state, uintptr_t arena_base, int is_used)
|
||||
{
|
||||
/* sanity check that IGNORE_BITS is correct */
|
||||
assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root));
|
||||
arena_map_bot_t *n_hi = arena_map_get((pymem_block *)arena_base, is_used);
|
||||
arena_map_bot_t *n_hi = arena_map_get(
|
||||
state, (pymem_block *)arena_base, is_used);
|
||||
if (n_hi == NULL) {
|
||||
assert(is_used); /* otherwise node should already exist */
|
||||
return 0; /* failed to allocate space for node */
|
||||
@@ -862,7 +974,8 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
|
||||
* must overflow to 0. However, that would mean arena_base was
|
||||
* "ideal" and we should not be in this case. */
|
||||
assert(arena_base < arena_base_next);
|
||||
arena_map_bot_t *n_lo = arena_map_get((pymem_block *)arena_base_next, is_used);
|
||||
arena_map_bot_t *n_lo = arena_map_get(
|
||||
state, (pymem_block *)arena_base_next, is_used);
|
||||
if (n_lo == NULL) {
|
||||
assert(is_used); /* otherwise should already exist */
|
||||
n_hi->arenas[i3].tail_hi = 0;
|
||||
@@ -877,9 +990,9 @@ arena_map_mark_used(uintptr_t arena_base, int is_used)
|
||||
/* Return true if 'p' is a pointer inside an obmalloc arena.
|
||||
* _PyObject_Free() calls this so it needs to be very fast. */
|
||||
static int
|
||||
arena_map_is_used(pymem_block *p)
|
||||
arena_map_is_used(OMState *state, pymem_block *p)
|
||||
{
|
||||
arena_map_bot_t *n = arena_map_get(p, 0);
|
||||
arena_map_bot_t *n = arena_map_get(state, p, 0);
|
||||
if (n == NULL) {
|
||||
return 0;
|
||||
}
|
||||
@@ -902,7 +1015,7 @@ arena_map_is_used(pymem_block *p)
|
||||
* `usable_arenas` to the return value.
|
||||
*/
|
||||
static struct arena_object*
|
||||
new_arena(void)
|
||||
new_arena(OMState *state)
|
||||
{
|
||||
struct arena_object* arenaobj;
|
||||
uint excess; /* number of bytes above pool alignment */
|
||||
@@ -968,7 +1081,7 @@ new_arena(void)
|
||||
address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE);
|
||||
#if WITH_PYMALLOC_RADIX_TREE
|
||||
if (address != NULL) {
|
||||
if (!arena_map_mark_used((uintptr_t)address, 1)) {
|
||||
if (!arena_map_mark_used(state, (uintptr_t)address, 1)) {
|
||||
/* marking arena in radix tree failed, abort */
|
||||
_PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE);
|
||||
address = NULL;
|
||||
@@ -1011,9 +1124,9 @@ new_arena(void)
|
||||
pymalloc. When the radix tree is used, 'poolp' is unused.
|
||||
*/
|
||||
static bool
|
||||
address_in_range(void *p, poolp Py_UNUSED(pool))
|
||||
address_in_range(OMState *state, void *p, poolp Py_UNUSED(pool))
|
||||
{
|
||||
return arena_map_is_used(p);
|
||||
return arena_map_is_used(state, p);
|
||||
}
|
||||
#else
|
||||
/*
|
||||
@@ -1094,7 +1207,7 @@ extremely desirable that it be this fast.
|
||||
static bool _Py_NO_SANITIZE_ADDRESS
|
||||
_Py_NO_SANITIZE_THREAD
|
||||
_Py_NO_SANITIZE_MEMORY
|
||||
address_in_range(void *p, poolp pool)
|
||||
address_in_range(OMState *state, void *p, poolp pool)
|
||||
{
|
||||
// Since address_in_range may be reading from memory which was not allocated
|
||||
// by Python, it is important that pool->arenaindex is read only once, as
|
||||
@@ -1111,8 +1224,6 @@ address_in_range(void *p, poolp pool)
|
||||
|
||||
/*==========================================================================*/
|
||||
|
||||
#define usedpools (_PyRuntime.obmalloc.pools.used)
|
||||
|
||||
// Called when freelist is exhausted. Extend the freelist if there is
|
||||
// space for a block. Otherwise, remove this pool from usedpools.
|
||||
static void
|
||||
@@ -1138,7 +1249,7 @@ pymalloc_pool_extend(poolp pool, uint size)
|
||||
* This function takes new pool and allocate a block from it.
|
||||
*/
|
||||
static void*
|
||||
allocate_from_new_pool(uint size)
|
||||
allocate_from_new_pool(OMState *state, uint size)
|
||||
{
|
||||
/* There isn't a pool of the right size class immediately
|
||||
* available: use a free pool.
|
||||
@@ -1150,7 +1261,7 @@ allocate_from_new_pool(uint size)
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
usable_arenas = new_arena();
|
||||
usable_arenas = new_arena(state);
|
||||
if (usable_arenas == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
@@ -1274,7 +1385,7 @@ allocate_from_new_pool(uint size)
|
||||
or when the max memory limit has been reached.
|
||||
*/
|
||||
static inline void*
|
||||
pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
|
||||
pymalloc_alloc(OMState *state, void *Py_UNUSED(ctx), size_t nbytes)
|
||||
{
|
||||
#ifdef WITH_VALGRIND
|
||||
if (UNLIKELY(running_on_valgrind == -1)) {
|
||||
@@ -1314,7 +1425,7 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
|
||||
/* There isn't a pool of the right size class immediately
|
||||
* available: use a free pool.
|
||||
*/
|
||||
bp = allocate_from_new_pool(size);
|
||||
bp = allocate_from_new_pool(state, size);
|
||||
}
|
||||
|
||||
return (void *)bp;
|
||||
@@ -1324,7 +1435,8 @@ pymalloc_alloc(void *Py_UNUSED(ctx), size_t nbytes)
|
||||
void *
|
||||
_PyObject_Malloc(void *ctx, size_t nbytes)
|
||||
{
|
||||
void* ptr = pymalloc_alloc(ctx, nbytes);
|
||||
OMState *state = get_state();
|
||||
void* ptr = pymalloc_alloc(state, ctx, nbytes);
|
||||
if (LIKELY(ptr != NULL)) {
|
||||
return ptr;
|
||||
}
|
||||
@@ -1343,7 +1455,8 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
|
||||
assert(elsize == 0 || nelem <= (size_t)PY_SSIZE_T_MAX / elsize);
|
||||
size_t nbytes = nelem * elsize;
|
||||
|
||||
void* ptr = pymalloc_alloc(ctx, nbytes);
|
||||
OMState *state = get_state();
|
||||
void* ptr = pymalloc_alloc(state, ctx, nbytes);
|
||||
if (LIKELY(ptr != NULL)) {
|
||||
memset(ptr, 0, nbytes);
|
||||
return ptr;
|
||||
@@ -1358,7 +1471,7 @@ _PyObject_Calloc(void *ctx, size_t nelem, size_t elsize)
|
||||
|
||||
|
||||
static void
|
||||
insert_to_usedpool(poolp pool)
|
||||
insert_to_usedpool(OMState *state, poolp pool)
|
||||
{
|
||||
assert(pool->ref.count > 0); /* else the pool is empty */
|
||||
|
||||
@@ -1374,7 +1487,7 @@ insert_to_usedpool(poolp pool)
|
||||
}
|
||||
|
||||
static void
|
||||
insert_to_freepool(poolp pool)
|
||||
insert_to_freepool(OMState *state, poolp pool)
|
||||
{
|
||||
poolp next = pool->nextpool;
|
||||
poolp prev = pool->prevpool;
|
||||
@@ -1457,7 +1570,7 @@ insert_to_freepool(poolp pool)
|
||||
|
||||
#if WITH_PYMALLOC_RADIX_TREE
|
||||
/* mark arena region as not under control of obmalloc */
|
||||
arena_map_mark_used(ao->address, 0);
|
||||
arena_map_mark_used(state, ao->address, 0);
|
||||
#endif
|
||||
|
||||
/* Free the entire arena. */
|
||||
@@ -1544,7 +1657,7 @@ insert_to_freepool(poolp pool)
|
||||
Return 1 if it was freed.
|
||||
Return 0 if the block was not allocated by pymalloc_alloc(). */
|
||||
static inline int
|
||||
pymalloc_free(void *Py_UNUSED(ctx), void *p)
|
||||
pymalloc_free(OMState *state, void *Py_UNUSED(ctx), void *p)
|
||||
{
|
||||
assert(p != NULL);
|
||||
|
||||
@@ -1555,7 +1668,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
|
||||
#endif
|
||||
|
||||
poolp pool = POOL_ADDR(p);
|
||||
if (UNLIKELY(!address_in_range(p, pool))) {
|
||||
if (UNLIKELY(!address_in_range(state, p, pool))) {
|
||||
return 0;
|
||||
}
|
||||
/* We allocated this address. */
|
||||
@@ -1579,7 +1692,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
|
||||
* targets optimal filling when several pools contain
|
||||
* blocks of the same size class.
|
||||
*/
|
||||
insert_to_usedpool(pool);
|
||||
insert_to_usedpool(state, pool);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1596,7 +1709,7 @@ pymalloc_free(void *Py_UNUSED(ctx), void *p)
|
||||
* previously freed pools will be allocated later
|
||||
* (being not referenced, they are perhaps paged out).
|
||||
*/
|
||||
insert_to_freepool(pool);
|
||||
insert_to_freepool(state, pool);
|
||||
return 1;
|
||||
}
|
||||
|
||||
@@ -1609,7 +1722,8 @@ _PyObject_Free(void *ctx, void *p)
|
||||
return;
|
||||
}
|
||||
|
||||
if (UNLIKELY(!pymalloc_free(ctx, p))) {
|
||||
OMState *state = get_state();
|
||||
if (UNLIKELY(!pymalloc_free(state, ctx, p))) {
|
||||
/* pymalloc didn't allocate this address */
|
||||
PyMem_RawFree(p);
|
||||
raw_allocated_blocks--;
|
||||
@@ -1627,7 +1741,8 @@ _PyObject_Free(void *ctx, void *p)
|
||||
|
||||
Return 0 if pymalloc didn't allocated p. */
|
||||
static int
|
||||
pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
|
||||
pymalloc_realloc(OMState *state, void *ctx,
|
||||
void **newptr_p, void *p, size_t nbytes)
|
||||
{
|
||||
void *bp;
|
||||
poolp pool;
|
||||
@@ -1643,7 +1758,7 @@ pymalloc_realloc(void *ctx, void **newptr_p, void *p, size_t nbytes)
|
||||
#endif
|
||||
|
||||
pool = POOL_ADDR(p);
|
||||
if (!address_in_range(p, pool)) {
|
||||
if (!address_in_range(state, p, pool)) {
|
||||
/* pymalloc is not managing this block.
|
||||
|
||||
If nbytes <= SMALL_REQUEST_THRESHOLD, it's tempting to try to take
|
||||
@@ -1696,7 +1811,8 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
|
||||
return _PyObject_Malloc(ctx, nbytes);
|
||||
}
|
||||
|
||||
if (pymalloc_realloc(ctx, &ptr2, ptr, nbytes)) {
|
||||
OMState *state = get_state();
|
||||
if (pymalloc_realloc(state, ctx, &ptr2, ptr, nbytes)) {
|
||||
return ptr2;
|
||||
}
|
||||
|
||||
@@ -1710,11 +1826,29 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes)
|
||||
* only be used by extensions that are compiled with pymalloc enabled. */
|
||||
|
||||
Py_ssize_t
|
||||
_Py_GetAllocatedBlocks(void)
|
||||
_PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
Py_ssize_t
|
||||
_Py_GetGlobalAllocatedBlocks(void)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *Py_UNUSED(interp))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *Py_UNUSED(runtime))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
#endif /* WITH_PYMALLOC */
|
||||
|
||||
|
||||
@@ -2289,6 +2423,7 @@ _PyObject_DebugMallocStats(FILE *out)
|
||||
if (!_PyMem_PymallocEnabled()) {
|
||||
return 0;
|
||||
}
|
||||
OMState *state = get_state();
|
||||
|
||||
uint i;
|
||||
const uint numclasses = SMALL_REQUEST_THRESHOLD >> ALIGNMENT_SHIFT;
|
||||
|
||||
Reference in New Issue
Block a user