gh-131253: free-threaded build support for pystats (gh-137189)

Allow the --enable-pystats build option to be used with free-threading.  The
stats are now stored on a per-interpreter basis, rather than process global.
For free-threaded builds, the stats structure is allocated per-thread and
then periodically merged into the per-interpreter stats structure (on thread
exit or when the reporting function is called). Most of the pystats-related
code has been moved into the file Python/pystats.c.
This commit is contained in:
Neil Schemenauer
2025-11-03 11:36:37 -08:00
committed by GitHub
parent cf1a2c1ee4
commit c98c5b3449
24 changed files with 1269 additions and 485 deletions

View File

@@ -217,6 +217,15 @@ struct _ts {
*/
PyObject *threading_local_sentinel;
_PyRemoteDebuggerSupport remote_debugger_support;
#ifdef Py_STATS
// Pointer to PyStats structure, NULL if recording is off. For the
// free-threaded build, the structure is per-thread (stored as a pointer
// in _PyThreadStateImpl). For the default build, the structure is stored
// in the PyInterpreterState structure (threads do not have their own
// structure and all share the same per-interpreter structure).
PyStats *pystats;
#endif
};
/* other API */
@@ -239,6 +248,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
// function is set, otherwise disable them.
PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate);
#ifdef Py_STATS
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
// Fast lookup of the PyStats pointer for the calling thread.
//
// Reads the thread-local `_Py_tss_tstate` directly.  Returns NULL when no
// thread state is attached; otherwise returns that thread state's `pystats`
// field as-is (which may itself be NULL).
static inline PyStats*
_PyThreadState_GetStatsFast(void)
{
    return (_Py_tss_tstate != NULL) ? _Py_tss_tstate->pystats : NULL;
}
#endif
#endif // Py_STATS
/* PyGILState */
/* Helper/diagnostic function - return 1 if the current thread

View File

@@ -4,7 +4,7 @@
//
// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
// and Py_DECREF().
// - _Py_stats variable
// - _PyStats_GET()
//
// Functions of the sys module:
//
@@ -14,7 +14,7 @@
// - sys._stats_dump()
//
// Python must be built with ./configure --enable-pystats to define the
// Py_STATS macro.
// _PyStats_GET() macro.
//
// Define _PY_INTERPRETER macro to increment interpreter_increfs and
// interpreter_decrefs. Otherwise, increment increfs and decrefs.
@@ -109,6 +109,18 @@ typedef struct _gc_stats {
uint64_t objects_not_transitively_reachable;
} GCStats;
#ifdef Py_GIL_DISABLED
// Stats specific to the free-threaded build.  This typedef is only compiled
// when Py_GIL_DISABLED is defined.  All fields are plain 64-bit counters.
typedef struct _ft_stats {
// Number of times the interpreter had to spin or park while trying to
// acquire a mutex.
uint64_t mutex_sleeps;
// Number of times the QSBR mechanism polled (computed the read sequence
// value).
uint64_t qsbr_polls;
// Number of times the stop-the-world mechanism was used.
uint64_t world_stops;
} FTStats;
#endif
typedef struct _uop_stats {
uint64_t execution_count;
uint64_t miss;
@@ -173,22 +185,48 @@ typedef struct _stats {
CallStats call_stats;
ObjectStats object_stats;
OptimizationStats optimization_stats;
#ifdef Py_GIL_DISABLED
FTStats ft_stats;
#endif
RareEventStats rare_event_stats;
GCStats *gc_stats;
GCStats gc_stats[3]; // must match NUM_GENERATIONS
} PyStats;
// Export for most shared extensions
PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
// Export for shared extensions like 'math'
PyAPI_DATA(PyStats*) _Py_stats;
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
// use inline function version defined in cpython/pystate.h
static inline PyStats *_PyThreadState_GetStatsFast(void);
#define _PyStats_GET _PyThreadState_GetStatsFast
#else
#define _PyStats_GET _PyStats_GetLocal
#endif
// Apply `expr` to the PyStats structure returned by _PyStats_GET().
//
// `expr` is written relative to the PyStats pointer: a member path followed
// by the operation to perform, e.g. _Py_STATS_EXPR(object_stats.increfs++).
// When _PyStats_GET() returns NULL, `expr` is not evaluated at all.
//
// The temporary pointer has a deliberately unusual name so that identifiers
// used inside `expr` (for instance a caller's own variable named `s`) refer
// to the caller's variable rather than being captured by this macro.
#define _Py_STATS_EXPR(expr) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if (_py_stats_ptr != NULL) { \
            _py_stats_ptr->expr; \
        } \
    } while (0)
// Apply `expr` to the PyStats structure returned by _PyStats_GET(), but only
// when `cond` is true.
//
// `expr` is written relative to the PyStats pointer (member path plus
// operation).  The NULL check comes first and short-circuits: when
// _PyStats_GET() returns NULL, neither `cond` nor `expr` is evaluated.
//
// The temporary pointer has a deliberately unusual name so that identifiers
// used inside `cond` or `expr` (for instance a caller's own variable named
// `s`) refer to the caller's variable rather than being captured here.
#define _Py_STATS_COND_EXPR(cond, expr) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if (_py_stats_ptr != NULL && (cond)) { \
            _py_stats_ptr->expr; \
        } \
    } while (0)
#ifdef _PY_INTERPRETER
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0)
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0)
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0)
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0)
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
#else
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0)
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0)
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0)
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0)
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
#endif

View File

@@ -199,7 +199,7 @@ enum _GCPhase {
};
/* If we change this, we need to change the default value in the
signature of gc.collect. */
signature of gc.collect and change the size of PyStats.gc_stats */
#define NUM_GENERATIONS 3
struct _gc_runtime_state {
@@ -963,6 +963,18 @@ struct _is {
# ifdef Py_STACKREF_CLOSE_DEBUG
_Py_hashtable_t *closed_stackrefs_table;
# endif
#endif
#ifdef Py_STATS
// true if recording of pystats is on, this is used when new threads
// are created to decide if recording should be on for them
int pystats_enabled;
// allocated when (and if) stats are first enabled
PyStats *pystats_struct;
#ifdef Py_GIL_DISABLED
// held when pystats related interpreter state is being updated
PyMutex pystats_mutex;
#endif
#endif
/* the initial PyInterpreterState.threads.head */

View File

@@ -9,7 +9,7 @@ extern "C" {
#endif
#ifdef Py_STATS
extern void _Py_StatsOn(void);
extern int _Py_StatsOn(void);
extern void _Py_StatsOff(void);
extern void _Py_StatsClear(void);
extern int _Py_PrintSpecializationStats(int to_file);

View File

@@ -15,39 +15,56 @@ extern "C" {
#include "pycore_bitutils.h" // _Py_bit_length
#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
#define OBJECT_STAT_INC_COND(name, cond) \
do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
#define UOP_PAIR_INC(uopcode, lastuop) \
do { \
if (lastuop && _Py_stats) { \
_Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
} \
lastuop = uopcode; \
} while (0)
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
#define OPT_HIST(length, name) \
#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++)
#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--)
#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++)
#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++)
#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++)
#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++)
#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++)
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++)
#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n))
#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++)
#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n))
#define UOP_STAT_INC(opname, name) \
do { \
if (_Py_stats) { \
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
_Py_stats->optimization_stats.name[bucket]++; \
PyStats *s = _PyStats_GET(); \
if (s) { \
assert(opname < 512); \
s->optimization_stats.opcode[opname].name++; \
} \
} while (0)
#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)
// Count the pair (lastuop -> uopcode) and then remember `uopcode` as the new
// `lastuop`.
//
//   uopcode  index into pair_count[]; evaluated up to twice.
//   lastuop  modifiable lvalue; a zero value means there is no previous uop,
//            so no pair is counted.
//
// `lastuop` is assigned even when _PyStats_GET() returns NULL; only the
// counter increment depends on stats being available.
//
// The arguments are parenthesized where they take part in an expression,
// and the temporary pointer has a deliberately unusual name so that a
// caller's variable (for instance one named `s`) passed as an argument is
// not captured by this macro.
#define UOP_PAIR_INC(uopcode, lastuop) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if ((lastuop) && _py_stats_ptr) { \
            _py_stats_ptr->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
        } \
        (lastuop) = (uopcode); \
    } while (0)
#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++)
#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++)
// Increment one bucket of the histogram array `optimization_stats.name`.
//
//   length  value being recorded; evaluated up to twice, so it must not
//           have side effects.
//   name    histogram member of optimization_stats.
//
// The bucket index is _Py_bit_length(length - 1), using 0 in place of
// (length - 1) when length < 1, clamped to the last bucket
// (_Py_UOP_HIST_SIZE - 1).  Nothing happens when _PyStats_GET() returns NULL.
//
// `length` is parenthesized so that arguments containing low-precedence
// operators (e.g. `a & b`) are compared and decremented as a whole.  The
// temporaries use deliberately unusual names so identifiers in `length` are
// not captured by this macro.
#define OPT_HIST(length, name) \
    do { \
        PyStats *_py_stats_ptr = _PyStats_GET(); \
        if (_py_stats_ptr) { \
            int _py_hist_bucket = _Py_bit_length((length) >= 1 ? (length) - 1 : 0); \
            if (_py_hist_bucket >= _Py_UOP_HIST_SIZE) { \
                _py_hist_bucket = _Py_UOP_HIST_SIZE - 1; \
            } \
            _py_stats_ptr->optimization_stats.name[_py_hist_bucket]++; \
        } \
    } while (0)
#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++)
#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++)
// Counters for the free-threaded build.  With Py_GIL_DISABLED each macro
// increments the matching ft_stats field through _Py_STATS_EXPR.  Otherwise
// the macros are no-ops that expand to ((void)0), the same form used by the
// other disabled stat macros, so a call always expands to a real
// expression/statement rather than to nothing.
#ifdef Py_GIL_DISABLED
#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++)
#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++)
#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++)
#else
#define FT_STAT_MUTEX_SLEEP_INC() ((void)0)
#define FT_STAT_QSBR_POLL_INC() ((void)0)
#define FT_STAT_WORLD_STOP_INC() ((void)0)
#endif
// Export for '_opcode' shared extension
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
@@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
#define OPT_HIST(length, name) ((void)0)
#define RARE_EVENT_STAT_INC(name) ((void)0)
#define OPCODE_DEFERRED_INC(opname) ((void)0)
// No-op forms of the free-threading counters; they expand to ((void)0) like
// the other disabled stat macros in this group.
#define FT_STAT_MUTEX_SLEEP_INC() ((void)0)
#define FT_STAT_QSBR_POLL_INC() ((void)0)
#define FT_STAT_WORLD_STOP_INC() ((void)0)
#endif // !Py_STATS
@@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
RARE_EVENT_INTERP_INC(interp, name); \
} while (0); \
PyStatus _PyStats_InterpInit(PyInterpreterState *);
bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *);
void _PyStats_ThreadFini(_PyThreadStateImpl *);
void _PyStats_Attach(_PyThreadStateImpl *);
void _PyStats_Detach(_PyThreadStateImpl *);
#ifdef __cplusplus
}

View File

@@ -70,8 +70,14 @@ typedef struct _PyThreadStateImpl {
// When >1, code objects do not immortalize their non-string constants.
int suppress_co_const_immortalization;
#ifdef Py_STATS
// per-thread stats, will be merged into interp->pystats_struct
PyStats *pystats_struct; // allocated by _PyStats_ThreadInit()
#endif
#endif // Py_GIL_DISABLED
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
Py_ssize_t reftotal; // this thread's total refcount operations
#endif