gh-131253: free-threaded build support for pystats (gh-137189)
Allow the --enable-pystats build option to be used with free-threading. The stats are now stored on a per-interpreter basis, rather than process-global. For free-threaded builds, the stats structure is allocated per-thread and then periodically merged into the per-interpreter stats structure (on thread exit or when the reporting function is called). Most of the pystats related code has been moved into the file Python/pystats.c.
This commit is contained in:
@@ -217,6 +217,15 @@ struct _ts {
|
||||
*/
|
||||
PyObject *threading_local_sentinel;
|
||||
_PyRemoteDebuggerSupport remote_debugger_support;
|
||||
|
||||
#ifdef Py_STATS
|
||||
// Pointer to PyStats structure, NULL if recording is off. For the
|
||||
// free-threaded build, the structure is per-thread (stored as a pointer
|
||||
// in _PyThreadStateImpl). For the default build, the structure is stored
|
||||
// in the PyInterpreterState structure (threads do not have their own
|
||||
// structure and all share the same per-interpreter structure).
|
||||
PyStats *pystats;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* other API */
|
||||
@@ -239,6 +248,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
|
||||
// function is set, otherwise disable them.
|
||||
PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate);
|
||||
|
||||
#ifdef Py_STATS
|
||||
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
|
||||
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
|
||||
|
||||
static inline PyStats*
|
||||
_PyThreadState_GetStatsFast(void)
|
||||
{
|
||||
if (_Py_tss_tstate == NULL) {
|
||||
return NULL; // no attached thread state
|
||||
}
|
||||
return _Py_tss_tstate->pystats;
|
||||
}
|
||||
#endif
|
||||
#endif // Py_STATS
|
||||
|
||||
/* PyGILState */
|
||||
|
||||
/* Helper/diagnostic function - return 1 if the current thread
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
//
|
||||
// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
|
||||
// and Py_DECREF().
|
||||
// - _Py_stats variable
|
||||
// - _PyStats_GET()
|
||||
//
|
||||
// Functions of the sys module:
|
||||
//
|
||||
@@ -14,7 +14,7 @@
|
||||
// - sys._stats_dump()
|
||||
//
|
||||
// Python must be built with ./configure --enable-pystats to define the
|
||||
// Py_STATS macro.
|
||||
// _PyStats_GET() macro.
|
||||
//
|
||||
// Define _PY_INTERPRETER macro to increment interpreter_increfs and
|
||||
// interpreter_decrefs. Otherwise, increment increfs and decrefs.
|
||||
@@ -109,6 +109,18 @@ typedef struct _gc_stats {
|
||||
uint64_t objects_not_transitively_reachable;
|
||||
} GCStats;
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// stats specific to free-threaded build
|
||||
typedef struct _ft_stats {
|
||||
// number of times interpreter had to spin or park when trying to acquire a mutex
|
||||
uint64_t mutex_sleeps;
|
||||
// number of times that the QSBR mechanism polled (compute read sequence value)
|
||||
uint64_t qsbr_polls;
|
||||
// number of times stop-the-world mechanism was used
|
||||
uint64_t world_stops;
|
||||
} FTStats;
|
||||
#endif
|
||||
|
||||
typedef struct _uop_stats {
|
||||
uint64_t execution_count;
|
||||
uint64_t miss;
|
||||
@@ -173,22 +185,48 @@ typedef struct _stats {
|
||||
CallStats call_stats;
|
||||
ObjectStats object_stats;
|
||||
OptimizationStats optimization_stats;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
FTStats ft_stats;
|
||||
#endif
|
||||
RareEventStats rare_event_stats;
|
||||
GCStats *gc_stats;
|
||||
GCStats gc_stats[3]; // must match NUM_GENERATIONS
|
||||
} PyStats;
|
||||
|
||||
// Export for most shared extensions
|
||||
PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
|
||||
|
||||
// Export for shared extensions like 'math'
|
||||
PyAPI_DATA(PyStats*) _Py_stats;
|
||||
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
|
||||
// use inline function version defined in cpython/pystate.h
|
||||
static inline PyStats *_PyThreadState_GetStatsFast(void);
|
||||
#define _PyStats_GET _PyThreadState_GetStatsFast
|
||||
#else
|
||||
#define _PyStats_GET _PyStats_GetLocal
|
||||
#endif
|
||||
|
||||
#define _Py_STATS_EXPR(expr) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s != NULL) { \
|
||||
s->expr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define _Py_STATS_COND_EXPR(cond, expr) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s != NULL && (cond)) { \
|
||||
s->expr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifdef _PY_INTERPRETER
|
||||
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0)
|
||||
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0)
|
||||
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
|
||||
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
|
||||
#else
|
||||
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0)
|
||||
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0)
|
||||
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
|
||||
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
|
||||
#endif
|
||||
|
||||
@@ -199,7 +199,7 @@ enum _GCPhase {
|
||||
};
|
||||
|
||||
/* If we change this, we need to change the default value in the
|
||||
signature of gc.collect. */
|
||||
signature of gc.collect and change the size of PyStats.gc_stats */
|
||||
#define NUM_GENERATIONS 3
|
||||
|
||||
struct _gc_runtime_state {
|
||||
@@ -963,6 +963,18 @@ struct _is {
|
||||
# ifdef Py_STACKREF_CLOSE_DEBUG
|
||||
_Py_hashtable_t *closed_stackrefs_table;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
// true if recording of pystats is on, this is used when new threads
|
||||
// are created to decide if recording should be on for them
|
||||
int pystats_enabled;
|
||||
// allocated when (and if) stats are first enabled
|
||||
PyStats *pystats_struct;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// held when pystats related interpreter state is being updated
|
||||
PyMutex pystats_mutex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* the initial PyInterpreterState.threads.head */
|
||||
|
||||
@@ -9,7 +9,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
extern void _Py_StatsOn(void);
|
||||
extern int _Py_StatsOn(void);
|
||||
extern void _Py_StatsOff(void);
|
||||
extern void _Py_StatsClear(void);
|
||||
extern int _Py_PrintSpecializationStats(int to_file);
|
||||
|
||||
@@ -15,39 +15,56 @@ extern "C" {
|
||||
|
||||
#include "pycore_bitutils.h" // _Py_bit_length
|
||||
|
||||
#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
|
||||
#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
|
||||
#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
|
||||
#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
|
||||
#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
|
||||
#define OBJECT_STAT_INC_COND(name, cond) \
|
||||
do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
|
||||
#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
|
||||
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
|
||||
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
|
||||
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
|
||||
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
|
||||
#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
|
||||
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
|
||||
#define UOP_PAIR_INC(uopcode, lastuop) \
|
||||
do { \
|
||||
if (lastuop && _Py_stats) { \
|
||||
_Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
|
||||
} \
|
||||
lastuop = uopcode; \
|
||||
} while (0)
|
||||
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
|
||||
#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
|
||||
#define OPT_HIST(length, name) \
|
||||
#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++)
|
||||
#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--)
|
||||
#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++)
|
||||
#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++)
|
||||
#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++)
|
||||
#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++)
|
||||
#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++)
|
||||
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++)
|
||||
#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n))
|
||||
#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++)
|
||||
#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n))
|
||||
#define UOP_STAT_INC(opname, name) \
|
||||
do { \
|
||||
if (_Py_stats) { \
|
||||
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
|
||||
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
|
||||
_Py_stats->optimization_stats.name[bucket]++; \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s) { \
|
||||
assert(opname < 512); \
|
||||
s->optimization_stats.opcode[opname].name++; \
|
||||
} \
|
||||
} while (0)
|
||||
#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
|
||||
#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)
|
||||
#define UOP_PAIR_INC(uopcode, lastuop) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (lastuop && s) { \
|
||||
s->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
|
||||
} \
|
||||
lastuop = uopcode; \
|
||||
} while (0)
|
||||
#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++)
|
||||
#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++)
|
||||
#define OPT_HIST(length, name) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s) { \
|
||||
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
|
||||
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
|
||||
s->optimization_stats.name[bucket]++; \
|
||||
} \
|
||||
} while (0)
|
||||
#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++)
|
||||
#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++)
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++)
|
||||
#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++)
|
||||
#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++)
|
||||
#else
|
||||
#define FT_STAT_MUTEX_SLEEP_INC()
|
||||
#define FT_STAT_QSBR_POLL_INC()
|
||||
#define FT_STAT_WORLD_STOP_INC()
|
||||
#endif
|
||||
|
||||
// Export for '_opcode' shared extension
|
||||
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
@@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
#define OPT_HIST(length, name) ((void)0)
|
||||
#define RARE_EVENT_STAT_INC(name) ((void)0)
|
||||
#define OPCODE_DEFERRED_INC(opname) ((void)0)
|
||||
#define FT_STAT_MUTEX_SLEEP_INC()
|
||||
#define FT_STAT_QSBR_POLL_INC()
|
||||
#define FT_STAT_WORLD_STOP_INC()
|
||||
#endif // !Py_STATS
|
||||
|
||||
|
||||
@@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
RARE_EVENT_INTERP_INC(interp, name); \
|
||||
} while (0); \
|
||||
|
||||
PyStatus _PyStats_InterpInit(PyInterpreterState *);
|
||||
bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *);
|
||||
void _PyStats_ThreadFini(_PyThreadStateImpl *);
|
||||
void _PyStats_Attach(_PyThreadStateImpl *);
|
||||
void _PyStats_Detach(_PyThreadStateImpl *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -70,8 +70,14 @@ typedef struct _PyThreadStateImpl {
|
||||
|
||||
// When >1, code objects do not immortalize their non-string constants.
|
||||
int suppress_co_const_immortalization;
|
||||
|
||||
#ifdef Py_STATS
|
||||
// per-thread stats, will be merged into interp->pystats_struct
|
||||
PyStats *pystats_struct; // allocated by _PyStats_ThreadInit()
|
||||
#endif
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
|
||||
Py_ssize_t reftotal; // this thread's total refcount operations
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user