Files
cpython/Python/pystats.c
Neil Schemenauer c98c5b3449 gh-131253: free-threaded build support for pystats (gh-137189)
Allow the --enable-pystats build option to be used with free-threading.  The
stats are now stored on a per-interpreter basis, rather than process global.
For free-threaded builds, the stats structure is allocated per-thread and
then periodically merged into the per-interpreter stats structure (on thread
exit or when the reporting function is called). Most of the pystats related
code has been moved into the file Python/pystats.c.
2025-11-03 11:36:37 -08:00

820 lines
29 KiB
C

#include "Python.h"
#include "pycore_opcode_metadata.h" // _PyOpcode_Caches
#include "pycore_pyatomic_ft_wrappers.h"
#include "pycore_pylifecycle.h" // _PyOS_URandomNonblock()
#include "pycore_tstate.h"
#include "pycore_initconfig.h" // _PyStatus_OK()
#include "pycore_uop_metadata.h" // _PyOpcode_uop_name
#include "pycore_uop_ids.h" // MAX_UOP_ID
#include "pycore_pystate.h" // _PyThreadState_GET()
#include "pycore_runtime.h" // NUM_GENERATIONS
#include <stdlib.h> // rand()
#ifdef Py_STATS
/* Return the stats pointer stored on the current thread state
 * (tstate->pystats), or NULL when there is no current thread state. */
PyStats *
_PyStats_GetLocal(void)
{
    PyThreadState *current = _PyThreadState_GET();
    return (current != NULL) ? current->pystats : NULL;
}
// STATS_LOCK/STATS_UNLOCK: with Py_GIL_DISABLED they lock and unlock the
// interpreter's pystats_mutex; otherwise they expand to nothing.
#ifdef Py_GIL_DISABLED
#define STATS_LOCK(interp) PyMutex_Lock(&interp->pystats_mutex)
#define STATS_UNLOCK(interp) PyMutex_Unlock(&interp->pystats_mutex)
#else
#define STATS_LOCK(interp)
#define STATS_UNLOCK(interp)
#endif
// Fail the build when PYSTATS_MAX_UOP_ID is smaller than MAX_UOP_ID.
#if PYSTATS_MAX_UOP_ID < MAX_UOP_ID
#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_ID"
#endif
/* Store `value` in `dict` under `key` as a Python int.
 * Returns 0 on success and a negative value on failure; the caller keeps
 * ownership of `dict` either way. */
static int
stats_dict_set_counter(PyObject *dict, const char *key, uint64_t value)
{
    PyObject *boxed = PyLong_FromUnsignedLongLong(value);
    if (boxed == NULL) {
        return -1;
    }
    int rc = PyDict_SetItemString(dict, key, boxed);
    Py_DECREF(boxed);
    return rc;
}

/* Build a new dict describing one SpecializationStats record.
 *
 * The dict has one integer entry per scalar counter (keyed by the field
 * name) plus a "failure_kinds" tuple holding SPECIALIZATION_FAILURE_KINDS
 * integers.  Returns a new reference, or NULL if any object creation or
 * dict insertion fails. */
static PyObject*
stats_to_dict(SpecializationStats *stats)
{
    PyObject *result = PyDict_New();
    if (result == NULL) {
        return NULL;
    }
    PyObject *kinds = NULL;
    const struct {
        const char *key;
        uint64_t value;
    } counters[] = {
        {"success", stats->success},
        {"failure", stats->failure},
        {"hit", stats->hit},
        {"deferred", stats->deferred},
        {"miss", stats->miss},
        {"deopt", stats->deopt},
    };
    const size_t n_counters = sizeof(counters) / sizeof(counters[0]);

    for (size_t k = 0; k < n_counters; k++) {
        if (stats_dict_set_counter(result, counters[k].key, counters[k].value) < 0) {
            goto error;
        }
    }

    kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
    if (kinds == NULL) {
        goto error;
    }
    for (int kind = 0; kind < SPECIALIZATION_FAILURE_KINDS; kind++) {
        PyObject *count = PyLong_FromUnsignedLongLong(stats->failure_kinds[kind]);
        if (count == NULL) {
            goto error;
        }
        /* The tuple takes over the reference to `count`. */
        PyTuple_SET_ITEM(kinds, kind, count);
    }
    if (PyDict_SetItemString(result, "failure_kinds", kinds)) {
        goto error;
    }
    Py_DECREF(kinds);
    return result;

error:
    if (kinds != NULL) {
        Py_DECREF(kinds);
    }
    Py_DECREF(result);
    return NULL;
}
/* Convert the specialization stats of `opcode` (taken from `src`) into a
 * dict and store it in `res` under `name`.
 * Returns 0 on success, -1 on failure. */
static int
add_stat_dict(
    PyStats *src,
    PyObject *res,
    int opcode,
    const char *name)
{
    PyObject *entry = stats_to_dict(&src->opcode_stats[opcode].specialization);
    if (entry == NULL) {
        return -1;
    }
    int status = PyDict_SetItemString(res, name, entry);
    Py_DECREF(entry);
    return status;
}
/* Return a dict mapping instruction-family names to their specialization
 * stats, read from the current interpreter's pystats_struct.
 *
 * Returns None when the interpreter has no stats structure, and NULL when
 * building the result fails. */
PyObject*
_Py_GetSpecializationStats(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    PyStats *src = FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct);
    if (src == NULL) {
        Py_RETURN_NONE;
    }
    PyObject *report = PyDict_New();
    if (report == NULL) {
        return NULL;
    }

    const struct {
        int opcode;
        const char *key;
    } families[] = {
        {CONTAINS_OP, "contains_op"},
        {LOAD_SUPER_ATTR, "load_super_attr"},
        {LOAD_ATTR, "load_attr"},
        {LOAD_GLOBAL, "load_global"},
        {STORE_SUBSCR, "store_subscr"},
        {STORE_ATTR, "store_attr"},
        {JUMP_BACKWARD, "jump_backward"},
        {CALL, "call"},
        {CALL_KW, "call_kw"},
        {BINARY_OP, "binary_op"},
        {COMPARE_OP, "compare_op"},
        {UNPACK_SEQUENCE, "unpack_sequence"},
        {FOR_ITER, "for_iter"},
        {TO_BOOL, "to_bool"},
        {SEND, "send"},
    };
    const size_t n_families = sizeof(families) / sizeof(families[0]);

    /* Every family is attempted; failures accumulate as a negative sum. */
    int err = 0;
    for (size_t k = 0; k < n_families; k++) {
        err += add_stat_dict(src, report, families[k].opcode, families[k].key);
    }
    if (err < 0) {
        Py_DECREF(report);
        return NULL;
    }
    return report;
}
/* Print one per-opcode counter, but only when it is non-zero. */
#define PRINT_STAT(op, field) \
    do { \
        if (stats[op].field) { \
            fprintf(out, " opcode[%s]." #field " : %" PRIu64 "\n", \
                    _PyOpcode_OpName[op], stats[op].field); \
        } \
    } while (0)

/* Write the per-opcode statistics held in `stats` (indexed by opcode,
 * 0..255) to `out`: which opcodes are specializable, the non-zero
 * specialization counters, execution counts, failure kinds and pair
 * counts. */
static void
print_spec_stats(FILE *out, OpcodeStats *stats)
{
    /* Mark some opcodes as specializable for stats,
     * even though we don't specialize them yet. */
    fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
    fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
    fprintf(out, "opcode[GET_ITER].specializable : 1\n");

    for (int op = 0; op < 256; op++) {
        /* Jumps have cache entries but cannot be specialized, so they are
         * left out of the "specializable" listing. */
        int is_jump = (op == POP_JUMP_IF_FALSE
                       || op == POP_JUMP_IF_TRUE
                       || op == POP_JUMP_IF_NONE
                       || op == POP_JUMP_IF_NOT_NONE
                       || op == JUMP_BACKWARD);
        if (_PyOpcode_Caches[op] && !is_jump) {
            fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[op]);
        }

        PRINT_STAT(op, specialization.success);
        PRINT_STAT(op, specialization.failure);
        PRINT_STAT(op, specialization.hit);
        PRINT_STAT(op, specialization.deferred);
        PRINT_STAT(op, specialization.miss);
        PRINT_STAT(op, specialization.deopt);
        PRINT_STAT(op, execution_count);

        for (int kind = 0; kind < SPECIALIZATION_FAILURE_KINDS; kind++) {
            uint64_t count = stats[op].specialization.failure_kinds[kind];
            if (count) {
                fprintf(out, " opcode[%s].specialization.failure_kinds[%d] : %"
                        PRIu64 "\n", _PyOpcode_OpName[op], kind, count);
            }
        }

        for (int next = 0; next < 256; next++) {
            uint64_t pairs = stats[op].pair_count[next];
            if (pairs) {
                fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
                        _PyOpcode_OpName[op], _PyOpcode_OpName[next], pairs);
            }
        }
    }
}
#undef PRINT_STAT
/* Write the call-related counters in `stats` to `out`, followed by one
 * line per eval call kind. */
static void
print_call_stats(FILE *out, CallStats *stats)
{
    fprintf(out, "Calls to PyEval_EvalDefault: %" PRIu64 "\n",
            stats->pyeval_calls);
    fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n",
            stats->inlined_py_calls);
    fprintf(out, "Frames pushed: %" PRIu64 "\n",
            stats->frames_pushed);
    fprintf(out, "Frame objects created: %" PRIu64 "\n",
            stats->frame_objects_created);

    for (int kind = 0; kind < EVAL_CALL_KINDS; kind++) {
        fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n",
                kind, stats->eval_calls[kind]);
    }
}
/* Write the object-related counters in `stats` (allocations, frees,
 * refcount operations, dict materialization, type cache) to `out`, one
 * labelled line per counter. */
static void
print_object_stats(FILE *out, ObjectStats *stats)
{
    /* Allocation and free counters. */
    fprintf(out, "Object allocations from freelist: %" PRIu64 "\n",
            stats->from_freelist);
    fprintf(out, "Object frees to freelist: %" PRIu64 "\n",
            stats->to_freelist);
    fprintf(out, "Object allocations: %" PRIu64 "\n",
            stats->allocations);
    fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n",
            stats->allocations512);
    fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n",
            stats->allocations4k);
    fprintf(out, "Object allocations over 4 kbytes: %" PRIu64 "\n",
            stats->allocations_big);
    fprintf(out, "Object frees: %" PRIu64 "\n",
            stats->frees);
    fprintf(out, "Object inline values: %" PRIu64 "\n",
            stats->inline_values);

    /* Reference count operations. */
    fprintf(out, "Object interpreter mortal increfs: %" PRIu64 "\n",
            stats->interpreter_increfs);
    fprintf(out, "Object interpreter mortal decrefs: %" PRIu64 "\n",
            stats->interpreter_decrefs);
    fprintf(out, "Object mortal increfs: %" PRIu64 "\n",
            stats->increfs);
    fprintf(out, "Object mortal decrefs: %" PRIu64 "\n",
            stats->decrefs);
    fprintf(out, "Object interpreter immortal increfs: %" PRIu64 "\n",
            stats->interpreter_immortal_increfs);
    fprintf(out, "Object interpreter immortal decrefs: %" PRIu64 "\n",
            stats->interpreter_immortal_decrefs);
    fprintf(out, "Object immortal increfs: %" PRIu64 "\n",
            stats->immortal_increfs);
    fprintf(out, "Object immortal decrefs: %" PRIu64 "\n",
            stats->immortal_decrefs);

    /* Dict materialization. */
    fprintf(out, "Object materialize dict (on request): %" PRIu64 "\n",
            stats->dict_materialized_on_request);
    fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n",
            stats->dict_materialized_new_key);
    fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n",
            stats->dict_materialized_too_big);
    fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n",
            stats->dict_materialized_str_subclass);

    /* Type (method) cache. */
    fprintf(out, "Object method cache hits: %" PRIu64 "\n",
            stats->type_cache_hits);
    fprintf(out, "Object method cache misses: %" PRIu64 "\n",
            stats->type_cache_misses);
    fprintf(out, "Object method cache collisions: %" PRIu64 "\n",
            stats->type_cache_collisions);
    fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n",
            stats->type_cache_dunder_hits);
    fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n",
            stats->type_cache_dunder_misses);
}
/* Write the GC counters to `out`.  `stats` is an array with one entry per
 * generation (NUM_GENERATIONS entries are read). */
static void
print_gc_stats(FILE *out, GCStats *stats)
{
    for (int gen = 0; gen < NUM_GENERATIONS; gen++) {
        const GCStats *g = &stats[gen];
        fprintf(out, "GC[%d] collections: %" PRIu64 "\n",
                gen, g->collections);
        fprintf(out, "GC[%d] object visits: %" PRIu64 "\n",
                gen, g->object_visits);
        fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n",
                gen, g->objects_collected);
        fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n",
                gen, g->objects_transitively_reachable);
        fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n",
                gen, g->objects_not_transitively_reachable);
    }
}
#ifdef _Py_TIER2
/* Print a histogram with _Py_UOP_HIST_SIZE buckets to `out`.  Bucket `i`
 * is labelled `name[2**i]`. */
static void
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
{
    uint64_t bucket = 1;
    for (int i = 0; i < _Py_UOP_HIST_SIZE; i++, bucket <<= 1) {
        fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, bucket, hist[i]);
    }
}
extern const char *_PyUOpName(int index);

/* Write the tier 2 optimizer statistics in `stats` to `out`: the scalar
 * counters, the trace histograms, the non-zero per-uop counters, the
 * unsupported opcodes, the uop pair counts, the per-uop error counts and
 * the JIT memory figures. */
static void
print_optimization_stats(FILE *out, OptimizationStats *stats)
{
    fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
    fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
    fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
    fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
    fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
    fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
    fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
    fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
    fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
    fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
    fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
    fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
    fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);

    print_histogram(out, "Trace length", stats->trace_length_hist);
    print_histogram(out, "Trace run length", stats->trace_run_length_hist);
    print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);

    fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
    fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
    fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
            stats->optimizer_failure_reason_no_memory);
    fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
    fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);

    /* Per-uop counters; zero entries are skipped. */
    for (int i = 0; i <= MAX_UOP_ID; i++) {
        if (stats->opcode[i].execution_count) {
            fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
        }
        if (stats->opcode[i].miss) {
            fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
        }
    }
    for (int i = 0; i < 256; i++) {
        if (stats->unsupported_opcode[i]) {
            fprintf(
                out,
                "unsupported_opcode[%s].count : %" PRIu64 "\n",
                _PyOpcode_OpName[i],
                stats->unsupported_opcode[i]
            );
        }
    }

    /* Pair counts start at uop id 1. */
    for (int i = 1; i <= MAX_UOP_ID; i++){
        for (int j = 1; j <= MAX_UOP_ID; j++) {
            if (stats->opcode[i].pair_count[j]) {
                fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
                        _PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
            }
        }
    }

    /* The bound is inclusive, like the other uop loops above and the merge
     * code: MAX_UOP_ID is itself a valid uop id, so its error count must be
     * reported as well. */
    for (int i = 0; i <= MAX_UOP_ID; i++) {
        if (stats->error_in_opcode[i]) {
            fprintf(
                out,
                "error_in_opcode[%s].count : %" PRIu64 "\n",
                _PyUOpName(i),
                stats->error_in_opcode[i]
            );
        }
    }

    fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
    fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
    fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
    fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
    fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
    fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);

    print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
}
#endif
#ifdef Py_GIL_DISABLED
/* Write the free-threading counters in `stats` to `out`. */
static void
print_ft_stats(FILE *out, FTStats *stats)
{
    fprintf(out, "Mutex sleeps (mutex_sleeps): %" PRIu64 "\n",
            stats->mutex_sleeps);
    fprintf(out, "QSBR polls (qsbr_polls): %" PRIu64 "\n",
            stats->qsbr_polls);
    fprintf(out, "World stops (world_stops): %" PRIu64 "\n",
            stats->world_stops);
}
#endif
/* Write the rare-event counters in `stats` to `out`, one line per event. */
static void
print_rare_event_stats(FILE *out, RareEventStats *stats)
{
    fprintf(out, "Rare event (set_class): %" PRIu64 "\n",
            stats->set_class);
    fprintf(out, "Rare event (set_bases): %" PRIu64 "\n",
            stats->set_bases);
    fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n",
            stats->set_eval_frame_func);
    fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n",
            stats->builtin_dict);
    fprintf(out, "Rare event (func_modification): %" PRIu64 "\n",
            stats->func_modification);
    fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n",
            stats->watched_dict_modification);
    fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n",
            stats->watched_globals_modification);
}
/* Write every category of `stats` to `out`.  The optimizer section is only
 * compiled in with _Py_TIER2 and the free-threading section only with
 * Py_GIL_DISABLED. */
static void
print_stats(FILE *out, PyStats *stats)
{
    OpcodeStats *opcodes = stats->opcode_stats;
    GCStats *generations = stats->gc_stats;

    print_spec_stats(out, opcodes);
    print_call_stats(out, &stats->call_stats);
    print_object_stats(out, &stats->object_stats);
    print_gc_stats(out, generations);
#ifdef _Py_TIER2
    print_optimization_stats(out, &stats->optimization_stats);
#endif
#ifdef Py_GIL_DISABLED
    print_ft_stats(out, &stats->ft_stats);
#endif
    print_rare_event_stats(out, &stats->rare_event_stats);
}
#ifdef Py_GIL_DISABLED
/* Add every counter of `src` to the matching counter of `dest`. */
static void
merge_specialization_stats(SpecializationStats *dest, const SpecializationStats *src)
{
#define MERGE_FIELD(field) (dest->field += src->field)
    MERGE_FIELD(success);
    MERGE_FIELD(failure);
    MERGE_FIELD(hit);
    MERGE_FIELD(deferred);
    MERGE_FIELD(miss);
    MERGE_FIELD(deopt);
    for (int kind = 0; kind < SPECIALIZATION_FAILURE_KINDS; kind++) {
        MERGE_FIELD(failure_kinds[kind]);
    }
#undef MERGE_FIELD
}
/* Add the per-opcode stats of `src` to `dest`.  Both arrays are indexed by
 * opcode and 256 entries are processed. */
static void
merge_opcode_stats_array(OpcodeStats *dest, const OpcodeStats *src)
{
    for (int op = 0; op < 256; op++) {
        OpcodeStats *to = &dest[op];
        const OpcodeStats *from = &src[op];

        merge_specialization_stats(&to->specialization, &from->specialization);
        to->execution_count += from->execution_count;
        for (int next = 0; next < 256; next++) {
            to->pair_count[next] += from->pair_count[next];
        }
    }
}
/* Add the call counters of `src` to `dest`. */
static void
merge_call_stats(CallStats *dest, const CallStats *src)
{
#define MERGE_FIELD(field) (dest->field += src->field)
    MERGE_FIELD(inlined_py_calls);
    MERGE_FIELD(pyeval_calls);
    MERGE_FIELD(frames_pushed);
    MERGE_FIELD(frame_objects_created);
    for (int kind = 0; kind < EVAL_CALL_KINDS; kind++) {
        MERGE_FIELD(eval_calls[kind]);
    }
#undef MERGE_FIELD
}
/* Add the object counters of `src` to `dest`. */
static void
merge_object_stats(ObjectStats *dest, const ObjectStats *src)
{
#define MERGE_FIELD(field) (dest->field += src->field)
    /* Reference count operations. */
    MERGE_FIELD(increfs);
    MERGE_FIELD(decrefs);
    MERGE_FIELD(interpreter_increfs);
    MERGE_FIELD(interpreter_decrefs);
    MERGE_FIELD(immortal_increfs);
    MERGE_FIELD(immortal_decrefs);
    MERGE_FIELD(interpreter_immortal_increfs);
    MERGE_FIELD(interpreter_immortal_decrefs);

    /* Allocations and frees. */
    MERGE_FIELD(allocations);
    MERGE_FIELD(allocations512);
    MERGE_FIELD(allocations4k);
    MERGE_FIELD(allocations_big);
    MERGE_FIELD(frees);
    MERGE_FIELD(to_freelist);
    MERGE_FIELD(from_freelist);
    MERGE_FIELD(inline_values);

    /* Dict materialization. */
    MERGE_FIELD(dict_materialized_on_request);
    MERGE_FIELD(dict_materialized_new_key);
    MERGE_FIELD(dict_materialized_too_big);
    MERGE_FIELD(dict_materialized_str_subclass);

    /* Type cache. */
    MERGE_FIELD(type_cache_hits);
    MERGE_FIELD(type_cache_misses);
    MERGE_FIELD(type_cache_dunder_hits);
    MERGE_FIELD(type_cache_dunder_misses);
    MERGE_FIELD(type_cache_collisions);

    MERGE_FIELD(object_visits);
#undef MERGE_FIELD
}
/* Add the per-uop stats of `src` to `dest`.  Indices 0..PYSTATS_MAX_UOP_ID
 * (inclusive) are processed. */
static void
merge_uop_stats_array(UOpStats *dest, const UOpStats *src)
{
    for (int uop = 0; uop <= PYSTATS_MAX_UOP_ID; uop++) {
        UOpStats *to = &dest[uop];
        const UOpStats *from = &src[uop];

        to->execution_count += from->execution_count;
        to->miss += from->miss;
        for (int next = 0; next <= PYSTATS_MAX_UOP_ID; next++) {
            to->pair_count[next] += from->pair_count[next];
        }
    }
}
/* Add the optimizer counters, per-uop stats, unsupported-opcode counts,
 * histograms and per-uop error counts of `src` to `dest`. */
static void
merge_optimization_stats(OptimizationStats *dest, const OptimizationStats *src)
{
#define MERGE_FIELD(field) (dest->field += src->field)
    MERGE_FIELD(attempts);
    MERGE_FIELD(traces_created);
    MERGE_FIELD(traces_executed);
    MERGE_FIELD(uops_executed);
    MERGE_FIELD(trace_stack_overflow);
    MERGE_FIELD(trace_stack_underflow);
    MERGE_FIELD(trace_too_long);
    MERGE_FIELD(trace_too_short);
    MERGE_FIELD(inner_loop);
    MERGE_FIELD(recursive_call);
    MERGE_FIELD(low_confidence);
    MERGE_FIELD(unknown_callee);
    MERGE_FIELD(executors_invalidated);
    MERGE_FIELD(optimizer_attempts);
    MERGE_FIELD(optimizer_successes);
    MERGE_FIELD(optimizer_failure_reason_no_memory);
    MERGE_FIELD(remove_globals_builtins_changed);
    MERGE_FIELD(remove_globals_incorrect_keys);
    MERGE_FIELD(jit_total_memory_size);
    MERGE_FIELD(jit_code_size);
    MERGE_FIELD(jit_trampoline_size);
    MERGE_FIELD(jit_data_size);
    MERGE_FIELD(jit_padding_size);
    MERGE_FIELD(jit_freed_memory_size);

    merge_uop_stats_array(dest->opcode, src->opcode);

    for (int op = 0; op < 256; op++) {
        MERGE_FIELD(unsupported_opcode[op]);
    }
    for (int bucket = 0; bucket < _Py_UOP_HIST_SIZE; bucket++) {
        MERGE_FIELD(trace_length_hist[bucket]);
        MERGE_FIELD(trace_run_length_hist[bucket]);
        MERGE_FIELD(optimized_trace_length_hist[bucket]);
        MERGE_FIELD(trace_total_memory_hist[bucket]);
    }
    for (int uop = 0; uop <= PYSTATS_MAX_UOP_ID; uop++) {
        MERGE_FIELD(error_in_opcode[uop]);
    }
#undef MERGE_FIELD
}
/* Add the free-threading counters of `src` to `dest`.
 *
 * The counters are accumulated, like every other merge helper in this
 * file.  Plain assignment would overwrite the destination totals with the
 * counters of whichever thread is merged last (and with zeros when that
 * thread's counters have already been zeroed). */
static void
merge_ft_stats(FTStats *dest, const FTStats *src)
{
    dest->mutex_sleeps += src->mutex_sleeps;
    dest->qsbr_polls += src->qsbr_polls;
    dest->world_stops += src->world_stops;
}
/* Add the rare-event counters of `src` to `dest`. */
static void
merge_rare_event_stats(RareEventStats *dest, const RareEventStats *src)
{
#define MERGE_FIELD(field) (dest->field += src->field)
    MERGE_FIELD(set_class);
    MERGE_FIELD(set_bases);
    MERGE_FIELD(set_eval_frame_func);
    MERGE_FIELD(builtin_dict);
    MERGE_FIELD(func_modification);
    MERGE_FIELD(watched_dict_modification);
    MERGE_FIELD(watched_globals_modification);
#undef MERGE_FIELD
}
/* Add the GC counters of `src` to `dest`, one entry per generation
 * (NUM_GENERATIONS entries are processed). */
static void
merge_gc_stats_array(GCStats *dest, const GCStats *src)
{
    for (int gen = 0; gen < NUM_GENERATIONS; gen++) {
        GCStats *to = &dest[gen];
        const GCStats *from = &src[gen];

        to->collections += from->collections;
        to->object_visits += from->object_visits;
        to->objects_collected += from->objects_collected;
        to->objects_transitively_reachable += from->objects_transitively_reachable;
        to->objects_not_transitively_reachable += from->objects_not_transitively_reachable;
    }
}
/* Reset the thread-local stats counters of `tstate` to zero.  Does nothing
 * when the thread has no stats structure. */
void
stats_zero_thread(_PyThreadStateImpl *tstate)
{
    PyStats *local = tstate->pystats_struct;
    if (local == NULL) {
        return;
    }
    memset(local, 0, sizeof(PyStats));
}
// Add the stats of a single thread to the structure of its interpreter.
// Does nothing when either the thread or the interpreter has no stats
// structure.  The thread's own counters are left unchanged.
void
stats_merge_thread(_PyThreadStateImpl *tstate)
{
    PyStats *src = tstate->pystats_struct;
    if (src == NULL) {
        return;
    }
    PyStats *dest = ((PyThreadState *)tstate)->interp->pystats_struct;
    if (dest == NULL) {
        return;
    }

    // One helper per category of stats.
    merge_opcode_stats_array(dest->opcode_stats, src->opcode_stats);
    merge_call_stats(&dest->call_stats, &src->call_stats);
    merge_object_stats(&dest->object_stats, &src->object_stats);
    merge_optimization_stats(&dest->optimization_stats, &src->optimization_stats);
    merge_ft_stats(&dest->ft_stats, &src->ft_stats);
    merge_rare_event_stats(&dest->rare_event_stats, &src->rare_event_stats);
    merge_gc_stats_array(dest->gc_stats, src->gc_stats);
}
#endif // Py_GIL_DISABLED
// Turn stats collection on or off for all threads of the interpreter that
// owns `tstate`.
//
// Returns 0 on success (including when the enabled flag already had the
// requested value) and -1 when the per-interpreter stats structure could
// not be allocated.
static int
stats_toggle_on_off(PyThreadState *tstate, int on)
{
    PyInterpreterState *interp = tstate->interp;
    bool flipped = false;

    STATS_LOCK(interp);
    if (on && interp->pystats_struct == NULL) {
        // first activation for this interpreter, allocate zeroed counters
        PyStats *fresh = PyMem_RawCalloc(1, sizeof(PyStats));
        if (fresh == NULL) {
            STATS_UNLOCK(interp);
            return -1;
        }
        FT_ATOMIC_STORE_PTR_RELAXED(interp->pystats_struct, fresh);
    }
    if (interp->pystats_enabled != on) {
        FT_ATOMIC_STORE_INT_RELAXED(interp->pystats_enabled, on);
        flipped = true;
    }
    STATS_UNLOCK(interp);

    if (flipped) {
        // Point every thread state at its stats structure (or at NULL when
        // stats are now disabled) while the other threads are stopped.
        _PyEval_StopTheWorld(interp);
        _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
            PyStats *target = NULL;
            if (interp->pystats_enabled) {
#ifdef Py_GIL_DISABLED
                _PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)ts;
                if (ts_impl->pystats_struct == NULL) {
                    // first activation for this thread, allocate structure
                    ts_impl->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
                }
                target = ts_impl->pystats_struct;
#else
                target = ts->interp->pystats_struct;
#endif
            }
            ts->pystats = target;
        }
        _PyEval_StartTheWorld(interp);
    }
    return 0;
}
// zero stats for all threads and for the interpreter
static void
stats_zero_all(void)
{
PyThreadState *tstate = _PyThreadState_GET();
if (tstate == NULL) {
return;
}
if (FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL) {
return;
}
PyInterpreterState *interp = tstate->interp;
_PyEval_StopTheWorld(interp);
#ifdef Py_GIL_DISABLED
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
stats_zero_thread((_PyThreadStateImpl *)ts);
}
#endif
if (interp->pystats_struct) {
memset(interp->pystats_struct, 0, sizeof(PyStats));
}
_PyEval_StartTheWorld(interp);
}
// Merge the stats of every thread into the per-interpreter structure and
// zero the per-thread counters afterwards.  The per-thread work is only
// compiled in the free-threaded build.  Does nothing when there is no
// current thread state or the interpreter has no stats structure.
static void
stats_merge_all(void)
{
    PyThreadState *tstate = _PyThreadState_GET();
    if (tstate == NULL
        || FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL)
    {
        return;
    }

    PyInterpreterState *interp = tstate->interp;
    _PyEval_StopTheWorld(interp);
#ifdef Py_GIL_DISABLED
    _Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
        _PyThreadStateImpl *impl = (_PyThreadStateImpl *)ts;
        stats_merge_thread(impl);
        stats_zero_thread(impl);
    }
#endif
    _PyEval_StartTheWorld(interp);
}
/* Enable stats collection for the current interpreter.
 * Returns the result of stats_toggle_on_off(): 0 on success, -1 on
 * allocation failure. */
int
_Py_StatsOn(void)
{
    return stats_toggle_on_off(_PyThreadState_GET(), 1);
}
/* Disable stats collection for the current interpreter.  The status
 * returned by stats_toggle_on_off() is discarded. */
void
_Py_StatsOff(void)
{
    (void)stats_toggle_on_off(_PyThreadState_GET(), 0);
}
/* Reset all collected stats to zero; a thin wrapper over stats_zero_all(). */
void
_Py_StatsClear(void)
{
    stats_zero_all();
}
/* Return 1 when the `size` bytes starting at `ptr` are all zero (also for
 * size == 0), otherwise 0. */
static int
mem_is_zero(unsigned char *ptr, size_t size)
{
    while (size-- > 0) {
        if (*ptr++ != 0) {
            return 0;
        }
    }
    return 1;
}
/* Merge the per-thread stats and print the current interpreter's stats.
 *
 * When `to_file` is non-zero the output goes to a randomly named ".txt"
 * file in the platform's py_stats directory; if the name cannot be
 * generated or the file cannot be opened, stderr is used instead.
 *
 * Returns 1 when stats were printed.  Returns 0 when the interpreter has
 * no stats structure or when the checked stats categories are all zero. */
int
_Py_PrintSpecializationStats(int to_file)
{
    // NOTE(review): this assert makes the `else` branch further down
    // unreachable in debug builds; confirm whether to_file == 0 is still a
    // supported call.
    assert(to_file);
    stats_merge_all();
    PyThreadState *tstate = _PyThreadState_GET();
    STATS_LOCK(tstate->interp);
    PyStats *stats = tstate->interp->pystats_struct;
    if (stats == NULL) {
        STATS_UNLOCK(tstate->interp);
        return 0;
    }
#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
    // NOTE(review): for gc_stats only sizeof(*gc_stats) bytes, i.e. the
    // first element, are examined; confirm that this is intended.
    int is_zero = (
        MEM_IS_ZERO(stats->gc_stats)  // is a pointer
        && MEM_IS_ZERO(&stats->opcode_stats)
        && MEM_IS_ZERO(&stats->call_stats)
        && MEM_IS_ZERO(&stats->object_stats)
    );
#undef MEM_IS_ZERO
    STATS_UNLOCK(tstate->interp);
    if (is_zero) {
        // gh-108753: -X pystats command line was used, but then _stats_off()
        // and _stats_clear() have been called: in this case, avoid printing
        // useless "all zeros" statistics.
        return 0;
    }

    FILE *out = stderr;
    if (to_file) {
        /* Write to a file instead of stderr. */
# ifdef MS_WINDOWS
        const char *dirname = "c:\\temp\\py_stats\\";
# else
        const char *dirname = "/tmp/py_stats/";
# endif
        /* Use random 160 bit number as file name,
         * to avoid both accidental collisions and
         * symlink attacks. */
        enum { NAME_BYTES = 20 };
        unsigned char random_bytes[NAME_BYTES];
        /* Only build a file name when the random bytes were produced;
         * otherwise the buffer would be read uninitialized.
         * NOTE(review): _PyOS_URandomNonblock may leave an exception set on
         * failure; confirm whether it should be cleared here. */
        if (_PyOS_URandomNonblock(random_bytes, NAME_BYTES) >= 0) {
            char hex_name[2 * NAME_BYTES + 1];
            for (int i = 0; i < NAME_BYTES; i++) {
                hex_name[2*i] = Py_hexdigits[random_bytes[i]&15];
                hex_name[2*i+1] = Py_hexdigits[(random_bytes[i]>>4)&15];
            }
            hex_name[2 * NAME_BYTES] = '\0';

            /* Bounded formatting: a truncated path is never opened. */
            char buf[64];
            int written = snprintf(buf, sizeof(buf), "%s%s.txt", dirname, hex_name);
            if (written > 0 && (size_t)written < sizeof(buf)) {
                FILE *fout = fopen(buf, "w");
                if (fout) {
                    out = fout;
                }
            }
        }
    }
    else {
        fprintf(out, "Specialization stats:\n");
    }

    STATS_LOCK(tstate->interp);
    print_stats(out, stats);
    STATS_UNLOCK(tstate->interp);
    if (out != stderr) {
        fclose(out);
    }
    return 1;
}
/* Set up stats for a new interpreter.
 *
 * When the interpreter's config has _pystats set, stats collection is
 * enabled and a zeroed PyStats structure is allocated.  Returns an error
 * status when that allocation fails, otherwise an OK status. */
PyStatus
_PyStats_InterpInit(PyInterpreterState *interp)
{
    if (!interp->config._pystats) {
        return _PyStatus_OK();
    }
    // start with pystats enabled, can be disabled via sys._stats_off()
    // this needs to be set before the first tstate is created
    interp->pystats_enabled = 1;
    interp->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
    if (interp->pystats_struct != NULL) {
        return _PyStatus_OK();
    }
    return _PyStatus_ERR("out-of-memory while initializing interpreter");
}
/* Set up stats for a new thread state.
 *
 * In the free-threaded build, when stats are enabled for `interp`, a
 * zeroed per-thread PyStats structure is allocated.  Returns false only
 * when that allocation fails; in other builds it always returns true. */
bool
_PyStats_ThreadInit(PyInterpreterState *interp, _PyThreadStateImpl *tstate)
{
#ifdef Py_GIL_DISABLED
    if (FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
        assert(interp->pystats_struct != NULL);
        PyStats *local = PyMem_RawCalloc(1, sizeof(PyStats));
        tstate->pystats_struct = local;
        if (local == NULL) {
            return false;
        }
    }
#endif
    return true;
}
/* Tear down the stats of a thread state.
 *
 * In the free-threaded build the thread's counters are merged into the
 * interpreter's structure while holding the stats lock, then the
 * per-thread structure is freed.  In other builds this does nothing. */
void
_PyStats_ThreadFini(_PyThreadStateImpl *tstate)
{
#ifdef Py_GIL_DISABLED
    PyInterpreterState *interp = ((PyThreadState *)tstate)->interp;

    STATS_LOCK(interp);
    stats_merge_thread(tstate);
    STATS_UNLOCK(interp);

    PyMem_RawFree(tstate->pystats_struct);
#endif
}
/* Set the `pystats` pointer of a thread state.
 *
 * When stats are enabled for the interpreter, the pointer is set to the
 * per-thread structure (free-threaded build) or to the interpreter's
 * structure (other builds).  When stats are disabled it is set to NULL. */
void
_PyStats_Attach(_PyThreadStateImpl *tstate_impl)
{
    PyThreadState *tstate = (PyThreadState *)tstate_impl;
    PyStats *active = NULL;

    if (FT_ATOMIC_LOAD_INT_RELAXED(tstate->interp->pystats_enabled)) {
#ifdef Py_GIL_DISABLED
        active = tstate_impl->pystats_struct;
#else
        active = tstate->interp->pystats_struct;
#endif
    }
    tstate->pystats = active;
}
/* Clear the `pystats` pointer of a thread state by setting it to NULL. */
void
_PyStats_Detach(_PyThreadStateImpl *tstate_impl)
{
    PyThreadState *tstate = (PyThreadState *)tstate_impl;
    tstate->pystats = NULL;
}
#endif // Py_STATS