gh-131253: free-threaded build support for pystats (gh-137189)
Allow the --enable-pystats build option to be used with free-threading. The stats are now stored on a per-interpreter basis, rather than process global. For free-threaded builds, the stats structure is allocated per-thread and then periodically merged into the per-interpreter stats structure (on thread exit or when the reporting function is called). Most of the pystats related code has been moved into the file Python/pystats.c.
This commit is contained in:
@@ -217,6 +217,15 @@ struct _ts {
|
||||
*/
|
||||
PyObject *threading_local_sentinel;
|
||||
_PyRemoteDebuggerSupport remote_debugger_support;
|
||||
|
||||
#ifdef Py_STATS
|
||||
// Pointer to PyStats structure, NULL if recording is off. For the
|
||||
// free-threaded build, the structure is per-thread (stored as a pointer
|
||||
// in _PyThreadStateImpl). For the default build, the structure is stored
|
||||
// in the PyInterpreterState structure (threads do not have their own
|
||||
// structure and all share the same per-interpreter structure).
|
||||
PyStats *pystats;
|
||||
#endif
|
||||
};
|
||||
|
||||
/* other API */
|
||||
@@ -239,6 +248,21 @@ PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate);
|
||||
// function is set, otherwise disable them.
|
||||
PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate);
|
||||
|
||||
#ifdef Py_STATS
|
||||
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
|
||||
extern _Py_thread_local PyThreadState *_Py_tss_tstate;
|
||||
|
||||
static inline PyStats*
|
||||
_PyThreadState_GetStatsFast(void)
|
||||
{
|
||||
if (_Py_tss_tstate == NULL) {
|
||||
return NULL; // no attached thread state
|
||||
}
|
||||
return _Py_tss_tstate->pystats;
|
||||
}
|
||||
#endif
|
||||
#endif // Py_STATS
|
||||
|
||||
/* PyGILState */
|
||||
|
||||
/* Helper/diagnostic function - return 1 if the current thread
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
//
|
||||
// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF()
|
||||
// and Py_DECREF().
|
||||
// - _Py_stats variable
|
||||
// - _PyStats_GET()
|
||||
//
|
||||
// Functions of the sys module:
|
||||
//
|
||||
@@ -14,7 +14,7 @@
|
||||
// - sys._stats_dump()
|
||||
//
|
||||
// Python must be built with ./configure --enable-pystats to define the
|
||||
// Py_STATS macro.
|
||||
// _PyStats_GET() macro.
|
||||
//
|
||||
// Define _PY_INTERPRETER macro to increment interpreter_increfs and
|
||||
// interpreter_decrefs. Otherwise, increment increfs and decrefs.
|
||||
@@ -109,6 +109,18 @@ typedef struct _gc_stats {
|
||||
uint64_t objects_not_transitively_reachable;
|
||||
} GCStats;
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// stats specific to free-threaded build
|
||||
typedef struct _ft_stats {
|
||||
// number of times interpreter had to spin or park when trying to acquire a mutex
|
||||
uint64_t mutex_sleeps;
|
||||
// number of times that the QSBR mechanism polled (compute read sequence value)
|
||||
uint64_t qsbr_polls;
|
||||
// number of times stop-the-world mechanism was used
|
||||
uint64_t world_stops;
|
||||
} FTStats;
|
||||
#endif
|
||||
|
||||
typedef struct _uop_stats {
|
||||
uint64_t execution_count;
|
||||
uint64_t miss;
|
||||
@@ -173,22 +185,48 @@ typedef struct _stats {
|
||||
CallStats call_stats;
|
||||
ObjectStats object_stats;
|
||||
OptimizationStats optimization_stats;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
FTStats ft_stats;
|
||||
#endif
|
||||
RareEventStats rare_event_stats;
|
||||
GCStats *gc_stats;
|
||||
GCStats gc_stats[3]; // must match NUM_GENERATIONS
|
||||
} PyStats;
|
||||
|
||||
// Export for most shared extensions
|
||||
PyAPI_FUNC(PyStats *) _PyStats_GetLocal(void);
|
||||
|
||||
// Export for shared extensions like 'math'
|
||||
PyAPI_DATA(PyStats*) _Py_stats;
|
||||
#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
|
||||
// use inline function version defined in cpython/pystate.h
|
||||
static inline PyStats *_PyThreadState_GetStatsFast(void);
|
||||
#define _PyStats_GET _PyThreadState_GetStatsFast
|
||||
#else
|
||||
#define _PyStats_GET _PyStats_GetLocal
|
||||
#endif
|
||||
|
||||
#define _Py_STATS_EXPR(expr) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s != NULL) { \
|
||||
s->expr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define _Py_STATS_COND_EXPR(cond, expr) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s != NULL && (cond)) { \
|
||||
s->expr; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#ifdef _PY_INTERPRETER
|
||||
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0)
|
||||
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_increfs++; } while (0)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_immortal_decrefs++; } while (0)
|
||||
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_increfs++)
|
||||
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_decrefs++)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_increfs++)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.interpreter_immortal_decrefs++)
|
||||
#else
|
||||
# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0)
|
||||
# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_increfs++; } while (0)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.immortal_decrefs++; } while (0)
|
||||
# define _Py_INCREF_STAT_INC() _Py_STATS_EXPR(object_stats.increfs++)
|
||||
# define _Py_DECREF_STAT_INC() _Py_STATS_EXPR(object_stats.decrefs++)
|
||||
# define _Py_INCREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_increfs++)
|
||||
# define _Py_DECREF_IMMORTAL_STAT_INC() _Py_STATS_EXPR(object_stats.immortal_decrefs++)
|
||||
#endif
|
||||
|
||||
@@ -199,7 +199,7 @@ enum _GCPhase {
|
||||
};
|
||||
|
||||
/* If we change this, we need to change the default value in the
|
||||
signature of gc.collect. */
|
||||
signature of gc.collect and change the size of PyStats.gc_stats */
|
||||
#define NUM_GENERATIONS 3
|
||||
|
||||
struct _gc_runtime_state {
|
||||
@@ -963,6 +963,18 @@ struct _is {
|
||||
# ifdef Py_STACKREF_CLOSE_DEBUG
|
||||
_Py_hashtable_t *closed_stackrefs_table;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
// true if recording of pystats is on, this is used when new threads
|
||||
// are created to decide if recording should be on for them
|
||||
int pystats_enabled;
|
||||
// allocated when (and if) stats are first enabled
|
||||
PyStats *pystats_struct;
|
||||
#ifdef Py_GIL_DISABLED
|
||||
// held when pystats related interpreter state is being updated
|
||||
PyMutex pystats_mutex;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* the initial PyInterpreterState.threads.head */
|
||||
|
||||
@@ -9,7 +9,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef Py_STATS
|
||||
extern void _Py_StatsOn(void);
|
||||
extern int _Py_StatsOn(void);
|
||||
extern void _Py_StatsOff(void);
|
||||
extern void _Py_StatsClear(void);
|
||||
extern int _Py_PrintSpecializationStats(int to_file);
|
||||
|
||||
@@ -15,39 +15,56 @@ extern "C" {
|
||||
|
||||
#include "pycore_bitutils.h" // _Py_bit_length
|
||||
|
||||
#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0)
|
||||
#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0)
|
||||
#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0)
|
||||
#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0)
|
||||
#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0)
|
||||
#define OBJECT_STAT_INC_COND(name, cond) \
|
||||
do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0)
|
||||
#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0)
|
||||
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \
|
||||
do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0)
|
||||
#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0)
|
||||
#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0)
|
||||
#define OPT_STAT_ADD(name, n) do { if (_Py_stats) _Py_stats->optimization_stats.name += (n); } while (0)
|
||||
#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0)
|
||||
#define STAT_INC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name++)
|
||||
#define STAT_DEC(opname, name) _Py_STATS_EXPR(opcode_stats[opname].specialization.name--)
|
||||
#define OPCODE_EXE_INC(opname) _Py_STATS_EXPR(opcode_stats[opname].execution_count++)
|
||||
#define CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.name++)
|
||||
#define OBJECT_STAT_INC(name) _Py_STATS_EXPR(object_stats.name++)
|
||||
#define OBJECT_STAT_INC_COND(name, cond) _Py_STATS_COND_EXPR(cond, object_stats.name++)
|
||||
#define EVAL_CALL_STAT_INC(name) _Py_STATS_EXPR(call_stats.eval_calls[name]++)
|
||||
#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) _Py_STATS_COND_EXPR(PyFunction_Check(callable), call_stats.eval_calls[name]++)
|
||||
#define GC_STAT_ADD(gen, name, n) _Py_STATS_EXPR(gc_stats[(gen)].name += (n))
|
||||
#define OPT_STAT_INC(name) _Py_STATS_EXPR(optimization_stats.name++)
|
||||
#define OPT_STAT_ADD(name, n) _Py_STATS_EXPR(optimization_stats.name += (n))
|
||||
#define UOP_STAT_INC(opname, name) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s) { \
|
||||
assert(opname < 512); \
|
||||
s->optimization_stats.opcode[opname].name++; \
|
||||
} \
|
||||
} while (0)
|
||||
#define UOP_PAIR_INC(uopcode, lastuop) \
|
||||
do { \
|
||||
if (lastuop && _Py_stats) { \
|
||||
_Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (lastuop && s) { \
|
||||
s->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \
|
||||
} \
|
||||
lastuop = uopcode; \
|
||||
} while (0)
|
||||
#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0)
|
||||
#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0)
|
||||
#define OPT_UNSUPPORTED_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.unsupported_opcode[opname]++)
|
||||
#define OPT_ERROR_IN_OPCODE(opname) _Py_STATS_EXPR(optimization_stats.error_in_opcode[opname]++)
|
||||
#define OPT_HIST(length, name) \
|
||||
do { \
|
||||
if (_Py_stats) { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s) { \
|
||||
int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \
|
||||
bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \
|
||||
_Py_stats->optimization_stats.name[bucket]++; \
|
||||
s->optimization_stats.name[bucket]++; \
|
||||
} \
|
||||
} while (0)
|
||||
#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0)
|
||||
#define OPCODE_DEFERRED_INC(opname) do { if (_Py_stats && opcode == opname) _Py_stats->opcode_stats[opname].specialization.deferred++; } while (0)
|
||||
#define RARE_EVENT_STAT_INC(name) _Py_STATS_EXPR(rare_event_stats.name++)
|
||||
#define OPCODE_DEFERRED_INC(opname) _Py_STATS_COND_EXPR(opcode==opname, opcode_stats[opname].specialization.deferred++)
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define FT_STAT_MUTEX_SLEEP_INC() _Py_STATS_EXPR(ft_stats.mutex_sleeps++)
|
||||
#define FT_STAT_QSBR_POLL_INC() _Py_STATS_EXPR(ft_stats.qsbr_polls++)
|
||||
#define FT_STAT_WORLD_STOP_INC() _Py_STATS_EXPR(ft_stats.world_stops++)
|
||||
#else
|
||||
#define FT_STAT_MUTEX_SLEEP_INC()
|
||||
#define FT_STAT_QSBR_POLL_INC()
|
||||
#define FT_STAT_WORLD_STOP_INC()
|
||||
#endif
|
||||
|
||||
// Export for '_opcode' shared extension
|
||||
PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
@@ -71,6 +88,9 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
#define OPT_HIST(length, name) ((void)0)
|
||||
#define RARE_EVENT_STAT_INC(name) ((void)0)
|
||||
#define OPCODE_DEFERRED_INC(opname) ((void)0)
|
||||
#define FT_STAT_MUTEX_SLEEP_INC()
|
||||
#define FT_STAT_QSBR_POLL_INC()
|
||||
#define FT_STAT_WORLD_STOP_INC()
|
||||
#endif // !Py_STATS
|
||||
|
||||
|
||||
@@ -90,6 +110,11 @@ PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void);
|
||||
RARE_EVENT_INTERP_INC(interp, name); \
|
||||
} while (0); \
|
||||
|
||||
PyStatus _PyStats_InterpInit(PyInterpreterState *);
|
||||
bool _PyStats_ThreadInit(PyInterpreterState *, _PyThreadStateImpl *);
|
||||
void _PyStats_ThreadFini(_PyThreadStateImpl *);
|
||||
void _PyStats_Attach(_PyThreadStateImpl *);
|
||||
void _PyStats_Detach(_PyThreadStateImpl *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -70,8 +70,14 @@ typedef struct _PyThreadStateImpl {
|
||||
|
||||
// When >1, code objects do not immortalize their non-string constants.
|
||||
int suppress_co_const_immortalization;
|
||||
|
||||
#ifdef Py_STATS
|
||||
// per-thread stats, will be merged into interp->pystats_struct
|
||||
PyStats *pystats_struct; // allocated by _PyStats_ThreadInit()
|
||||
#endif
|
||||
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED)
|
||||
Py_ssize_t reftotal; // this thread's total refcount operations
|
||||
#endif
|
||||
|
||||
215
Lib/test/test_pystats.py
Normal file
215
Lib/test/test_pystats.py
Normal file
@@ -0,0 +1,215 @@
|
||||
import sys
|
||||
import textwrap
|
||||
import unittest
|
||||
from test.support import script_helper
|
||||
|
||||
# This function is available for the --enable-pystats config.
|
||||
HAVE_PYSTATS = hasattr(sys, '_stats_on')
|
||||
|
||||
TEST_TEMPLATE = """
|
||||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
THREADS = 2
|
||||
|
||||
class A:
|
||||
pass
|
||||
|
||||
class B:
|
||||
pass
|
||||
|
||||
def modify_class():
|
||||
# This is used as a rare event we can assume doesn't happen unless we do it.
|
||||
# It increments the "Rare event (set_class)" count.
|
||||
a = A()
|
||||
a.__class__ = B
|
||||
|
||||
TURNED_ON = False
|
||||
def stats_on():
|
||||
global TURNED_ON
|
||||
sys._stats_on()
|
||||
TURNED_ON = True
|
||||
|
||||
TURNED_OFF = False
|
||||
def stats_off():
|
||||
global TURNED_OFF
|
||||
sys._stats_off()
|
||||
TURNED_OFF = True
|
||||
|
||||
CLEARED = False
|
||||
def stats_clear():
|
||||
global CLEARED
|
||||
sys._stats_clear()
|
||||
CLEARED = True
|
||||
|
||||
def func_start():
|
||||
pass
|
||||
|
||||
def func_end():
|
||||
pass
|
||||
|
||||
def func_test(thread_id):
|
||||
pass
|
||||
|
||||
_TEST_CODE_
|
||||
|
||||
func_start()
|
||||
threads = []
|
||||
for i in range(THREADS):
|
||||
t = threading.Thread(target=func_test, args=(i,))
|
||||
threads.append(t)
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
func_end()
|
||||
"""
|
||||
|
||||
|
||||
def run_test_code(
    test_code,
    args=None,
    env_vars=None,
):
    """Run test code and return the value of the "set_class" stats counter.

    The dedented *test_code* replaces the ``_TEST_CODE_`` placeholder in
    TEST_TEMPLATE and the resulting script is passed with ``-c`` to
    ``script_helper.run_python_until_end()``.

    *args* is an optional sequence of extra command-line arguments placed
    before ``-c``.  *env_vars* is an optional mapping forwarded as keyword
    arguments to ``run_python_until_end()``.

    Return the stripped text that follows the first ':' on the first stderr
    line containing 'Rare event (set_class)', or '' if there is no such line.
    """
    code = textwrap.dedent(TEST_TEMPLATE)
    code = code.replace('_TEST_CODE_', textwrap.dedent(test_code))
    # Build a fresh argument list on every call instead of relying on a
    # shared mutable default.
    script_args = [*(args or ()), '-c', code]
    env_vars = env_vars or {}
    res, _ = script_helper.run_python_until_end(*script_args, **env_vars)
    stderr = res.err.decode("ascii", "backslashreplace")
    for line in stderr.split('\n'):
        if 'Rare event (set_class)' in line:
            _, _, value = line.partition(':')
            return value.strip()
    return ''
|
||||
|
||||
|
||||
@unittest.skipUnless(HAVE_PYSTATS, "requires pystats build option")
class TestPyStats(unittest.TestCase):
    """Tests for pystats functionality (requires --enable-pystats build
    option).

    Each test passes a snippet to run_test_code() that overrides one of the
    template hooks (func_start, func_test) and then checks the reported
    "Rare event (set_class)" count, which modify_class() increments.
    """

    def test_stats_toggle_on(self):
        """Check the toggle on functionality.
        """
        code = """
        def func_start():
            modify_class()
        """

        # If turned on with command line flag, should get one count.
        stat_count = run_test_code(code, args=['-X', 'pystats'])
        self.assertEqual(stat_count, '1')

        # If turned on with env var, should get one count.
        stat_count = run_test_code(code, env_vars={'PYTHONSTATS': '1'})
        self.assertEqual(stat_count, '1')

        # If not turned on, should be no counts.
        stat_count = run_test_code(code)
        self.assertEqual(stat_count, '')

        code = """
        def func_start():
            modify_class()
            sys._stats_on()
            modify_class()
        """
        # Not initially turned on but enabled by sys._stats_on(), should get
        # one count.
        stat_count = run_test_code(code)
        self.assertEqual(stat_count, '1')

    def test_stats_toggle_on_thread(self):
        """Check the toggle on functionality when threads are used.
        """
        code = """
        def func_test(thread_id):
            if thread_id == 0:
                modify_class()
                stats_on()
                modify_class()
            else:
                while not TURNED_ON:
                    pass
                modify_class()
        """
        # Turning on in one thread will count in other thread.
        stat_count = run_test_code(code)
        self.assertEqual(stat_count, '2')

        code = """
        def func_test(thread_id):
            if thread_id == 0:
                modify_class()
                stats_off()
                modify_class()
            else:
                while not TURNED_OFF:
                    pass
                modify_class()
        """
        # Turning off in one thread will not count in other threads.
        stat_count = run_test_code(code, args=['-X', 'pystats'])
        self.assertEqual(stat_count, '1')

    def test_thread_exit_merge(self):
        """Check that per-thread stats (when free-threading enabled) are merged.
        """
        code = """
        def func_test(thread_id):
            modify_class()
            if thread_id == 0:
                raise SystemExit
        """
        # Stats from a thread exiting early should still be counted.
        stat_count = run_test_code(code, args=['-X', 'pystats'])
        self.assertEqual(stat_count, '2')

    def test_stats_dump(self):
        """Check that sys._stats_dump() works.
        """
        code = """
        def func_test(thread_id):
            if thread_id == 0:
                stats_on()
            else:
                while not TURNED_ON:
                    pass
                modify_class()
                sys._stats_dump()
                stats_off()
        """
        # The single event recorded after stats were turned on should be
        # reported with a count of one.
        stat_count = run_test_code(code)
        self.assertEqual(stat_count, '1')

    def test_stats_clear(self):
        """Check that sys._stats_clear() works.
        """
        code = """
        ready = False
        def func_test(thread_id):
            global ready
            if thread_id == 0:
                stats_on()
                modify_class()
                while not ready:
                    pass  # wait until other thread has called modify_class()
                stats_clear()  # clears stats for all threads
            else:
                while not TURNED_ON:
                    pass
                modify_class()
                ready = True
        """
        # Clearing stats will clear for all threads
        stat_count = run_test_code(code)
        self.assertEqual(stat_count, '0')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -483,6 +483,7 @@ PYTHON_OBJS= \
|
||||
Python/pylifecycle.o \
|
||||
Python/pymath.o \
|
||||
Python/pystate.o \
|
||||
Python/pystats.o \
|
||||
Python/pythonrun.o \
|
||||
Python/pytime.o \
|
||||
Python/qsbr.o \
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
Support the ``--enable-pystats`` build option for the free-threaded build.
|
||||
@@ -10,8 +10,8 @@
|
||||
|
||||
See the source code for LLVMFuzzerTestOneInput for details. */
|
||||
|
||||
#ifndef Py_BUILD_CORE
|
||||
# define Py_BUILD_CORE 1
|
||||
#ifndef Py_BUILD_CORE_MODULE
|
||||
# define Py_BUILD_CORE_MODULE 1
|
||||
#endif
|
||||
|
||||
#include <Python.h>
|
||||
|
||||
@@ -257,6 +257,7 @@
|
||||
<ClCompile Include="..\Python\pylifecycle.c" />
|
||||
<ClCompile Include="..\Python\pymath.c" />
|
||||
<ClCompile Include="..\Python\pystate.c" />
|
||||
<ClCompile Include="..\Python\pystats.c" />
|
||||
<ClCompile Include="..\Python\pystrcmp.c" />
|
||||
<ClCompile Include="..\Python\pystrhex.c" />
|
||||
<ClCompile Include="..\Python\pystrtod.c" />
|
||||
|
||||
@@ -367,6 +367,9 @@
|
||||
<ClCompile Include="..\Python\pystate.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\pystats.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\Python\pystrcmp.c">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
||||
@@ -658,6 +658,7 @@
|
||||
<ClCompile Include="..\Python\pymath.c" />
|
||||
<ClCompile Include="..\Python\pytime.c" />
|
||||
<ClCompile Include="..\Python\pystate.c" />
|
||||
<ClCompile Include="..\Python\pystats.c" />
|
||||
<ClCompile Include="..\Python\pystrcmp.c" />
|
||||
<ClCompile Include="..\Python\pystrhex.c" />
|
||||
<ClCompile Include="..\Python\pystrtod.c" />
|
||||
|
||||
@@ -62,8 +62,9 @@
|
||||
#ifdef Py_STATS
|
||||
#define INSTRUCTION_STATS(op) \
|
||||
do { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
OPCODE_EXE_INC(op); \
|
||||
if (_Py_stats) _Py_stats->opcode_stats[lastopcode].pair_count[op]++; \
|
||||
if (s) s->opcode_stats[lastopcode].pair_count[op]++; \
|
||||
lastopcode = op; \
|
||||
} while (0)
|
||||
#else
|
||||
|
||||
@@ -2111,10 +2111,11 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason)
|
||||
_PyErr_SetRaisedException(tstate, exc);
|
||||
GC_STAT_ADD(generation, objects_collected, stats.collected);
|
||||
#ifdef Py_STATS
|
||||
if (_Py_stats) {
|
||||
PyStats *s = _PyStats_GET();
|
||||
if (s) {
|
||||
GC_STAT_ADD(generation, object_visits,
|
||||
_Py_stats->object_stats.object_visits);
|
||||
_Py_stats->object_stats.object_visits = 0;
|
||||
s->object_stats.object_visits);
|
||||
s->object_stats.object_visits = 0;
|
||||
}
|
||||
#endif
|
||||
validate_spaces(gcstate);
|
||||
|
||||
@@ -2362,8 +2362,9 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
|
||||
assert(generation >= 0 && generation < NUM_GENERATIONS);
|
||||
|
||||
#ifdef Py_STATS
|
||||
if (_Py_stats) {
|
||||
_Py_stats->object_stats.object_visits = 0;
|
||||
PyStats *s = _PyStats_GET();
|
||||
if (s) {
|
||||
s->object_stats.object_visits = 0;
|
||||
}
|
||||
#endif
|
||||
GC_STAT_ADD(generation, collections, 1);
|
||||
@@ -2426,10 +2427,13 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason)
|
||||
|
||||
GC_STAT_ADD(generation, objects_collected, m);
|
||||
#ifdef Py_STATS
|
||||
if (_Py_stats) {
|
||||
{
|
||||
PyStats *s = _PyStats_GET();
|
||||
if (s) {
|
||||
GC_STAT_ADD(generation, object_visits,
|
||||
_Py_stats->object_stats.object_visits);
|
||||
_Py_stats->object_stats.object_visits = 0;
|
||||
s->object_stats.object_visits);
|
||||
s->object_stats.object_visits = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -2810,12 +2810,6 @@ _PyConfig_Write(const PyConfig *config, _PyRuntimeState *runtime)
|
||||
return _PyStatus_NO_MEMORY();
|
||||
}
|
||||
|
||||
#ifdef Py_STATS
|
||||
if (config->_pystats) {
|
||||
_Py_StatsOn();
|
||||
}
|
||||
#endif
|
||||
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include "pycore_parking_lot.h"
|
||||
#include "pycore_semaphore.h"
|
||||
#include "pycore_time.h" // _PyTime_Add()
|
||||
#include "pycore_stats.h" // FT_STAT_MUTEX_SLEEP_INC()
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
# ifndef WIN32_LEAN_AND_MEAN
|
||||
@@ -62,6 +63,8 @@ _PyMutex_LockTimed(PyMutex *m, PyTime_t timeout, _PyLockFlags flags)
|
||||
return PY_LOCK_FAILURE;
|
||||
}
|
||||
|
||||
FT_STAT_MUTEX_SLEEP_INC();
|
||||
|
||||
PyTime_t now;
|
||||
// silently ignore error: cannot report error to the caller
|
||||
(void)PyTime_MonotonicRaw(&now);
|
||||
|
||||
@@ -26,6 +26,7 @@
|
||||
#include "pycore_runtime.h" // _Py_ID()
|
||||
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
|
||||
#include "pycore_setobject.h" // _PySet_NextEntry()
|
||||
#include "pycore_stats.h" // _PyStats_InterpInit()
|
||||
#include "pycore_sysmodule.h" // _PySys_ClearAttrString()
|
||||
#include "pycore_traceback.h" // _Py_DumpTracebackThreads()
|
||||
#include "pycore_typeobject.h" // _PyTypes_InitTypes()
|
||||
@@ -656,6 +657,14 @@ pycore_create_interpreter(_PyRuntimeState *runtime,
|
||||
return status;
|
||||
}
|
||||
|
||||
#ifdef Py_STATS
|
||||
// initialize pystats. This must be done after the settings are loaded.
|
||||
status = _PyStats_InterpInit(interp);
|
||||
if (_PyStatus_EXCEPTION(status)) {
|
||||
return status;
|
||||
}
|
||||
#endif
|
||||
|
||||
// initialize the interp->obmalloc state. This must be done after
|
||||
// the settings are loaded (so that feature_flags are set) but before
|
||||
// any calls are made to obmalloc functions.
|
||||
@@ -2469,6 +2478,14 @@ new_interpreter(PyThreadState **tstate_p,
|
||||
return status;
|
||||
}
|
||||
|
||||
#ifdef Py_STATS
|
||||
// initialize pystats. This must be done after the settings are loaded.
|
||||
status = _PyStats_InterpInit(interp);
|
||||
if (_PyStatus_EXCEPTION(status)) {
|
||||
return status;
|
||||
}
|
||||
#endif
|
||||
|
||||
// initialize the interp->obmalloc state. This must be done after
|
||||
// the settings are loaded (so that feature_flags are set) but before
|
||||
// any calls are made to obmalloc functions.
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
#include "pycore_runtime.h" // _PyRuntime
|
||||
#include "pycore_runtime_init.h" // _PyRuntimeState_INIT
|
||||
#include "pycore_stackref.h" // Py_STACKREF_DEBUG
|
||||
#include "pycore_stats.h" // FT_STAT_WORLD_STOP_INC()
|
||||
#include "pycore_time.h" // _PyTime_Init()
|
||||
#include "pycore_uop.h" // UOP_BUFFER_SIZE
|
||||
#include "pycore_uniqueid.h" // _PyObject_FinalizePerThreadRefcounts()
|
||||
@@ -465,6 +466,12 @@ alloc_interpreter(void)
|
||||
static void
|
||||
free_interpreter(PyInterpreterState *interp)
|
||||
{
|
||||
#ifdef Py_STATS
|
||||
if (interp->pystats_struct) {
|
||||
PyMem_RawFree(interp->pystats_struct);
|
||||
interp->pystats_struct = NULL;
|
||||
}
|
||||
#endif
|
||||
// The main interpreter is statically allocated so
|
||||
// should not be freed.
|
||||
if (interp != &_PyRuntime._main_interpreter) {
|
||||
@@ -1407,6 +1414,9 @@ static void
|
||||
free_threadstate(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
PyInterpreterState *interp = tstate->base.interp;
|
||||
#ifdef Py_STATS
|
||||
_PyStats_ThreadFini(tstate);
|
||||
#endif
|
||||
// The initial thread state of the interpreter is allocated
|
||||
// as part of the interpreter state so should not be freed.
|
||||
if (tstate == &interp->_initial_thread) {
|
||||
@@ -1535,6 +1545,13 @@ new_threadstate(PyInterpreterState *interp, int whence)
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
#ifdef Py_STATS
|
||||
// The PyStats structure is quite large and is allocated separately from tstate.
|
||||
if (!_PyStats_ThreadInit(interp, tstate)) {
|
||||
free_threadstate(tstate);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* We serialize concurrent creation to protect global state. */
|
||||
HEAD_LOCK(interp->runtime);
|
||||
@@ -1846,6 +1863,9 @@ _PyThreadState_DeleteCurrent(PyThreadState *tstate)
|
||||
_Py_EnsureTstateNotNULL(tstate);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_Py_qsbr_detach(((_PyThreadStateImpl *)tstate)->qsbr);
|
||||
#endif
|
||||
#ifdef Py_STATS
|
||||
_PyStats_Detach((_PyThreadStateImpl *)tstate);
|
||||
#endif
|
||||
current_fast_clear(tstate->interp->runtime);
|
||||
tstate_delete_common(tstate, 1); // release GIL as part of call
|
||||
@@ -2020,6 +2040,10 @@ tstate_deactivate(PyThreadState *tstate)
|
||||
assert(tstate_is_bound(tstate));
|
||||
assert(tstate->_status.active);
|
||||
|
||||
#ifdef Py_STATS
    // Tell pystats that this thread state is being deactivated.
    _PyStats_Detach((_PyThreadStateImpl *)tstate);
#endif
|
||||
|
||||
tstate->_status.active = 0;
|
||||
|
||||
// We do not unbind the gilstate tstate here.
|
||||
@@ -2123,6 +2147,10 @@ _PyThreadState_Attach(PyThreadState *tstate)
|
||||
_PyCriticalSection_Resume(tstate);
|
||||
}
|
||||
|
||||
#ifdef Py_STATS
|
||||
_PyStats_Attach((_PyThreadStateImpl *)tstate);
|
||||
#endif
|
||||
|
||||
#if defined(Py_DEBUG)
|
||||
errno = err;
|
||||
#endif
|
||||
@@ -2272,6 +2300,7 @@ stop_the_world(struct _stoptheworld_state *stw)
|
||||
stw->thread_countdown = 0;
|
||||
stw->stop_event = (PyEvent){0}; // zero-initialize (unset)
|
||||
stw->requester = _PyThreadState_GET(); // may be NULL
|
||||
FT_STAT_WORLD_STOP_INC();
|
||||
|
||||
_Py_FOR_EACH_STW_INTERP(stw, i) {
|
||||
_Py_FOR_EACH_TSTATE_UNLOCKED(i, t) {
|
||||
|
||||
819
Python/pystats.c
Normal file
819
Python/pystats.c
Normal file
@@ -0,0 +1,819 @@
|
||||
#include "Python.h"
|
||||
|
||||
#include "pycore_opcode_metadata.h" // _PyOpcode_Caches
|
||||
#include "pycore_pyatomic_ft_wrappers.h"
|
||||
#include "pycore_pylifecycle.h" // _PyOS_URandomNonblock()
|
||||
#include "pycore_tstate.h"
|
||||
#include "pycore_initconfig.h" // _PyStatus_OK()
|
||||
#include "pycore_uop_metadata.h" // _PyOpcode_uop_name
|
||||
#include "pycore_uop_ids.h" // MAX_UOP_ID
|
||||
#include "pycore_pystate.h" // _PyThreadState_GET()
|
||||
#include "pycore_runtime.h" // NUM_GENERATIONS
|
||||
|
||||
#include <stdlib.h> // rand()
|
||||
|
||||
#ifdef Py_STATS
|
||||
|
||||
PyStats *
|
||||
_PyStats_GetLocal(void)
|
||||
{
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
if (tstate) {
|
||||
return tstate->pystats;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
#define STATS_LOCK(interp) PyMutex_Lock(&interp->pystats_mutex)
|
||||
#define STATS_UNLOCK(interp) PyMutex_Unlock(&interp->pystats_mutex)
|
||||
#else
|
||||
#define STATS_LOCK(interp)
|
||||
#define STATS_UNLOCK(interp)
|
||||
#endif
|
||||
|
||||
|
||||
#if PYSTATS_MAX_UOP_ID < MAX_UOP_ID
|
||||
#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_ID"
|
||||
#endif
|
||||
|
||||
#define ADD_STAT_TO_DICT(res, field) \
|
||||
do { \
|
||||
PyObject *val = PyLong_FromUnsignedLongLong(stats->field); \
|
||||
if (val == NULL) { \
|
||||
Py_DECREF(res); \
|
||||
return NULL; \
|
||||
} \
|
||||
if (PyDict_SetItemString(res, #field, val) == -1) { \
|
||||
Py_DECREF(res); \
|
||||
Py_DECREF(val); \
|
||||
return NULL; \
|
||||
} \
|
||||
Py_DECREF(val); \
|
||||
} while(0);
|
||||
|
||||
static PyObject*
|
||||
stats_to_dict(SpecializationStats *stats)
|
||||
{
|
||||
PyObject *res = PyDict_New();
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
ADD_STAT_TO_DICT(res, success);
|
||||
ADD_STAT_TO_DICT(res, failure);
|
||||
ADD_STAT_TO_DICT(res, hit);
|
||||
ADD_STAT_TO_DICT(res, deferred);
|
||||
ADD_STAT_TO_DICT(res, miss);
|
||||
ADD_STAT_TO_DICT(res, deopt);
|
||||
PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
|
||||
if (failure_kinds == NULL) {
|
||||
Py_DECREF(res);
|
||||
return NULL;
|
||||
}
|
||||
for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
|
||||
PyObject *stat = PyLong_FromUnsignedLongLong(stats->failure_kinds[i]);
|
||||
if (stat == NULL) {
|
||||
Py_DECREF(res);
|
||||
Py_DECREF(failure_kinds);
|
||||
return NULL;
|
||||
}
|
||||
PyTuple_SET_ITEM(failure_kinds, i, stat);
|
||||
}
|
||||
if (PyDict_SetItemString(res, "failure_kinds", failure_kinds)) {
|
||||
Py_DECREF(res);
|
||||
Py_DECREF(failure_kinds);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(failure_kinds);
|
||||
return res;
|
||||
}
|
||||
#undef ADD_STAT_TO_DICT
|
||||
|
||||
static int
|
||||
add_stat_dict(
|
||||
PyStats *src,
|
||||
PyObject *res,
|
||||
int opcode,
|
||||
const char *name) {
|
||||
|
||||
SpecializationStats *stats = &src->opcode_stats[opcode].specialization;
|
||||
PyObject *d = stats_to_dict(stats);
|
||||
if (d == NULL) {
|
||||
return -1;
|
||||
}
|
||||
int err = PyDict_SetItemString(res, name, d);
|
||||
Py_DECREF(d);
|
||||
return err;
|
||||
}
|
||||
|
||||
PyObject*
|
||||
_Py_GetSpecializationStats(void) {
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
PyStats *src = FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct);
|
||||
if (src == NULL) {
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
PyObject *stats = PyDict_New();
|
||||
if (stats == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
int err = 0;
|
||||
err += add_stat_dict(src, stats, CONTAINS_OP, "contains_op");
|
||||
err += add_stat_dict(src, stats, LOAD_SUPER_ATTR, "load_super_attr");
|
||||
err += add_stat_dict(src, stats, LOAD_ATTR, "load_attr");
|
||||
err += add_stat_dict(src, stats, LOAD_GLOBAL, "load_global");
|
||||
err += add_stat_dict(src, stats, STORE_SUBSCR, "store_subscr");
|
||||
err += add_stat_dict(src, stats, STORE_ATTR, "store_attr");
|
||||
err += add_stat_dict(src, stats, JUMP_BACKWARD, "jump_backward");
|
||||
err += add_stat_dict(src, stats, CALL, "call");
|
||||
err += add_stat_dict(src, stats, CALL_KW, "call_kw");
|
||||
err += add_stat_dict(src, stats, BINARY_OP, "binary_op");
|
||||
err += add_stat_dict(src, stats, COMPARE_OP, "compare_op");
|
||||
err += add_stat_dict(src, stats, UNPACK_SEQUENCE, "unpack_sequence");
|
||||
err += add_stat_dict(src, stats, FOR_ITER, "for_iter");
|
||||
err += add_stat_dict(src, stats, TO_BOOL, "to_bool");
|
||||
err += add_stat_dict(src, stats, SEND, "send");
|
||||
if (err < 0) {
|
||||
Py_DECREF(stats);
|
||||
return NULL;
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
|
||||
#define PRINT_STAT(i, field) \
|
||||
if (stats[i].field) { \
|
||||
fprintf(out, " opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
|
||||
}
|
||||
|
||||
static void
|
||||
print_spec_stats(FILE *out, OpcodeStats *stats)
|
||||
{
|
||||
/* Mark some opcodes as specializable for stats,
|
||||
* even though we don't specialize them yet. */
|
||||
fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
|
||||
fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
|
||||
fprintf(out, "opcode[GET_ITER].specializable : 1\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (_PyOpcode_Caches[i]) {
|
||||
/* Ignore jumps as they cannot be specialized */
|
||||
switch (i) {
|
||||
case POP_JUMP_IF_FALSE:
|
||||
case POP_JUMP_IF_TRUE:
|
||||
case POP_JUMP_IF_NONE:
|
||||
case POP_JUMP_IF_NOT_NONE:
|
||||
case JUMP_BACKWARD:
|
||||
break;
|
||||
default:
|
||||
fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
|
||||
}
|
||||
}
|
||||
PRINT_STAT(i, specialization.success);
|
||||
PRINT_STAT(i, specialization.failure);
|
||||
PRINT_STAT(i, specialization.hit);
|
||||
PRINT_STAT(i, specialization.deferred);
|
||||
PRINT_STAT(i, specialization.miss);
|
||||
PRINT_STAT(i, specialization.deopt);
|
||||
PRINT_STAT(i, execution_count);
|
||||
for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
|
||||
uint64_t val = stats[i].specialization.failure_kinds[j];
|
||||
if (val) {
|
||||
fprintf(out, " opcode[%s].specialization.failure_kinds[%d] : %"
|
||||
PRIu64 "\n", _PyOpcode_OpName[i], j, val);
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < 256; j++) {
|
||||
if (stats[i].pair_count[j]) {
|
||||
fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
|
||||
_PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef PRINT_STAT
|
||||
|
||||
|
||||
static void
|
||||
print_call_stats(FILE *out, CallStats *stats)
|
||||
{
|
||||
fprintf(out, "Calls to PyEval_EvalDefault: %" PRIu64 "\n", stats->pyeval_calls);
|
||||
fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n", stats->inlined_py_calls);
|
||||
fprintf(out, "Frames pushed: %" PRIu64 "\n", stats->frames_pushed);
|
||||
fprintf(out, "Frame objects created: %" PRIu64 "\n", stats->frame_objects_created);
|
||||
for (int i = 0; i < EVAL_CALL_KINDS; i++) {
|
||||
fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", i, stats->eval_calls[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_object_stats(FILE *out, ObjectStats *stats)
|
||||
{
|
||||
fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist);
|
||||
fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist);
|
||||
fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations);
|
||||
fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512);
|
||||
fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k);
|
||||
fprintf(out, "Object allocations over 4 kbytes: %" PRIu64 "\n", stats->allocations_big);
|
||||
fprintf(out, "Object frees: %" PRIu64 "\n", stats->frees);
|
||||
fprintf(out, "Object inline values: %" PRIu64 "\n", stats->inline_values);
|
||||
fprintf(out, "Object interpreter mortal increfs: %" PRIu64 "\n", stats->interpreter_increfs);
|
||||
fprintf(out, "Object interpreter mortal decrefs: %" PRIu64 "\n", stats->interpreter_decrefs);
|
||||
fprintf(out, "Object mortal increfs: %" PRIu64 "\n", stats->increfs);
|
||||
fprintf(out, "Object mortal decrefs: %" PRIu64 "\n", stats->decrefs);
|
||||
fprintf(out, "Object interpreter immortal increfs: %" PRIu64 "\n", stats->interpreter_immortal_increfs);
|
||||
fprintf(out, "Object interpreter immortal decrefs: %" PRIu64 "\n", stats->interpreter_immortal_decrefs);
|
||||
fprintf(out, "Object immortal increfs: %" PRIu64 "\n", stats->immortal_increfs);
|
||||
fprintf(out, "Object immortal decrefs: %" PRIu64 "\n", stats->immortal_decrefs);
|
||||
fprintf(out, "Object materialize dict (on request): %" PRIu64 "\n", stats->dict_materialized_on_request);
|
||||
fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key);
|
||||
fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big);
|
||||
fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass);
|
||||
fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits);
|
||||
fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses);
|
||||
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
|
||||
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
|
||||
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
|
||||
}
|
||||
|
||||
static void
|
||||
print_gc_stats(FILE *out, GCStats *stats)
|
||||
{
|
||||
for (int i = 0; i < NUM_GENERATIONS; i++) {
|
||||
fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
|
||||
fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
|
||||
fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
|
||||
fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
|
||||
fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _Py_TIER2
|
||||
static void
|
||||
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
|
||||
{
|
||||
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
|
||||
fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
|
||||
}
|
||||
}
|
||||
|
||||
extern const char *_PyUOpName(int index);
|
||||
|
||||
static void
|
||||
print_optimization_stats(FILE *out, OptimizationStats *stats)
|
||||
{
|
||||
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
|
||||
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
|
||||
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
|
||||
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
|
||||
fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
|
||||
fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
|
||||
fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
|
||||
fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
|
||||
fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
|
||||
fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
|
||||
fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
|
||||
fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
|
||||
fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
|
||||
|
||||
print_histogram(out, "Trace length", stats->trace_length_hist);
|
||||
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
|
||||
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
|
||||
|
||||
fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
|
||||
fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
|
||||
fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
|
||||
stats->optimizer_failure_reason_no_memory);
|
||||
fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
|
||||
fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
|
||||
for (int i = 0; i <= MAX_UOP_ID; i++) {
|
||||
if (stats->opcode[i].execution_count) {
|
||||
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
|
||||
}
|
||||
if (stats->opcode[i].miss) {
|
||||
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (stats->unsupported_opcode[i]) {
|
||||
fprintf(
|
||||
out,
|
||||
"unsupported_opcode[%s].count : %" PRIu64 "\n",
|
||||
_PyOpcode_OpName[i],
|
||||
stats->unsupported_opcode[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 1; i <= MAX_UOP_ID; i++){
|
||||
for (int j = 1; j <= MAX_UOP_ID; j++) {
|
||||
if (stats->opcode[i].pair_count[j]) {
|
||||
fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
|
||||
_PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < MAX_UOP_ID; i++) {
|
||||
if (stats->error_in_opcode[i]) {
|
||||
fprintf(
|
||||
out,
|
||||
"error_in_opcode[%s].count : %" PRIu64 "\n",
|
||||
_PyUOpName(i),
|
||||
stats->error_in_opcode[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
|
||||
fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
|
||||
fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
|
||||
fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
|
||||
fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
|
||||
fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);
|
||||
|
||||
print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
static void
|
||||
print_ft_stats(FILE *out, FTStats *stats)
|
||||
{
|
||||
fprintf(out, "Mutex sleeps (mutex_sleeps): %" PRIu64 "\n", stats->mutex_sleeps);
|
||||
fprintf(out, "QSBR polls (qsbr_polls): %" PRIu64 "\n", stats->qsbr_polls);
|
||||
fprintf(out, "World stops (world_stops): %" PRIu64 "\n", stats->world_stops);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
print_rare_event_stats(FILE *out, RareEventStats *stats)
|
||||
{
|
||||
fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class);
|
||||
fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases);
|
||||
fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func);
|
||||
fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict);
|
||||
fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification);
|
||||
fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n", stats->watched_dict_modification);
|
||||
fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n", stats->watched_globals_modification);
|
||||
}
|
||||
|
||||
static void
|
||||
print_stats(FILE *out, PyStats *stats)
|
||||
{
|
||||
print_spec_stats(out, stats->opcode_stats);
|
||||
print_call_stats(out, &stats->call_stats);
|
||||
print_object_stats(out, &stats->object_stats);
|
||||
print_gc_stats(out, stats->gc_stats);
|
||||
#ifdef _Py_TIER2
|
||||
print_optimization_stats(out, &stats->optimization_stats);
|
||||
#endif
|
||||
#ifdef Py_GIL_DISABLED
|
||||
print_ft_stats(out, &stats->ft_stats);
|
||||
#endif
|
||||
print_rare_event_stats(out, &stats->rare_event_stats);
|
||||
}
|
||||
|
||||
#ifdef Py_GIL_DISABLED
|
||||
|
||||
static void
|
||||
merge_specialization_stats(SpecializationStats *dest, const SpecializationStats *src)
|
||||
{
|
||||
dest->success += src->success;
|
||||
dest->failure += src->failure;
|
||||
dest->hit += src->hit;
|
||||
dest->deferred += src->deferred;
|
||||
dest->miss += src->miss;
|
||||
dest->deopt += src->deopt;
|
||||
for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
|
||||
dest->failure_kinds[i] += src->failure_kinds[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_opcode_stats_array(OpcodeStats *dest, const OpcodeStats *src)
|
||||
{
|
||||
for (int i = 0; i < 256; i++) {
|
||||
merge_specialization_stats(&dest[i].specialization, &src[i].specialization);
|
||||
dest[i].execution_count += src[i].execution_count;
|
||||
for (int j = 0; j < 256; j++) {
|
||||
dest[i].pair_count[j] += src[i].pair_count[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_call_stats(CallStats *dest, const CallStats *src)
|
||||
{
|
||||
dest->inlined_py_calls += src->inlined_py_calls;
|
||||
dest->pyeval_calls += src->pyeval_calls;
|
||||
dest->frames_pushed += src->frames_pushed;
|
||||
dest->frame_objects_created += src->frame_objects_created;
|
||||
for (int i = 0; i < EVAL_CALL_KINDS; i++) {
|
||||
dest->eval_calls[i] += src->eval_calls[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_object_stats(ObjectStats *dest, const ObjectStats *src)
|
||||
{
|
||||
dest->increfs += src->increfs;
|
||||
dest->decrefs += src->decrefs;
|
||||
dest->interpreter_increfs += src->interpreter_increfs;
|
||||
dest->interpreter_decrefs += src->interpreter_decrefs;
|
||||
dest->immortal_increfs += src->immortal_increfs;
|
||||
dest->immortal_decrefs += src->immortal_decrefs;
|
||||
dest->interpreter_immortal_increfs += src->interpreter_immortal_increfs;
|
||||
dest->interpreter_immortal_decrefs += src->interpreter_immortal_decrefs;
|
||||
dest->allocations += src->allocations;
|
||||
dest->allocations512 += src->allocations512;
|
||||
dest->allocations4k += src->allocations4k;
|
||||
dest->allocations_big += src->allocations_big;
|
||||
dest->frees += src->frees;
|
||||
dest->to_freelist += src->to_freelist;
|
||||
dest->from_freelist += src->from_freelist;
|
||||
dest->inline_values += src->inline_values;
|
||||
dest->dict_materialized_on_request += src->dict_materialized_on_request;
|
||||
dest->dict_materialized_new_key += src->dict_materialized_new_key;
|
||||
dest->dict_materialized_too_big += src->dict_materialized_too_big;
|
||||
dest->dict_materialized_str_subclass += src->dict_materialized_str_subclass;
|
||||
dest->type_cache_hits += src->type_cache_hits;
|
||||
dest->type_cache_misses += src->type_cache_misses;
|
||||
dest->type_cache_dunder_hits += src->type_cache_dunder_hits;
|
||||
dest->type_cache_dunder_misses += src->type_cache_dunder_misses;
|
||||
dest->type_cache_collisions += src->type_cache_collisions;
|
||||
dest->object_visits += src->object_visits;
|
||||
}
|
||||
|
||||
static void
|
||||
merge_uop_stats_array(UOpStats *dest, const UOpStats *src)
|
||||
{
|
||||
for (int i = 0; i <= PYSTATS_MAX_UOP_ID; i++) {
|
||||
dest[i].execution_count += src[i].execution_count;
|
||||
dest[i].miss += src[i].miss;
|
||||
for (int j = 0; j <= PYSTATS_MAX_UOP_ID; j++) {
|
||||
dest[i].pair_count[j] += src[i].pair_count[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_optimization_stats(OptimizationStats *dest, const OptimizationStats *src)
|
||||
{
|
||||
dest->attempts += src->attempts;
|
||||
dest->traces_created += src->traces_created;
|
||||
dest->traces_executed += src->traces_executed;
|
||||
dest->uops_executed += src->uops_executed;
|
||||
dest->trace_stack_overflow += src->trace_stack_overflow;
|
||||
dest->trace_stack_underflow += src->trace_stack_underflow;
|
||||
dest->trace_too_long += src->trace_too_long;
|
||||
dest->trace_too_short += src->trace_too_short;
|
||||
dest->inner_loop += src->inner_loop;
|
||||
dest->recursive_call += src->recursive_call;
|
||||
dest->low_confidence += src->low_confidence;
|
||||
dest->unknown_callee += src->unknown_callee;
|
||||
dest->executors_invalidated += src->executors_invalidated;
|
||||
dest->optimizer_attempts += src->optimizer_attempts;
|
||||
dest->optimizer_successes += src->optimizer_successes;
|
||||
dest->optimizer_failure_reason_no_memory += src->optimizer_failure_reason_no_memory;
|
||||
dest->remove_globals_builtins_changed += src->remove_globals_builtins_changed;
|
||||
dest->remove_globals_incorrect_keys += src->remove_globals_incorrect_keys;
|
||||
dest->jit_total_memory_size += src->jit_total_memory_size;
|
||||
dest->jit_code_size += src->jit_code_size;
|
||||
dest->jit_trampoline_size += src->jit_trampoline_size;
|
||||
dest->jit_data_size += src->jit_data_size;
|
||||
dest->jit_padding_size += src->jit_padding_size;
|
||||
dest->jit_freed_memory_size += src->jit_freed_memory_size;
|
||||
|
||||
merge_uop_stats_array(dest->opcode, src->opcode);
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
dest->unsupported_opcode[i] += src->unsupported_opcode[i];
|
||||
}
|
||||
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
|
||||
dest->trace_length_hist[i] += src->trace_length_hist[i];
|
||||
dest->trace_run_length_hist[i] += src->trace_run_length_hist[i];
|
||||
dest->optimized_trace_length_hist[i] += src->optimized_trace_length_hist[i];
|
||||
dest->trace_total_memory_hist[i] += src->trace_total_memory_hist[i];
|
||||
}
|
||||
for (int i = 0; i <= PYSTATS_MAX_UOP_ID; i++) {
|
||||
dest->error_in_opcode[i] += src->error_in_opcode[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
merge_ft_stats(FTStats *dest, const FTStats *src)
|
||||
{
|
||||
dest->mutex_sleeps = src->mutex_sleeps;
|
||||
dest->qsbr_polls = src->qsbr_polls;
|
||||
dest->world_stops = src->world_stops;
|
||||
}
|
||||
|
||||
static void
|
||||
merge_rare_event_stats(RareEventStats *dest, const RareEventStats *src)
|
||||
{
|
||||
dest->set_class += src->set_class;
|
||||
dest->set_bases += src->set_bases;
|
||||
dest->set_eval_frame_func += src->set_eval_frame_func;
|
||||
dest->builtin_dict += src->builtin_dict;
|
||||
dest->func_modification += src->func_modification;
|
||||
dest->watched_dict_modification += src->watched_dict_modification;
|
||||
dest->watched_globals_modification += src->watched_globals_modification;
|
||||
}
|
||||
|
||||
static void
|
||||
merge_gc_stats_array(GCStats *dest, const GCStats *src)
|
||||
{
|
||||
for (int i = 0; i < NUM_GENERATIONS; i++) {
|
||||
dest[i].collections += src[i].collections;
|
||||
dest[i].object_visits += src[i].object_visits;
|
||||
dest[i].objects_collected += src[i].objects_collected;
|
||||
dest[i].objects_transitively_reachable += src[i].objects_transitively_reachable;
|
||||
dest[i].objects_not_transitively_reachable += src[i].objects_not_transitively_reachable;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
stats_zero_thread(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
// Zero the thread local stat counters
|
||||
if (tstate->pystats_struct) {
|
||||
memset(tstate->pystats_struct, 0, sizeof(PyStats));
|
||||
}
|
||||
}
|
||||
|
||||
// merge stats for a single thread into the global structure
|
||||
void
|
||||
stats_merge_thread(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
PyStats *src = tstate->pystats_struct;
|
||||
PyStats *dest = ((PyThreadState *)tstate)->interp->pystats_struct;
|
||||
|
||||
if (src == NULL || dest == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Merge each category of stats using the helper functions.
|
||||
merge_opcode_stats_array(dest->opcode_stats, src->opcode_stats);
|
||||
merge_call_stats(&dest->call_stats, &src->call_stats);
|
||||
merge_object_stats(&dest->object_stats, &src->object_stats);
|
||||
merge_optimization_stats(&dest->optimization_stats, &src->optimization_stats);
|
||||
merge_ft_stats(&dest->ft_stats, &src->ft_stats);
|
||||
merge_rare_event_stats(&dest->rare_event_stats, &src->rare_event_stats);
|
||||
merge_gc_stats_array(dest->gc_stats, src->gc_stats);
|
||||
}
|
||||
#endif // Py_GIL_DISABLED
|
||||
|
||||
// toggle stats collection on or off for all threads
|
||||
static int
|
||||
stats_toggle_on_off(PyThreadState *tstate, int on)
|
||||
{
|
||||
bool changed = false;
|
||||
PyInterpreterState *interp = tstate->interp;
|
||||
STATS_LOCK(interp);
|
||||
if (on && interp->pystats_struct == NULL) {
|
||||
PyStats *s = PyMem_RawCalloc(1, sizeof(PyStats));
|
||||
if (s == NULL) {
|
||||
STATS_UNLOCK(interp);
|
||||
return -1;
|
||||
}
|
||||
FT_ATOMIC_STORE_PTR_RELAXED(interp->pystats_struct, s);
|
||||
}
|
||||
if (tstate->interp->pystats_enabled != on) {
|
||||
FT_ATOMIC_STORE_INT_RELAXED(interp->pystats_enabled, on);
|
||||
changed = true;
|
||||
}
|
||||
STATS_UNLOCK(interp);
|
||||
if (!changed) {
|
||||
return 0;
|
||||
}
|
||||
_PyEval_StopTheWorld(interp);
|
||||
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
|
||||
PyStats *s = NULL;
|
||||
if (interp->pystats_enabled) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_PyThreadStateImpl *ts_impl = (_PyThreadStateImpl *)ts;
|
||||
if (ts_impl->pystats_struct == NULL) {
|
||||
// first activation for this thread, allocate structure
|
||||
ts_impl->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
|
||||
}
|
||||
s = ts_impl->pystats_struct;
|
||||
#else
|
||||
s = ts->interp->pystats_struct;
|
||||
#endif
|
||||
}
|
||||
ts->pystats = s;
|
||||
}
|
||||
_PyEval_StartTheWorld(interp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// zero stats for all threads and for the interpreter
|
||||
static void
|
||||
stats_zero_all(void)
|
||||
{
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
if (tstate == NULL) {
|
||||
return;
|
||||
}
|
||||
if (FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL) {
|
||||
return;
|
||||
}
|
||||
PyInterpreterState *interp = tstate->interp;
|
||||
_PyEval_StopTheWorld(interp);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
|
||||
stats_zero_thread((_PyThreadStateImpl *)ts);
|
||||
}
|
||||
#endif
|
||||
if (interp->pystats_struct) {
|
||||
memset(interp->pystats_struct, 0, sizeof(PyStats));
|
||||
}
|
||||
_PyEval_StartTheWorld(interp);
|
||||
}
|
||||
|
||||
// merge stats for all threads into the per-interpreter structure
|
||||
static void
|
||||
stats_merge_all(void)
|
||||
{
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
if (tstate == NULL) {
|
||||
return;
|
||||
}
|
||||
if (FT_ATOMIC_LOAD_PTR_RELAXED(tstate->interp->pystats_struct) == NULL) {
|
||||
return;
|
||||
}
|
||||
PyInterpreterState *interp = tstate->interp;
|
||||
_PyEval_StopTheWorld(interp);
|
||||
#ifdef Py_GIL_DISABLED
|
||||
_Py_FOR_EACH_TSTATE_UNLOCKED(interp, ts) {
|
||||
stats_merge_thread((_PyThreadStateImpl *)ts);
|
||||
stats_zero_thread((_PyThreadStateImpl *)ts);
|
||||
}
|
||||
#endif
|
||||
_PyEval_StartTheWorld(interp);
|
||||
}
|
||||
|
||||
// Enable stats collection for the current interpreter.
// Returns the result of stats_toggle_on_off(): 0 on success, -1 on failure.
int
_Py_StatsOn(void)
{
    return stats_toggle_on_off(_PyThreadState_GET(), 1);
}
|
||||
|
||||
// Disable stats collection for the current interpreter.  The status
// returned by stats_toggle_on_off() is deliberately discarded.
void
_Py_StatsOff(void)
{
    (void)stats_toggle_on_off(_PyThreadState_GET(), 0);
}
|
||||
|
||||
// Reset every stats counter of the current interpreter; thin public wrapper
// around stats_zero_all().
void
_Py_StatsClear(void)
{
    stats_zero_all();
}
|
||||
|
||||
// Return 1 if the `size` bytes starting at `ptr` are all zero, else 0.
// An empty range (size == 0) counts as all zero.  The buffer is only read,
// so it is taken as a pointer to const.
static int
mem_is_zero(const unsigned char *ptr, size_t size)
{
    for (size_t i = 0; i < size; i++) {
        if (ptr[i] != 0) {
            return 0;
        }
    }
    return 1;
}
|
||||
|
||||
int
|
||||
_Py_PrintSpecializationStats(int to_file)
|
||||
{
|
||||
assert(to_file);
|
||||
stats_merge_all();
|
||||
PyThreadState *tstate = _PyThreadState_GET();
|
||||
STATS_LOCK(tstate->interp);
|
||||
PyStats *stats = tstate->interp->pystats_struct;
|
||||
if (stats == NULL) {
|
||||
STATS_UNLOCK(tstate->interp);
|
||||
return 0;
|
||||
}
|
||||
#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
|
||||
int is_zero = (
|
||||
MEM_IS_ZERO(stats->gc_stats) // is a pointer
|
||||
&& MEM_IS_ZERO(&stats->opcode_stats)
|
||||
&& MEM_IS_ZERO(&stats->call_stats)
|
||||
&& MEM_IS_ZERO(&stats->object_stats)
|
||||
);
|
||||
#undef MEM_IS_ZERO
|
||||
STATS_UNLOCK(tstate->interp);
|
||||
if (is_zero) {
|
||||
// gh-108753: -X pystats command line was used, but then _stats_off()
|
||||
// and _stats_clear() have been called: in this case, avoid printing
|
||||
// useless "all zeros" statistics.
|
||||
return 0;
|
||||
}
|
||||
|
||||
FILE *out = stderr;
|
||||
if (to_file) {
|
||||
/* Write to a file instead of stderr. */
|
||||
# ifdef MS_WINDOWS
|
||||
const char *dirname = "c:\\temp\\py_stats\\";
|
||||
# else
|
||||
const char *dirname = "/tmp/py_stats/";
|
||||
# endif
|
||||
/* Use random 160 bit number as file name,
|
||||
* to avoid both accidental collisions and
|
||||
* symlink attacks. */
|
||||
unsigned char rand[20];
|
||||
char hex_name[41];
|
||||
_PyOS_URandomNonblock(rand, 20);
|
||||
for (int i = 0; i < 20; i++) {
|
||||
hex_name[2*i] = Py_hexdigits[rand[i]&15];
|
||||
hex_name[2*i+1] = Py_hexdigits[(rand[i]>>4)&15];
|
||||
}
|
||||
hex_name[40] = '\0';
|
||||
char buf[64];
|
||||
assert(strlen(dirname) + 40 + strlen(".txt") < 64);
|
||||
sprintf(buf, "%s%s.txt", dirname, hex_name);
|
||||
FILE *fout = fopen(buf, "w");
|
||||
if (fout) {
|
||||
out = fout;
|
||||
}
|
||||
}
|
||||
else {
|
||||
fprintf(out, "Specialization stats:\n");
|
||||
}
|
||||
STATS_LOCK(tstate->interp);
|
||||
print_stats(out, stats);
|
||||
STATS_UNLOCK(tstate->interp);
|
||||
if (out != stderr) {
|
||||
fclose(out);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
PyStatus
|
||||
_PyStats_InterpInit(PyInterpreterState *interp)
|
||||
{
|
||||
if (interp->config._pystats) {
|
||||
// start with pystats enabled, can be disabled via sys._stats_off()
|
||||
// this needs to be set before the first tstate is created
|
||||
interp->pystats_enabled = 1;
|
||||
interp->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
|
||||
if (interp->pystats_struct == NULL) {
|
||||
return _PyStatus_ERR("out-of-memory while initializing interpreter");
|
||||
}
|
||||
}
|
||||
return _PyStatus_OK();
|
||||
}
|
||||
|
||||
bool
|
||||
_PyStats_ThreadInit(PyInterpreterState *interp, _PyThreadStateImpl *tstate)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
if (FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
|
||||
assert(interp->pystats_struct != NULL);
|
||||
tstate->pystats_struct = PyMem_RawCalloc(1, sizeof(PyStats));
|
||||
if (tstate->pystats_struct == NULL) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
_PyStats_ThreadFini(_PyThreadStateImpl *tstate)
|
||||
{
|
||||
#ifdef Py_GIL_DISABLED
|
||||
STATS_LOCK(((PyThreadState *)tstate)->interp);
|
||||
stats_merge_thread(tstate);
|
||||
STATS_UNLOCK(((PyThreadState *)tstate)->interp);
|
||||
PyMem_RawFree(tstate->pystats_struct);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
_PyStats_Attach(_PyThreadStateImpl *tstate_impl)
|
||||
{
|
||||
PyStats *s;
|
||||
PyThreadState *tstate = (PyThreadState *)tstate_impl;
|
||||
PyInterpreterState *interp = tstate->interp;
|
||||
if (FT_ATOMIC_LOAD_INT_RELAXED(interp->pystats_enabled)) {
|
||||
#ifdef Py_GIL_DISABLED
|
||||
s = ((_PyThreadStateImpl *)tstate)->pystats_struct;
|
||||
#else
|
||||
s = tstate->interp->pystats_struct;
|
||||
#endif
|
||||
}
|
||||
else {
|
||||
s = NULL;
|
||||
}
|
||||
tstate->pystats = s;
|
||||
}
|
||||
|
||||
void
|
||||
_PyStats_Detach(_PyThreadStateImpl *tstate_impl)
|
||||
{
|
||||
((PyThreadState *)tstate_impl)->pystats = NULL;
|
||||
}
|
||||
|
||||
#endif // Py_STATS
|
||||
@@ -36,6 +36,7 @@
|
||||
#include "pycore_pystate.h" // _PyThreadState_GET()
|
||||
#include "pycore_qsbr.h"
|
||||
#include "pycore_tstate.h" // _PyThreadStateImpl
|
||||
#include "pycore_stats.h" // FT_STAT_QSBR_POLL_INC()
|
||||
|
||||
|
||||
// Starting size of the array of qsbr thread states
|
||||
@@ -158,7 +159,7 @@ _Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal)
|
||||
if (_Py_qbsr_goal_reached(qsbr, goal)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
FT_STAT_QSBR_POLL_INC();
|
||||
uint64_t rd_seq = qsbr_poll_scan(qsbr->shared);
|
||||
return QSBR_LEQ(goal, rd_seq);
|
||||
}
|
||||
|
||||
@@ -22,437 +22,23 @@
|
||||
|
||||
#include <stdlib.h> // rand()
|
||||
|
||||
extern const char *_PyUOpName(int index);
|
||||
|
||||
/* For guidance on adding or extending families of instructions see
|
||||
* InternalDocs/interpreter.md `Specialization` section.
|
||||
*/
|
||||
|
||||
#ifdef Py_STATS
|
||||
GCStats _py_gc_stats[NUM_GENERATIONS] = { 0 };
|
||||
static PyStats _Py_stats_struct = { .gc_stats = _py_gc_stats };
|
||||
PyStats *_Py_stats = NULL;
|
||||
|
||||
#if PYSTATS_MAX_UOP_ID < MAX_UOP_ID
|
||||
#error "Not enough space allocated for pystats. Increase PYSTATS_MAX_UOP_ID to at least MAX_UOP_ID"
|
||||
#endif
|
||||
|
||||
#define ADD_STAT_TO_DICT(res, field) \
|
||||
do { \
|
||||
PyObject *val = PyLong_FromUnsignedLongLong(stats->field); \
|
||||
if (val == NULL) { \
|
||||
Py_DECREF(res); \
|
||||
return NULL; \
|
||||
} \
|
||||
if (PyDict_SetItemString(res, #field, val) == -1) { \
|
||||
Py_DECREF(res); \
|
||||
Py_DECREF(val); \
|
||||
return NULL; \
|
||||
} \
|
||||
Py_DECREF(val); \
|
||||
} while(0);
|
||||
|
||||
static PyObject*
|
||||
stats_to_dict(SpecializationStats *stats)
|
||||
{
|
||||
PyObject *res = PyDict_New();
|
||||
if (res == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
ADD_STAT_TO_DICT(res, success);
|
||||
ADD_STAT_TO_DICT(res, failure);
|
||||
ADD_STAT_TO_DICT(res, hit);
|
||||
ADD_STAT_TO_DICT(res, deferred);
|
||||
ADD_STAT_TO_DICT(res, miss);
|
||||
ADD_STAT_TO_DICT(res, deopt);
|
||||
PyObject *failure_kinds = PyTuple_New(SPECIALIZATION_FAILURE_KINDS);
|
||||
if (failure_kinds == NULL) {
|
||||
Py_DECREF(res);
|
||||
return NULL;
|
||||
}
|
||||
for (int i = 0; i < SPECIALIZATION_FAILURE_KINDS; i++) {
|
||||
PyObject *stat = PyLong_FromUnsignedLongLong(stats->failure_kinds[i]);
|
||||
if (stat == NULL) {
|
||||
Py_DECREF(res);
|
||||
Py_DECREF(failure_kinds);
|
||||
return NULL;
|
||||
}
|
||||
PyTuple_SET_ITEM(failure_kinds, i, stat);
|
||||
}
|
||||
if (PyDict_SetItemString(res, "failure_kinds", failure_kinds)) {
|
||||
Py_DECREF(res);
|
||||
Py_DECREF(failure_kinds);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(failure_kinds);
|
||||
return res;
|
||||
}
|
||||
#undef ADD_STAT_TO_DICT
|
||||
|
||||
static int
|
||||
add_stat_dict(
|
||||
PyObject *res,
|
||||
int opcode,
|
||||
const char *name) {
|
||||
|
||||
SpecializationStats *stats = &_Py_stats_struct.opcode_stats[opcode].specialization;
|
||||
PyObject *d = stats_to_dict(stats);
|
||||
if (d == NULL) {
|
||||
return -1;
|
||||
}
|
||||
int err = PyDict_SetItemString(res, name, d);
|
||||
Py_DECREF(d);
|
||||
return err;
|
||||
}
|
||||
|
||||
PyObject*
|
||||
_Py_GetSpecializationStats(void) {
|
||||
PyObject *stats = PyDict_New();
|
||||
if (stats == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
int err = 0;
|
||||
err += add_stat_dict(stats, CONTAINS_OP, "contains_op");
|
||||
err += add_stat_dict(stats, LOAD_SUPER_ATTR, "load_super_attr");
|
||||
err += add_stat_dict(stats, LOAD_ATTR, "load_attr");
|
||||
err += add_stat_dict(stats, LOAD_GLOBAL, "load_global");
|
||||
err += add_stat_dict(stats, STORE_SUBSCR, "store_subscr");
|
||||
err += add_stat_dict(stats, STORE_ATTR, "store_attr");
|
||||
err += add_stat_dict(stats, JUMP_BACKWARD, "jump_backward");
|
||||
err += add_stat_dict(stats, CALL, "call");
|
||||
err += add_stat_dict(stats, CALL_KW, "call_kw");
|
||||
err += add_stat_dict(stats, BINARY_OP, "binary_op");
|
||||
err += add_stat_dict(stats, COMPARE_OP, "compare_op");
|
||||
err += add_stat_dict(stats, UNPACK_SEQUENCE, "unpack_sequence");
|
||||
err += add_stat_dict(stats, FOR_ITER, "for_iter");
|
||||
err += add_stat_dict(stats, TO_BOOL, "to_bool");
|
||||
err += add_stat_dict(stats, SEND, "send");
|
||||
if (err < 0) {
|
||||
Py_DECREF(stats);
|
||||
return NULL;
|
||||
}
|
||||
return stats;
|
||||
}
|
||||
|
||||
|
||||
#define PRINT_STAT(i, field) \
|
||||
if (stats[i].field) { \
|
||||
fprintf(out, " opcode[%s]." #field " : %" PRIu64 "\n", _PyOpcode_OpName[i], stats[i].field); \
|
||||
}
|
||||
|
||||
static void
|
||||
print_spec_stats(FILE *out, OpcodeStats *stats)
|
||||
{
|
||||
/* Mark some opcodes as specializable for stats,
|
||||
* even though we don't specialize them yet. */
|
||||
fprintf(out, "opcode[BINARY_SLICE].specializable : 1\n");
|
||||
fprintf(out, "opcode[STORE_SLICE].specializable : 1\n");
|
||||
fprintf(out, "opcode[GET_ITER].specializable : 1\n");
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (_PyOpcode_Caches[i]) {
|
||||
/* Ignore jumps as they cannot be specialized */
|
||||
switch (i) {
|
||||
case POP_JUMP_IF_FALSE:
|
||||
case POP_JUMP_IF_TRUE:
|
||||
case POP_JUMP_IF_NONE:
|
||||
case POP_JUMP_IF_NOT_NONE:
|
||||
case JUMP_BACKWARD:
|
||||
break;
|
||||
default:
|
||||
fprintf(out, "opcode[%s].specializable : 1\n", _PyOpcode_OpName[i]);
|
||||
}
|
||||
}
|
||||
PRINT_STAT(i, specialization.success);
|
||||
PRINT_STAT(i, specialization.failure);
|
||||
PRINT_STAT(i, specialization.hit);
|
||||
PRINT_STAT(i, specialization.deferred);
|
||||
PRINT_STAT(i, specialization.miss);
|
||||
PRINT_STAT(i, specialization.deopt);
|
||||
PRINT_STAT(i, execution_count);
|
||||
for (int j = 0; j < SPECIALIZATION_FAILURE_KINDS; j++) {
|
||||
uint64_t val = stats[i].specialization.failure_kinds[j];
|
||||
if (val) {
|
||||
fprintf(out, " opcode[%s].specialization.failure_kinds[%d] : %"
|
||||
PRIu64 "\n", _PyOpcode_OpName[i], j, val);
|
||||
}
|
||||
}
|
||||
for (int j = 0; j < 256; j++) {
|
||||
if (stats[i].pair_count[j]) {
|
||||
fprintf(out, "opcode[%s].pair_count[%s] : %" PRIu64 "\n",
|
||||
_PyOpcode_OpName[i], _PyOpcode_OpName[j], stats[i].pair_count[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#undef PRINT_STAT
|
||||
|
||||
|
||||
static void
|
||||
print_call_stats(FILE *out, CallStats *stats)
|
||||
{
|
||||
fprintf(out, "Calls to PyEval_EvalDefault: %" PRIu64 "\n", stats->pyeval_calls);
|
||||
fprintf(out, "Calls to Python functions inlined: %" PRIu64 "\n", stats->inlined_py_calls);
|
||||
fprintf(out, "Frames pushed: %" PRIu64 "\n", stats->frames_pushed);
|
||||
fprintf(out, "Frame objects created: %" PRIu64 "\n", stats->frame_objects_created);
|
||||
for (int i = 0; i < EVAL_CALL_KINDS; i++) {
|
||||
fprintf(out, "Calls via PyEval_EvalFrame[%d] : %" PRIu64 "\n", i, stats->eval_calls[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_object_stats(FILE *out, ObjectStats *stats)
|
||||
{
|
||||
fprintf(out, "Object allocations from freelist: %" PRIu64 "\n", stats->from_freelist);
|
||||
fprintf(out, "Object frees to freelist: %" PRIu64 "\n", stats->to_freelist);
|
||||
fprintf(out, "Object allocations: %" PRIu64 "\n", stats->allocations);
|
||||
fprintf(out, "Object allocations to 512 bytes: %" PRIu64 "\n", stats->allocations512);
|
||||
fprintf(out, "Object allocations to 4 kbytes: %" PRIu64 "\n", stats->allocations4k);
|
||||
fprintf(out, "Object allocations over 4 kbytes: %" PRIu64 "\n", stats->allocations_big);
|
||||
fprintf(out, "Object frees: %" PRIu64 "\n", stats->frees);
|
||||
fprintf(out, "Object inline values: %" PRIu64 "\n", stats->inline_values);
|
||||
fprintf(out, "Object interpreter mortal increfs: %" PRIu64 "\n", stats->interpreter_increfs);
|
||||
fprintf(out, "Object interpreter mortal decrefs: %" PRIu64 "\n", stats->interpreter_decrefs);
|
||||
fprintf(out, "Object mortal increfs: %" PRIu64 "\n", stats->increfs);
|
||||
fprintf(out, "Object mortal decrefs: %" PRIu64 "\n", stats->decrefs);
|
||||
fprintf(out, "Object interpreter immortal increfs: %" PRIu64 "\n", stats->interpreter_immortal_increfs);
|
||||
fprintf(out, "Object interpreter immortal decrefs: %" PRIu64 "\n", stats->interpreter_immortal_decrefs);
|
||||
fprintf(out, "Object immortal increfs: %" PRIu64 "\n", stats->immortal_increfs);
|
||||
fprintf(out, "Object immortal decrefs: %" PRIu64 "\n", stats->immortal_decrefs);
|
||||
fprintf(out, "Object materialize dict (on request): %" PRIu64 "\n", stats->dict_materialized_on_request);
|
||||
fprintf(out, "Object materialize dict (new key): %" PRIu64 "\n", stats->dict_materialized_new_key);
|
||||
fprintf(out, "Object materialize dict (too big): %" PRIu64 "\n", stats->dict_materialized_too_big);
|
||||
fprintf(out, "Object materialize dict (str subclass): %" PRIu64 "\n", stats->dict_materialized_str_subclass);
|
||||
fprintf(out, "Object method cache hits: %" PRIu64 "\n", stats->type_cache_hits);
|
||||
fprintf(out, "Object method cache misses: %" PRIu64 "\n", stats->type_cache_misses);
|
||||
fprintf(out, "Object method cache collisions: %" PRIu64 "\n", stats->type_cache_collisions);
|
||||
fprintf(out, "Object method cache dunder hits: %" PRIu64 "\n", stats->type_cache_dunder_hits);
|
||||
fprintf(out, "Object method cache dunder misses: %" PRIu64 "\n", stats->type_cache_dunder_misses);
|
||||
}
|
||||
|
||||
static void
|
||||
print_gc_stats(FILE *out, GCStats *stats)
|
||||
{
|
||||
for (int i = 0; i < NUM_GENERATIONS; i++) {
|
||||
fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections);
|
||||
fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits);
|
||||
fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected);
|
||||
fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable);
|
||||
fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _Py_TIER2
|
||||
static void
|
||||
print_histogram(FILE *out, const char *name, uint64_t hist[_Py_UOP_HIST_SIZE])
|
||||
{
|
||||
for (int i = 0; i < _Py_UOP_HIST_SIZE; i++) {
|
||||
fprintf(out, "%s[%" PRIu64"]: %" PRIu64 "\n", name, (uint64_t)1 << i, hist[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_optimization_stats(FILE *out, OptimizationStats *stats)
|
||||
{
|
||||
fprintf(out, "Optimization attempts: %" PRIu64 "\n", stats->attempts);
|
||||
fprintf(out, "Optimization traces created: %" PRIu64 "\n", stats->traces_created);
|
||||
fprintf(out, "Optimization traces executed: %" PRIu64 "\n", stats->traces_executed);
|
||||
fprintf(out, "Optimization uops executed: %" PRIu64 "\n", stats->uops_executed);
|
||||
fprintf(out, "Optimization trace stack overflow: %" PRIu64 "\n", stats->trace_stack_overflow);
|
||||
fprintf(out, "Optimization trace stack underflow: %" PRIu64 "\n", stats->trace_stack_underflow);
|
||||
fprintf(out, "Optimization trace too long: %" PRIu64 "\n", stats->trace_too_long);
|
||||
fprintf(out, "Optimization trace too short: %" PRIu64 "\n", stats->trace_too_short);
|
||||
fprintf(out, "Optimization inner loop: %" PRIu64 "\n", stats->inner_loop);
|
||||
fprintf(out, "Optimization recursive call: %" PRIu64 "\n", stats->recursive_call);
|
||||
fprintf(out, "Optimization low confidence: %" PRIu64 "\n", stats->low_confidence);
|
||||
fprintf(out, "Optimization unknown callee: %" PRIu64 "\n", stats->unknown_callee);
|
||||
fprintf(out, "Executors invalidated: %" PRIu64 "\n", stats->executors_invalidated);
|
||||
|
||||
print_histogram(out, "Trace length", stats->trace_length_hist);
|
||||
print_histogram(out, "Trace run length", stats->trace_run_length_hist);
|
||||
print_histogram(out, "Optimized trace length", stats->optimized_trace_length_hist);
|
||||
|
||||
fprintf(out, "Optimization optimizer attempts: %" PRIu64 "\n", stats->optimizer_attempts);
|
||||
fprintf(out, "Optimization optimizer successes: %" PRIu64 "\n", stats->optimizer_successes);
|
||||
fprintf(out, "Optimization optimizer failure no memory: %" PRIu64 "\n",
|
||||
stats->optimizer_failure_reason_no_memory);
|
||||
fprintf(out, "Optimizer remove globals builtins changed: %" PRIu64 "\n", stats->remove_globals_builtins_changed);
|
||||
fprintf(out, "Optimizer remove globals incorrect keys: %" PRIu64 "\n", stats->remove_globals_incorrect_keys);
|
||||
for (int i = 0; i <= MAX_UOP_ID; i++) {
|
||||
if (stats->opcode[i].execution_count) {
|
||||
fprintf(out, "uops[%s].execution_count : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].execution_count);
|
||||
}
|
||||
if (stats->opcode[i].miss) {
|
||||
fprintf(out, "uops[%s].specialization.miss : %" PRIu64 "\n", _PyUOpName(i), stats->opcode[i].miss);
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 256; i++) {
|
||||
if (stats->unsupported_opcode[i]) {
|
||||
fprintf(
|
||||
out,
|
||||
"unsupported_opcode[%s].count : %" PRIu64 "\n",
|
||||
_PyOpcode_OpName[i],
|
||||
stats->unsupported_opcode[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 1; i <= MAX_UOP_ID; i++){
|
||||
for (int j = 1; j <= MAX_UOP_ID; j++) {
|
||||
if (stats->opcode[i].pair_count[j]) {
|
||||
fprintf(out, "uop[%s].pair_count[%s] : %" PRIu64 "\n",
|
||||
_PyOpcode_uop_name[i], _PyOpcode_uop_name[j], stats->opcode[i].pair_count[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < MAX_UOP_ID; i++) {
|
||||
if (stats->error_in_opcode[i]) {
|
||||
fprintf(
|
||||
out,
|
||||
"error_in_opcode[%s].count : %" PRIu64 "\n",
|
||||
_PyUOpName(i),
|
||||
stats->error_in_opcode[i]
|
||||
);
|
||||
}
|
||||
}
|
||||
fprintf(out, "JIT total memory size: %" PRIu64 "\n", stats->jit_total_memory_size);
|
||||
fprintf(out, "JIT code size: %" PRIu64 "\n", stats->jit_code_size);
|
||||
fprintf(out, "JIT trampoline size: %" PRIu64 "\n", stats->jit_trampoline_size);
|
||||
fprintf(out, "JIT data size: %" PRIu64 "\n", stats->jit_data_size);
|
||||
fprintf(out, "JIT padding size: %" PRIu64 "\n", stats->jit_padding_size);
|
||||
fprintf(out, "JIT freed memory size: %" PRIu64 "\n", stats->jit_freed_memory_size);
|
||||
|
||||
print_histogram(out, "Trace total memory size", stats->trace_total_memory_hist);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
print_rare_event_stats(FILE *out, RareEventStats *stats)
|
||||
{
|
||||
fprintf(out, "Rare event (set_class): %" PRIu64 "\n", stats->set_class);
|
||||
fprintf(out, "Rare event (set_bases): %" PRIu64 "\n", stats->set_bases);
|
||||
fprintf(out, "Rare event (set_eval_frame_func): %" PRIu64 "\n", stats->set_eval_frame_func);
|
||||
fprintf(out, "Rare event (builtin_dict): %" PRIu64 "\n", stats->builtin_dict);
|
||||
fprintf(out, "Rare event (func_modification): %" PRIu64 "\n", stats->func_modification);
|
||||
fprintf(out, "Rare event (watched_dict_modification): %" PRIu64 "\n", stats->watched_dict_modification);
|
||||
fprintf(out, "Rare event (watched_globals_modification): %" PRIu64 "\n", stats->watched_globals_modification);
|
||||
}
|
||||
|
||||
static void
|
||||
print_stats(FILE *out, PyStats *stats)
|
||||
{
|
||||
print_spec_stats(out, stats->opcode_stats);
|
||||
print_call_stats(out, &stats->call_stats);
|
||||
print_object_stats(out, &stats->object_stats);
|
||||
print_gc_stats(out, stats->gc_stats);
|
||||
#ifdef _Py_TIER2
|
||||
print_optimization_stats(out, &stats->optimization_stats);
|
||||
#endif
|
||||
print_rare_event_stats(out, &stats->rare_event_stats);
|
||||
}
|
||||
|
||||
void
|
||||
_Py_StatsOn(void)
|
||||
{
|
||||
_Py_stats = &_Py_stats_struct;
|
||||
}
|
||||
|
||||
void
|
||||
_Py_StatsOff(void)
|
||||
{
|
||||
_Py_stats = NULL;
|
||||
}
|
||||
|
||||
void
|
||||
_Py_StatsClear(void)
|
||||
{
|
||||
memset(&_py_gc_stats, 0, sizeof(_py_gc_stats));
|
||||
memset(&_Py_stats_struct, 0, sizeof(_Py_stats_struct));
|
||||
_Py_stats_struct.gc_stats = _py_gc_stats;
|
||||
}
|
||||
|
||||
/* Return 1 if the `size` bytes starting at `ptr` are all zero, 0 otherwise.
 * An empty range (size == 0) counts as all-zero.  The memory is only read,
 * hence the const-qualified pointer. */
static int
mem_is_zero(const unsigned char *ptr, size_t size)
{
    for (size_t i = 0; i < size; i++) {
        if (ptr[i] != 0) {
            return 0;
        }
    }
    return 1;
}
|
||||
|
||||
int
|
||||
_Py_PrintSpecializationStats(int to_file)
|
||||
{
|
||||
PyStats *stats = &_Py_stats_struct;
|
||||
#define MEM_IS_ZERO(DATA) mem_is_zero((unsigned char*)DATA, sizeof(*(DATA)))
|
||||
int is_zero = (
|
||||
MEM_IS_ZERO(stats->gc_stats) // is a pointer
|
||||
&& MEM_IS_ZERO(&stats->opcode_stats)
|
||||
&& MEM_IS_ZERO(&stats->call_stats)
|
||||
&& MEM_IS_ZERO(&stats->object_stats)
|
||||
);
|
||||
#undef MEM_IS_ZERO
|
||||
if (is_zero) {
|
||||
// gh-108753: -X pystats command line was used, but then _stats_off()
|
||||
// and _stats_clear() have been called: in this case, avoid printing
|
||||
// useless "all zeros" statistics.
|
||||
return 0;
|
||||
}
|
||||
|
||||
FILE *out = stderr;
|
||||
if (to_file) {
|
||||
/* Write to a file instead of stderr. */
|
||||
# ifdef MS_WINDOWS
|
||||
const char *dirname = "c:\\temp\\py_stats\\";
|
||||
# else
|
||||
const char *dirname = "/tmp/py_stats/";
|
||||
# endif
|
||||
/* Use random 160 bit number as file name,
|
||||
* to avoid both accidental collisions and
|
||||
* symlink attacks. */
|
||||
unsigned char rand[20];
|
||||
char hex_name[41];
|
||||
_PyOS_URandomNonblock(rand, 20);
|
||||
for (int i = 0; i < 20; i++) {
|
||||
hex_name[2*i] = Py_hexdigits[rand[i]&15];
|
||||
hex_name[2*i+1] = Py_hexdigits[(rand[i]>>4)&15];
|
||||
}
|
||||
hex_name[40] = '\0';
|
||||
char buf[64];
|
||||
assert(strlen(dirname) + 40 + strlen(".txt") < 64);
|
||||
sprintf(buf, "%s%s.txt", dirname, hex_name);
|
||||
FILE *fout = fopen(buf, "w");
|
||||
if (fout) {
|
||||
out = fout;
|
||||
}
|
||||
}
|
||||
else {
|
||||
fprintf(out, "Specialization stats:\n");
|
||||
}
|
||||
print_stats(out, stats);
|
||||
if (out != stderr) {
|
||||
fclose(out);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
#if Py_STATS
|
||||
#define SPECIALIZATION_FAIL(opcode, kind) \
|
||||
do { \
|
||||
if (_Py_stats) { \
|
||||
PyStats *s = _PyStats_GET(); \
|
||||
if (s) { \
|
||||
int _kind = (kind); \
|
||||
assert(_kind < SPECIALIZATION_FAILURE_KINDS); \
|
||||
_Py_stats->opcode_stats[opcode].specialization.failure_kinds[_kind]++; \
|
||||
s->opcode_stats[opcode].specialization.failure_kinds[_kind]++; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#endif // Py_STATS
|
||||
|
||||
|
||||
#ifndef SPECIALIZATION_FAIL
|
||||
#else
|
||||
# define SPECIALIZATION_FAIL(opcode, kind) ((void)0)
|
||||
#endif
|
||||
#endif // Py_STATS
|
||||
|
||||
// Initialize warmup counters and optimize instructions. This cannot fail.
|
||||
void
|
||||
|
||||
@@ -2281,7 +2281,9 @@ static PyObject *
|
||||
sys__stats_on_impl(PyObject *module)
|
||||
/*[clinic end generated code: output=aca53eafcbb4d9fe input=43b5bfe145299e55]*/
|
||||
{
|
||||
_Py_StatsOn();
|
||||
if (_Py_StatsOn() < 0) {
|
||||
return NULL;
|
||||
}
|
||||
Py_RETURN_NONE;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user