diff --git a/Doc/library/profile.rst b/Doc/library/profile.rst index faf8079db3ddd8..d56c5285e8b71e 100644 --- a/Doc/library/profile.rst +++ b/Doc/library/profile.rst @@ -265,6 +265,14 @@ Profile with real-time sampling statistics:: Sample all threads in the process instead of just the main thread +.. option:: --no-native + + Don't include artificial ``<native>`` frames to denote calls to non-Python code. + +.. option:: --no-gc + + Don't include artificial ``<GC>`` frames to denote active garbage collection. + .. option:: --realtime-stats Print real-time sampling statistics during profiling @@ -349,7 +357,7 @@ This section documents the programmatic interface for the :mod:`!profiling.sampl For command-line usage, see :ref:`sampling-profiler-cli`. For conceptual information about statistical profiling, see :ref:`statistical-profiling` -.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False) +.. function:: sample(pid, *, sort=2, sample_interval_usec=100, duration_sec=10, filename=None, all_threads=False, limit=None, show_summary=True, output_format="pstats", realtime_stats=False, native=True, gc=True) Sample a Python process and generate profiling data. 
@@ -367,6 +375,8 @@ about statistical profiling, see :ref:`statistical-profiling` :param bool show_summary: Whether to show summary statistics (default: True) :param str output_format: Output format - 'pstats' or 'collapsed' (default: 'pstats') :param bool realtime_stats: Whether to display real-time statistics (default: False) + :param bool native: Whether to include ``<native>`` frames (default: True) + :param bool gc: Whether to include ``<GC>`` frames (default: True) :raises ValueError: If output_format is not 'pstats' or 'collapsed' diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 8e7cd16acffa48..205baf3ea504e6 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -210,6 +210,7 @@ typedef struct _Py_DebugOffsets { struct _gc { uint64_t size; uint64_t collecting; + uint64_t frame; } gc; // Generator object offset; @@ -351,6 +352,7 @@ typedef struct _Py_DebugOffsets { .gc = { \ .size = sizeof(struct _gc_runtime_state), \ .collecting = offsetof(struct _gc_runtime_state, collecting), \ + .frame = offsetof(struct _gc_runtime_state, frame), \ }, \ .gen_object = { \ .size = sizeof(PyGenObject), \ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 92ded14891a101..ecef4364cc32df 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1326,10 +1326,12 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(format)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(gc)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder)); _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_STR(kwdefaults)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(native)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8)); @@ -1763,6 +1765,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fullerror)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(gc)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug)); @@ -1906,6 +1909,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(native)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index cd21b0847b7cdd..4dd73291df4513 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -46,10 +46,12 @@ struct _Py_global_strings { STRUCT_FOR_STR(dot_locals, ".") STRUCT_FOR_STR(empty, "") STRUCT_FOR_STR(format, ".format") + STRUCT_FOR_STR(gc, "") STRUCT_FOR_STR(generic_base, ".generic_base") STRUCT_FOR_STR(json_decoder, "json.decoder") STRUCT_FOR_STR(kwdefaults, ".kwdefaults") STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(native, "") STRUCT_FOR_STR(str_replace_inf, "1e309") 
STRUCT_FOR_STR(type_params, ".type_params") STRUCT_FOR_STR(utf_8, "utf-8") @@ -486,6 +488,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(fullerror) STRUCT_FOR_ID(func) STRUCT_FOR_ID(future) + STRUCT_FOR_ID(gc) STRUCT_FOR_ID(generation) STRUCT_FOR_ID(get) STRUCT_FOR_ID(get_debug) @@ -629,6 +632,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(name_from) STRUCT_FOR_ID(namespace_separator) STRUCT_FOR_ID(namespaces) + STRUCT_FOR_ID(native) STRUCT_FOR_ID(ndigits) STRUCT_FOR_ID(nested) STRUCT_FOR_ID(new_file_name) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index e8cbe9d894e1c7..b575a7b3873214 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -214,6 +214,9 @@ struct _gc_runtime_state { struct gc_generation_stats generation_stats[NUM_GENERATIONS]; /* true if we are currently running the collector */ int collecting; + // The frame that started the current collection. It might be NULL even when + // collecting (if no Python frame is running): + _PyInterpreterFrame *frame; /* list of uncollectable objects */ PyObject *garbage; /* a list of callbacks to be invoked when collection is performed */ diff --git a/Include/internal/pycore_interpframe_structs.h b/Include/internal/pycore_interpframe_structs.h index 835b8e58194863..38510685f4093c 100644 --- a/Include/internal/pycore_interpframe_structs.h +++ b/Include/internal/pycore_interpframe_structs.h @@ -24,7 +24,6 @@ enum _frameowner { FRAME_OWNED_BY_GENERATOR = 1, FRAME_OWNED_BY_FRAME_OBJECT = 2, FRAME_OWNED_BY_INTERPRETER = 3, - FRAME_OWNED_BY_CSTACK = 4, }; struct _PyInterpreterFrame { diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 50d82d0a365037..08f8d0e59d12e6 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1321,10 +1321,12 @@ extern "C" { INIT_STR(dot_locals, "."), \ 
INIT_STR(empty, ""), \ INIT_STR(format, ".format"), \ + INIT_STR(gc, ""), \ INIT_STR(generic_base, ".generic_base"), \ INIT_STR(json_decoder, "json.decoder"), \ INIT_STR(kwdefaults, ".kwdefaults"), \ INIT_STR(list_err, "list index out of range"), \ + INIT_STR(native, ""), \ INIT_STR(str_replace_inf, "1e309"), \ INIT_STR(type_params, ".type_params"), \ INIT_STR(utf_8, "utf-8"), \ @@ -1761,6 +1763,7 @@ extern "C" { INIT_ID(fullerror), \ INIT_ID(func), \ INIT_ID(future), \ + INIT_ID(gc), \ INIT_ID(generation), \ INIT_ID(get), \ INIT_ID(get_debug), \ @@ -1904,6 +1907,7 @@ extern "C" { INIT_ID(name_from), \ INIT_ID(namespace_separator), \ INIT_ID(namespaces), \ + INIT_ID(native), \ INIT_ID(ndigits), \ INIT_ID(nested), \ INIT_ID(new_file_name), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index b4d920154b6e83..b1e57126b92d26 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -1732,6 +1732,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(gc); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(generation); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -2304,6 +2308,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(native); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(ndigits); _PyUnicode_InternStatic(interp, &string); 
assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3236,6 +3244,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(gc); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_null); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); @@ -3260,6 +3272,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(native); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_STR(anon_setcomp); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/profiling/sampling/sample.py b/Lib/profiling/sampling/sample.py index 7a0f739a5428c6..5f2096e9e1b405 100644 --- a/Lib/profiling/sampling/sample.py +++ b/Lib/profiling/sampling/sample.py @@ -136,18 +136,18 @@ def _run_with_sync(original_cmd): class SampleProfiler: - def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL): + def __init__(self, pid, sample_interval_usec, all_threads, *, mode=PROFILING_MODE_WALL, native=True, gc=True): self.pid = pid self.sample_interval_usec = sample_interval_usec self.all_threads = all_threads if _FREE_THREADED_BUILD: self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, all_threads=self.all_threads, mode=mode + self.pid, all_threads=self.all_threads, mode=mode, native=native, gc=gc ) else: only_active_threads = bool(self.all_threads) self.unwinder = _remote_debugging.RemoteUnwinder( - self.pid, only_active_thread=only_active_threads, mode=mode 
+ self.pid, only_active_thread=only_active_threads, mode=mode, native=native, gc=gc ) # Track sample intervals and total sample count self.sample_intervals = deque(maxlen=100) @@ -613,9 +613,11 @@ def sample( output_format="pstats", realtime_stats=False, mode=PROFILING_MODE_WALL, + native=True, + gc=True, ): profiler = SampleProfiler( - pid, sample_interval_usec, all_threads=all_threads, mode=mode + pid, sample_interval_usec, all_threads=all_threads, mode=mode, native=native, gc=gc ) profiler.realtime_stats = realtime_stats @@ -706,6 +708,8 @@ def wait_for_process_and_sample(pid, sort_value, args): output_format=args.format, realtime_stats=args.realtime_stats, mode=mode, + native=args.native, + gc=args.gc, ) @@ -756,9 +760,20 @@ def main(): sampling_group.add_argument( "--realtime-stats", action="store_true", - default=False, help="Print real-time sampling statistics (Hz, mean, min, max, stdev) during profiling", ) + sampling_group.add_argument( + "--no-native", + action="store_false", + dest="native", + help="Don't include artificial \"\" frames to denote calls to non-Python code.", + ) + sampling_group.add_argument( + "--no-gc", + action="store_false", + dest="gc", + help="Don't include artificial \"\" frames to denote active garbage collection.", + ) # Mode options mode_group = parser.add_argument_group("Mode options") @@ -915,6 +930,8 @@ def main(): output_format=args.format, realtime_stats=args.realtime_stats, mode=mode, + native=args.native, + gc=args.gc, ) elif args.module or args.args: if args.module: diff --git a/Lib/profiling/sampling/stack_collector.py b/Lib/profiling/sampling/stack_collector.py index bc38151e067989..ccc8185d3f892f 100644 --- a/Lib/profiling/sampling/stack_collector.py +++ b/Lib/profiling/sampling/stack_collector.py @@ -36,10 +36,16 @@ def process_frames(self, frames, thread_id): def export(self, filename): lines = [] for (call_tree, thread_id), count in self.stack_counter.items(): - stack_str = ";".join( - 
f"{os.path.basename(f[0])}:{f[2]}:{f[1]}" for f in call_tree - ) - lines.append((f"tid:{thread_id};{stack_str}", count)) + parts = [f"tid:{thread_id}"] + for file, line, func in call_tree: + # This is what pstats does for "special" frames: + if file == "~" and line == 0: + part = func + else: + part = f"{os.path.basename(file)}:{func}:{line}" + parts.append(part) + stack_str = ";".join(parts) + lines.append((stack_str, count)) lines.sort(key=lambda x: (-x[1], x[0])) diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index 01720457e61f5c..f4108f35996c5c 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -153,6 +153,8 @@ def foo(): FrameInfo([script_name, 12, "baz"]), FrameInfo([script_name, 9, "bar"]), FrameInfo([threading.__file__, ANY, "Thread.run"]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap_inner"]), + FrameInfo([threading.__file__, ANY, "Thread._bootstrap"]), ] # Is possible that there are more threads, so we check that the # expected stack traces are in the result (looking at you Windows!) 
diff --git a/Lib/test/test_profiling/test_sampling_profiler.py b/Lib/test/test_profiling/test_sampling_profiler.py index 5b924cb24531b6..31c3b86bd00882 100644 --- a/Lib/test/test_profiling/test_sampling_profiler.py +++ b/Lib/test/test_profiling/test_sampling_profiler.py @@ -124,7 +124,7 @@ def test_subprocess(script): server_socket.close() response = client_socket.recv(1024) if response != b"ready": - raise RuntimeError(f"Unexpected response from subprocess: {response}") + raise RuntimeError(f"Unexpected response from subprocess: {response!r}") yield SubprocessInfo(proc, client_socket) finally: @@ -1689,14 +1689,15 @@ class TestSampleProfilerIntegration(unittest.TestCase): @classmethod def setUpClass(cls): cls.test_script = ''' -import time -import os +import operator +import gc def slow_fibonacci(n): """Recursive fibonacci - should show up prominently in profiler.""" if n <= 1: return n - return slow_fibonacci(n-1) + slow_fibonacci(n-2) + # Use operator.call(...) to force native frames between interpreter frames: + return operator.call(slow_fibonacci, n-1) + slow_fibonacci(n-2) def cpu_intensive_work(): """CPU intensive work that should show in profiler.""" @@ -1707,46 +1708,32 @@ def cpu_intensive_work(): result = result % 1000000 return result -def medium_computation(): - """Medium complexity function.""" - result = 0 - for i in range(100): - result += i * i - return result - -def fast_loop(): - """Fast simple loop.""" - total = 0 - for i in range(50): - total += i - return total - def nested_calls(): """Test nested function calls.""" def level1(): def level2(): - return medium_computation() + return cpu_intensive_work() return level2() return level1() +class ExpensiveGarbage: + def __init__(self): + self.cycle = self + def __del__(self): + cpu_intensive_work() + +def garbage_collection(): + """GC-intensive work.""" + ExpensiveGarbage() + gc.collect() + def main_loop(): """Main test loop with different execution paths.""" - iteration = 0 - while True: - 
iteration += 1 - - # Different execution paths - focus on CPU intensive work - if iteration % 3 == 0: - # Very CPU intensive - result = cpu_intensive_work() - elif iteration % 5 == 0: - # Expensive recursive operation - result = slow_fibonacci(12) - else: - # Medium operation - result = nested_calls() - + cpu_intensive_work() + slow_fibonacci(24) + garbage_collection() + nested_calls() # No sleep - keep CPU busy if __name__ == "__main__": @@ -1778,6 +1765,8 @@ def test_sampling_basic_functionality(self): # Should see some of our test functions self.assertIn("slow_fibonacci", output) + self.assertIn("", output) + self.assertIn("", output) def test_sampling_with_pstats_export(self): pstats_out = tempfile.NamedTemporaryFile( @@ -1875,6 +1864,8 @@ def test_sampling_with_collapsed_export(self): stack_parts = stack_trace.split(";") for part in stack_parts: # Each part should be file:function:line + if part in {"", ""}: + continue self.assertIn(":", part) def test_sampling_all_threads(self): @@ -1925,7 +1916,8 @@ def test_sample_target_script(self): # Should see some of our test functions self.assertIn("slow_fibonacci", output) - + self.assertIn("", output) + self.assertIn("", output) def test_sample_target_module(self): tempdir = tempfile.TemporaryDirectory(delete=False) @@ -1959,6 +1951,39 @@ def test_sample_target_module(self): # Should see some of our test functions self.assertIn("slow_fibonacci", output) + self.assertIn("", output) + self.assertIn("", output) + + def test_sample_no_native_no_gc(self): + script_file = tempfile.NamedTemporaryFile(delete=False) + script_file.write(self.test_script.encode("utf-8")) + script_file.flush() + self.addCleanup(close_and_unlink, script_file) + + test_args = ["profiling.sampling.sample", "-d", "1", "--no-native", "--no-gc", script_file.name] + + with ( + mock.patch("sys.argv", test_args), + io.StringIO() as captured_output, + mock.patch("sys.stdout", captured_output), + ): + try: + profiling.sampling.sample.main() + except 
PermissionError: + self.skipTest("Insufficient permissions for remote profiling") + + output = captured_output.getvalue() + + # Basic checks on output + self.assertIn("Captured", output) + self.assertIn("samples", output) + self.assertIn("Profile Stats", output) + + # Should see some of our test functions + self.assertIn("slow_fibonacci", output) + # But not ones we intentionally excluded: + self.assertNotIn("", output) + self.assertNotIn("", output) @skip_if_not_supported @@ -2165,7 +2190,9 @@ def test_cli_module_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2193,7 +2220,9 @@ def test_cli_module_with_arguments(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2221,7 +2250,9 @@ def test_cli_script_argument_parsing(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2321,7 +2352,9 @@ def test_cli_module_with_profiler_options(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) @unittest.skipIf(is_emscripten, "socket.SO_REUSEADDR does not exist") @@ -2355,7 +2388,9 @@ def test_cli_script_with_profiler_options(self): show_summary=True, output_format="collapsed", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) def test_cli_empty_module_name(self): @@ -2567,7 +2602,9 @@ def test_argument_parsing_basic(self): show_summary=True, output_format="pstats", realtime_stats=False, - mode=0 + mode=0, + native=True, + gc=True, ) def test_sort_options(self): diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-11-05-19-50-37.gh-issue-140643.QCEOqG.rst 
b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-05-19-50-37.gh-issue-140643.QCEOqG.rst new file mode 100644 index 00000000000000..e1202dd1a17aec --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-11-05-19-50-37.gh-issue-140643.QCEOqG.rst @@ -0,0 +1,3 @@ +Add support for ``<GC>`` and ``<native>`` frames to +:mod:`!profiling.sampling` output to denote active garbage collection and +calls to native code. diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index c6ced39c70cdb3..e1682791ca90bf 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -25,8 +25,9 @@ #include "Python.h" #include // _Py_DebugOffsets #include // FRAME_SUSPENDED_YIELD_FROM -#include // FRAME_OWNED_BY_CSTACK +#include // FRAME_OWNED_BY_INTERPRETER #include // struct llist_node +#include // _PyLong_GetZero #include // Py_TAG_BITS #include "../Python/remote_debug.h" @@ -89,14 +90,16 @@ typedef enum _WIN32_THREADSTATE { #endif #ifdef Py_GIL_DISABLED -#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ - offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ - offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ - offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)) +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ + offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) #else -#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ - offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ - offsetof(PyInterpreterState, _gil.last_holder) + 
sizeof(PyThreadState*)) +#define INTERP_STATE_MIN_SIZE MAX(MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)), \ + offsetof(PyInterpreterState, _gil.last_holder) + sizeof(PyThreadState*)), \ + offsetof(PyInterpreterState, gc.frame) + sizeof(_PyInterpreterFrame *)) #endif #define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) @@ -265,6 +268,8 @@ typedef struct { int only_active_thread; int mode; // Use enum _ProfilingMode values int skip_non_matching_threads; // New option to skip threads that don't match mode + int native; + int gc; RemoteDebuggingState *cached_state; // Cached module state #ifdef Py_GIL_DISABLED // TLBC cache invalidation tracking @@ -1801,6 +1806,25 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L * CODE OBJECT AND FRAME PARSING FUNCTIONS * ============================================================================ */ +static PyObject * +make_frame_info(RemoteUnwinderObject *unwinder, PyObject *file, PyObject *line, + PyObject *func) +{ + RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); + PyObject *info = PyStructSequence_New(state->FrameInfo_Type); + if (info == NULL) { + set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo"); + return NULL; + } + Py_INCREF(file); + Py_INCREF(line); + Py_INCREF(func); + PyStructSequence_SetItem(info, 0, file); + PyStructSequence_SetItem(info, 1, line); + PyStructSequence_SetItem(info, 2, func); + return info; +} + static int parse_code_object(RemoteUnwinderObject *unwinder, PyObject **result, @@ -1814,8 +1838,6 @@ parse_code_object(RemoteUnwinderObject *unwinder, PyObject *func = NULL; PyObject *file = NULL; PyObject *linetable = NULL; - PyObject *lineno = NULL; - PyObject *tuple = NULL; #ifdef Py_GIL_DISABLED // In free threading builds, code object addresses might have the low bit set @@ -1937,25 +1959,18 @@ 
parse_code_object(RemoteUnwinderObject *unwinder, info.lineno = -1; } - lineno = PyLong_FromLong(info.lineno); + PyObject *lineno = PyLong_FromLong(info.lineno); if (!lineno) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create line number object"); goto error; } - RemoteDebuggingState *state = RemoteDebugging_GetStateFromObject((PyObject*)unwinder); - tuple = PyStructSequence_New(state->FrameInfo_Type); + PyObject *tuple = make_frame_info(unwinder, meta->file_name, lineno, meta->func_name); + Py_DECREF(lineno); if (!tuple) { - set_exception_cause(unwinder, PyExc_MemoryError, "Failed to create FrameInfo for code object"); goto error; } - Py_INCREF(meta->func_name); - Py_INCREF(meta->file_name); - PyStructSequence_SetItem(tuple, 0, meta->file_name); - PyStructSequence_SetItem(tuple, 1, lineno); - PyStructSequence_SetItem(tuple, 2, meta->func_name); - *result = tuple; return 0; @@ -1963,8 +1978,6 @@ parse_code_object(RemoteUnwinderObject *unwinder, Py_XDECREF(func); Py_XDECREF(file); Py_XDECREF(linetable); - Py_XDECREF(lineno); - Py_XDECREF(tuple); return -1; } @@ -2227,8 +2240,7 @@ is_frame_valid( void* frame = (void*)frame_addr; - if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || - GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { return 0; // C frame } @@ -2447,8 +2459,9 @@ process_frame_chain( RemoteUnwinderObject *unwinder, uintptr_t initial_frame_addr, StackChunkList *chunks, - PyObject *frame_info -) { + PyObject *frame_info, + uintptr_t gc_frame) +{ uintptr_t frame_addr = initial_frame_addr; uintptr_t prev_frame_addr = 0; const size_t MAX_FRAMES = 1024; @@ -2474,25 +2487,63 @@ process_frame_chain( } } - if (!frame) { - break; + if (unwinder->gc && frame_addr == gc_frame) { + _Py_DECLARE_STR(gc, ""); + // Use "~" as file and 
0 as line, since that's what pstats uses: + PyObject *gc_info = make_frame_info(unwinder, _Py_LATIN1_CHR('~'), + _PyLong_GetZero(), &_Py_STR(gc)); + if (gc_info == NULL) { + return -1; + } + int error = PyList_Append(frame_info, gc_info); + Py_DECREF(gc_info); + if (error) { + const char *e = "Failed to append GC to frame info list"; + set_exception_cause(unwinder, PyExc_RuntimeError, e); + return -1; + } } - - if (prev_frame_addr && frame_addr != prev_frame_addr) { - PyErr_Format(PyExc_RuntimeError, - "Broken frame chain: expected frame at 0x%lx, got 0x%lx", - prev_frame_addr, frame_addr); - Py_DECREF(frame); - set_exception_cause(unwinder, PyExc_RuntimeError, "Frame chain consistency check failed"); - return -1; + if (frame == NULL) { + if (PyList_GET_SIZE(frame_info) == 0) { + // If the first frame is missing, the chain is broken: + const char *e = "Failed to parse initial frame in chain"; + PyErr_SetString(PyExc_RuntimeError, e); + return -1; + } + if (unwinder->native && + // The last frame is always native, so skip that one: + next_frame_addr && + // If the next frame will be reported as a GC frame, then don't + // add an extra native frame below it: + !(unwinder->gc && next_frame_addr == gc_frame)) + { + _Py_DECLARE_STR(native, ""); + // Use "~" as file and 0 as line, since that's what pstats uses: + frame = make_frame_info(unwinder, _Py_LATIN1_CHR('~'), + _PyLong_GetZero(), &_Py_STR(native)); + if (frame == NULL) { + return -1; + } + } } + if (frame) { + if (prev_frame_addr && frame_addr != prev_frame_addr) { + const char *f = "Broken frame chain: expected frame at 0x%lx, got 0x%lx"; + PyErr_Format(PyExc_RuntimeError, f, prev_frame_addr, frame_addr); + Py_DECREF(frame); + const char *e = "Frame chain consistency check failed"; + set_exception_cause(unwinder, PyExc_RuntimeError, e); + return -1; + } - if (PyList_Append(frame_info, frame) == -1) { + if (PyList_Append(frame_info, frame) == -1) { + Py_DECREF(frame); + const char *e = "Failed to append frame to 
frame info list"; + set_exception_cause(unwinder, PyExc_RuntimeError, e); + return -1; + } Py_DECREF(frame); - set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to append frame to frame info list"); - return -1; } - Py_DECREF(frame); prev_frame_addr = next_frame_addr; frame_addr = next_frame_addr; @@ -2633,7 +2684,8 @@ static PyObject* unwind_stack_for_thread( RemoteUnwinderObject *unwinder, uintptr_t *current_tstate, - uintptr_t gil_holder_tstate + uintptr_t gil_holder_tstate, + uintptr_t gc_frame ) { PyObject *frame_info = NULL; PyObject *thread_id = NULL; @@ -2699,7 +2751,7 @@ unwind_stack_for_thread( goto error; } - if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info, gc_frame) < 0) { set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain"); goto error; } @@ -2762,6 +2814,8 @@ _remote_debugging.RemoteUnwinder.__init__ mode: int = 0 debug: bool = False skip_non_matching_threads: bool = True + native: bool = False + gc: bool = False Initialize a new RemoteUnwinder object for debugging a remote Python process. @@ -2776,6 +2830,10 @@ Initialize a new RemoteUnwinder object for debugging a remote Python process. lead to the exception. skip_non_matching_threads: If True, skip threads that don't match the selected mode. If False, include all threads regardless of mode. + native: If True, include artificial "" frames to denote calls to + non-Python code. + gc: If True, include artificial "" frames to denote active garbage + collection. The RemoteUnwinder provides functionality to inspect and debug a running Python process, including examining thread states, stack frames and other runtime data. 
@@ -2792,8 +2850,9 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, int mode, int debug, - int skip_non_matching_threads) -/*[clinic end generated code: output=abf5ea5cd58bcb36 input=08fb6ace023ec3b5]*/ + int skip_non_matching_threads, + int native, int gc) +/*[clinic end generated code: output=e9eb6b4df119f6e0 input=606d099059207df2]*/ { // Validate that all_threads and only_active_thread are not both True if (all_threads && only_active_thread) { @@ -2810,6 +2869,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, } #endif + self->native = native; + self->gc = gc; self->debug = debug; self->only_active_thread = only_active_thread; self->mode = mode; @@ -2970,6 +3031,13 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self goto exit; } + uintptr_t gc_frame = 0; + if (self->gc) { + gc_frame = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.gc + + self->debug_offsets.gc.frame); + } + int64_t interpreter_id = GET_MEMBER(int64_t, interp_state_buffer, self->debug_offsets.interpreter_state.id); @@ -3029,7 +3097,9 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self } while (current_tstate != 0) { - PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, gil_holder_tstate); + PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, + gil_holder_tstate, + gc_frame); if (!frame_info) { // Check if this was an intentional skip due to mode-based filtering if ((self->mode == PROFILING_MODE_CPU || self->mode == PROFILING_MODE_GIL) && !PyErr_Occurred()) { diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h index 7dd54e3124887b..60adb357e32e71 100644 --- a/Modules/clinic/_remote_debugging_module.c.h +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -11,7 +11,8 @@ preserve
PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, "RemoteUnwinder(pid, *, all_threads=False, only_active_thread=False,\n" -" mode=0, debug=False, skip_non_matching_threads=True)\n" +" mode=0, debug=False, skip_non_matching_threads=True,\n" +" native=False, gc=False)\n" "--\n" "\n" "Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" @@ -27,6 +28,10 @@ PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, " lead to the exception.\n" " skip_non_matching_threads: If True, skip threads that don\'t match the selected mode.\n" " If False, include all threads regardless of mode.\n" +" native: If True, include artificial \"<native>\" frames to denote calls to\n" +" non-Python code.\n" +" gc: If True, include artificial \"<GC>\" frames to denote active garbage\n" +" collection.\n" "\n" "The RemoteUnwinder provides functionality to inspect and debug a running Python\n" "process, including examining thread states, stack frames and other runtime data.\n" @@ -42,7 +47,8 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, int pid, int all_threads, int only_active_thread, int mode, int debug, - int skip_non_matching_threads); + int skip_non_matching_threads, + int native, int gc); static int _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) @@ -50,7 +56,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int return_value = -1; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - #define NUM_KEYWORDS 6 + #define NUM_KEYWORDS 8 static struct { PyGC_Head _this_is_not_used; PyObject_VAR_HEAD @@ -59,7 +65,7 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) .ob_hash = -1, - .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), }, +
.ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), &_Py_ID(only_active_thread), &_Py_ID(mode), &_Py_ID(debug), &_Py_ID(skip_non_matching_threads), &_Py_ID(native), &_Py_ID(gc), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -68,14 +74,14 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", NULL}; + static const char * const _keywords[] = {"pid", "all_threads", "only_active_thread", "mode", "debug", "skip_non_matching_threads", "native", "gc", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "RemoteUnwinder", .kwtuple = KWTUPLE, }; #undef KWTUPLE - PyObject *argsbuf[6]; + PyObject *argsbuf[8]; PyObject * const *fastargs; Py_ssize_t nargs = PyTuple_GET_SIZE(args); Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; @@ -85,6 +91,8 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje int mode = 0; int debug = 0; int skip_non_matching_threads = 1; + int native = 0; + int gc = 0; fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); @@ -134,12 +142,30 @@ _remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObje goto skip_optional_kwonly; } } - skip_non_matching_threads = PyObject_IsTrue(fastargs[5]); - if (skip_non_matching_threads < 0) { + if (fastargs[5]) { + skip_non_matching_threads = PyObject_IsTrue(fastargs[5]); + if (skip_non_matching_threads < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + if (fastargs[6]) { + native = PyObject_IsTrue(fastargs[6]); + if (native < 0) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_kwonly; + } + } + gc = PyObject_IsTrue(fastargs[7]); + if (gc < 0) { goto 
exit; } skip_optional_kwonly: - return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads); + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads, only_active_thread, mode, debug, skip_non_matching_threads, native, gc); exit: return return_value; @@ -321,4 +347,4 @@ _remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject return return_value; } -/*[clinic end generated code: output=2caefeddf7683d32 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=99fed5c94cf36881 input=a9049054013a1b77]*/ diff --git a/Python/gc.c b/Python/gc.c index 03a5d7366ea6c9..064f9406e0a17c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -2074,6 +2074,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) // Don't start a garbage collection if one is already in progress. return 0; } + gcstate->frame = tstate->current_frame; struct gc_collection_stats stats = { 0 }; if (reason != _Py_GC_REASON_SHUTDOWN) { @@ -2119,6 +2120,7 @@ _PyGC_Collect(PyThreadState *tstate, int generation, _PyGC_Reason reason) } #endif validate_spaces(gcstate); + gcstate->frame = NULL; _Py_atomic_store_int(&gcstate->collecting, 0); if (gcstate->debug & _PyGC_DEBUG_STATS) { diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index f39793c3eeb532..2e03822d5ac1cc 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -2358,6 +2358,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) _Py_atomic_store_int(&gcstate->collecting, 0); return 0; } + gcstate->frame = tstate->current_frame; assert(generation >= 0 && generation < NUM_GENERATIONS); @@ -2446,6 +2447,7 @@ gc_collect_main(PyThreadState *tstate, int generation, _PyGC_Reason reason) } assert(!_PyErr_Occurred(tstate)); + gcstate->frame = NULL; _Py_atomic_store_int(&gcstate->collecting, 
0); return n + m; }