gh-96143: Add some comments and minor fixes missed in the original PR (#96433)

* gh-96143: Add some comments and minor fixes missed in the original PR

* Update Doc/using/cmdline.rst

Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>

Co-authored-by: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com>
This commit is contained in:
Pablo Galindo Salgado 2022-08-30 19:37:22 +01:00 committed by GitHub
parent 45fd3685aa
commit f49dd54b72
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 1 deletions

View File

@ -155,6 +155,9 @@ active since the start of the Python interpreter, you can use the `-Xperf` optio
$ python -Xperf my_script.py $ python -Xperf my_script.py
You can also set the :envvar:`PYTHONPERFSUPPORT` environment variable to a nonzero value to activate perf
profiling mode globally.
There is also support for dynamically activating and deactivating the perf There is also support for dynamically activating and deactivating the perf
profiling mode by using the APIs in the :mod:`sys` module: profiling mode by using the APIs in the :mod:`sys` module:

View File

@ -582,6 +582,8 @@ Miscellaneous options
.. versionadded:: 3.11 .. versionadded:: 3.11
The ``-X frozen_modules`` option. The ``-X frozen_modules`` option.
.. versionadded:: 3.12
The ``-X perf`` option.
Options you shouldn't use Options you shouldn't use

View File

@ -58,7 +58,7 @@ class TestPerfTrampoline(unittest.TestCase):
script = make_script(script_dir, "perftest", code) script = make_script(script_dir, "perftest", code)
with subprocess.Popen( with subprocess.Popen(
[sys.executable, "-Xperf", script], [sys.executable, "-Xperf", script],
universal_newlines=True, text=True,
stderr=subprocess.PIPE, stderr=subprocess.PIPE,
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
) as process: ) as process:

View File

@ -284,12 +284,23 @@ new_code_arena(void)
void *start = &_Py_trampoline_func_start; void *start = &_Py_trampoline_func_start;
void *end = &_Py_trampoline_func_end; void *end = &_Py_trampoline_func_end;
size_t code_size = end - start; size_t code_size = end - start;
// TODO: Check the effect of alignment of the code chunks. Initial investigation
// showed that this has no effect on performance in x86-64 or aarch64 and the current
// version has the advantage that the unwinder in GDB can unwind across JIT-ed code.
//
// We should check the values in the future and see if there is a
// measurable performance improvement by rounding trampolines up to 32-bit
// or 64-bit alignment.
size_t n_copies = mem_size / code_size; size_t n_copies = mem_size / code_size;
for (size_t i = 0; i < n_copies; i++) { for (size_t i = 0; i < n_copies; i++) {
memcpy(memory + i * code_size, start, code_size * sizeof(char)); memcpy(memory + i * code_size, start, code_size * sizeof(char));
} }
// Some systems may prevent us from creating executable code on the fly. // Some systems may prevent us from creating executable code on the fly.
// TODO: Call icache invalidation intrinsics if available:
// __builtin___clear_cache/__clear_cache (depending if clang/gcc). This is
// technically not necessary but we could be missing something so better be
// safe.
int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC); int res = mprotect(memory, mem_size, PROT_READ | PROT_EXEC);
if (res == -1) { if (res == -1) {
PyErr_SetFromErrno(PyExc_OSError); PyErr_SetFromErrno(PyExc_OSError);