GH-136895: Update JIT builds to use LLVM 20 (#140329)
Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
This commit is contained in:
committed by
GitHub
parent
b373d3494c
commit
4e2ff4ac4c
6
.github/workflows/jit.yml
vendored
6
.github/workflows/jit.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
- true
|
||||
- false
|
||||
llvm:
|
||||
- 19
|
||||
- 20
|
||||
include:
|
||||
- target: i686-pc-windows-msvc/msvc
|
||||
architecture: Win32
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
llvm:
|
||||
- 19
|
||||
- 20
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
llvm:
|
||||
- 19
|
||||
- 20
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
@@ -0,0 +1 @@
|
||||
Update JIT compilation to use LLVM 20 at build time.
|
||||
@@ -3,6 +3,7 @@
|
||||
import argparse
|
||||
import os
|
||||
import pathlib
|
||||
import shutil
|
||||
import sys
|
||||
import time
|
||||
import urllib.error
|
||||
@@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
|
||||
)
|
||||
except (urllib.error.URLError, ConnectionError) as ex:
|
||||
if attempt == max_retries:
|
||||
msg = f"Download from {download_location} failed."
|
||||
raise OSError(msg) from ex
|
||||
raise OSError(f'Download from {download_location} failed.') from ex
|
||||
time.sleep(2.25**attempt)
|
||||
else:
|
||||
return resp
|
||||
|
||||
|
||||
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
|
||||
repo = f'cpython-{"bin" if binary else "source"}-deps'
|
||||
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
|
||||
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
|
||||
reporthook = None
|
||||
if verbose:
|
||||
@@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
|
||||
return filename
|
||||
|
||||
|
||||
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``<tag>.tar.xz`` release asset of cpython-bin-deps.

    The asset is fetched from the GitHub release named *tag* in the
    ``cpython-bin-deps`` repository owned by *org* and stored as
    ``tarball_dir / '<tag>.tar.xz'``. *tarball_dir* is created if missing.

    When *verbose* is true, ``print`` is passed as the download report hook;
    otherwise no hook is used.

    Returns the path of the downloaded tarball.
    """
    url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
    tarball_dir.mkdir(parents=True, exist_ok=True)
    destination = tarball_dir / f'{tag}.tar.xz'
    progress_hook = print if verbose else None
    retrieve_with_retries(url, destination, progress_hook)
    return destination
|
||||
|
||||
|
||||
def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack *tarball_path* into ``externals_dir / tag``.

    The archive format is left for ``shutil.unpack_archive`` to determine.
    Returns the directory the archive was unpacked into.
    """
    destination = externals_dir / tag
    shutil.unpack_archive(
        filename=os.fspath(tarball_path),
        extract_dir=os.fspath(destination),
    )
    return destination
|
||||
|
||||
|
||||
def extract_zip(externals_dir, zip_path):
|
||||
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
|
||||
zf.extractall(os.fspath(externals_dir))
|
||||
@@ -55,6 +71,8 @@ def parse_args():
|
||||
p.add_argument('-v', '--verbose', action='store_true')
|
||||
p.add_argument('-b', '--binary', action='store_true',
|
||||
help='Is the dependency in the binary repo?')
|
||||
p.add_argument('-r', '--release', action='store_true',
|
||||
help='Download from GitHub release assets instead of branch')
|
||||
p.add_argument('-O', '--organization',
|
||||
help='Organization owning the deps repos', default='python')
|
||||
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
|
||||
@@ -67,15 +85,36 @@ def parse_args():
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
zip_path = fetch_zip(
|
||||
args.tag,
|
||||
args.externals_dir / 'zips',
|
||||
org=args.organization,
|
||||
binary=args.binary,
|
||||
verbose=args.verbose,
|
||||
)
|
||||
final_name = args.externals_dir / args.tag
|
||||
extracted = extract_zip(args.externals_dir, zip_path)
|
||||
|
||||
# Check if the dependency already exists in externals/ directory
|
||||
# (either already downloaded/extracted, or checked into the git tree)
|
||||
if final_name.exists():
|
||||
if args.verbose:
|
||||
print(f'{args.tag} already exists at {final_name}, skipping download.')
|
||||
return
|
||||
|
||||
# Determine download method: release artifacts for large deps (like LLVM),
|
||||
# otherwise zip download from GitHub branches
|
||||
if args.release:
|
||||
tarball_path = fetch_release(
|
||||
args.tag,
|
||||
args.externals_dir / 'tarballs',
|
||||
org=args.organization,
|
||||
verbose=args.verbose,
|
||||
)
|
||||
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
|
||||
else:
|
||||
# Use zip download from GitHub branches
|
||||
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
|
||||
zip_path = fetch_zip(
|
||||
args.tag,
|
||||
args.externals_dir / 'zips',
|
||||
org=args.organization,
|
||||
binary=args.binary,
|
||||
verbose=args.verbose,
|
||||
)
|
||||
extracted = extract_zip(args.externals_dir, zip_path)
|
||||
for wait in [1, 2, 3, 5, 8, 0]:
|
||||
try:
|
||||
extracted.replace(final_name)
|
||||
|
||||
@@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
|
||||
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
|
||||
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
|
||||
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
|
||||
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
|
||||
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
|
||||
|
||||
for %%b in (%binaries%) do (
|
||||
if exist "%EXTERNALS_DIR%\%%b" (
|
||||
@@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
|
||||
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
|
||||
) else (
|
||||
echo.Fetching %%b...
|
||||
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
|
||||
if "%%b"=="llvm-20.1.8.0" (
|
||||
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
|
||||
) else (
|
||||
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
|
||||
72
Python/jit.c
72
Python/jit.c
@@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
|
||||
}
|
||||
|
||||
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
|
||||
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
|
||||
|
||||
#include "jit_stencils.h"
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
#define TRAMPOLINE_SIZE 16
|
||||
#define DATA_ALIGN 8
|
||||
#elif defined(__x86_64__) && defined(__APPLE__)
|
||||
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
|
||||
// range.
|
||||
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
|
||||
#define DATA_ALIGN 8
|
||||
#else
|
||||
#define TRAMPOLINE_SIZE 0
|
||||
#define DATA_ALIGN 1
|
||||
#endif
|
||||
|
||||
// Get the trampoline memory location for a given symbol ordinal.
|
||||
static unsigned char *
|
||||
get_trampoline_slot(int ordinal, jit_state *state)
|
||||
{
|
||||
const uint32_t symbol_mask = 1 << (ordinal % 32);
|
||||
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
|
||||
assert(symbol_mask & trampoline_mask);
|
||||
|
||||
// Count the number of set bits in the trampoline mask lower than ordinal
|
||||
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
|
||||
for (int i = 0; i < ordinal / 32; i++) {
|
||||
index += _Py_popcount32(state->trampolines.mask[i]);
|
||||
}
|
||||
|
||||
unsigned char *trampoline = state->trampolines.mem + index * TRAMPOLINE_SIZE;
|
||||
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
|
||||
return trampoline;
|
||||
}
|
||||
|
||||
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
|
||||
// in the jit_stencils.h in the symbols_map.
|
||||
void
|
||||
@@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
||||
return;
|
||||
}
|
||||
|
||||
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
|
||||
const uint32_t symbol_mask = 1 << (ordinal % 32);
|
||||
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
|
||||
assert(symbol_mask & trampoline_mask);
|
||||
|
||||
// Count the number of set bits in the trampoline mask lower than ordinal,
|
||||
// this gives the index into the array of trampolines.
|
||||
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
|
||||
for (int i = 0; i < ordinal / 32; i++) {
|
||||
index += _Py_popcount32(state->trampolines.mask[i]);
|
||||
}
|
||||
|
||||
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
|
||||
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
|
||||
// Out of range - need a trampoline
|
||||
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
|
||||
|
||||
|
||||
/* Generate the trampoline
|
||||
@@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
||||
patch_aarch64_26r(location, (uintptr_t)p);
|
||||
}
|
||||
|
||||
// Generate and patch x86_64 trampolines.
|
||||
void
|
||||
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
|
||||
{
|
||||
uint64_t value = (uintptr_t)symbols_map[ordinal];
|
||||
int64_t range = (int64_t)value - 4 - (int64_t)location;
|
||||
|
||||
// If we are in range of 32 signed bits, we can patch directly
|
||||
if (range >= -(1LL << 31) && range < (1LL << 31)) {
|
||||
patch_32r(location, value - 4);
|
||||
return;
|
||||
}
|
||||
|
||||
// Out of range - need a trampoline
|
||||
unsigned char *trampoline = get_trampoline_slot(ordinal, state);
|
||||
|
||||
/* Generate the trampoline (14 bytes, padded to 16):
|
||||
0: ff 25 00 00 00 00 jmp *(%rip)
|
||||
6: XX XX XX XX XX XX XX XX (64-bit target address)
|
||||
|
||||
Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
|
||||
*/
|
||||
trampoline[0] = 0xFF;
|
||||
trampoline[1] = 0x25;
|
||||
memset(trampoline + 2, 0, 4);
|
||||
memcpy(trampoline + 6, &value, 8);
|
||||
|
||||
// Patch the call site to call the trampoline instead
|
||||
patch_32r(location, (uintptr_t)trampoline - 4);
|
||||
}
|
||||
|
||||
static void
|
||||
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
|
||||
{
|
||||
|
||||
@@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.
|
||||
|
||||
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
|
||||
|
||||
LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
|
||||
LLVM version 20 is the officially supported version. You can override it if needed using the `LLVM_VERSION` environment variable during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
|
||||
|
||||
It's easy to install all of the required tools:
|
||||
|
||||
### Linux
|
||||
|
||||
Install LLVM 19 on Ubuntu/Debian:
|
||||
Install LLVM 20 on Ubuntu/Debian:
|
||||
|
||||
```sh
|
||||
wget https://apt.llvm.org/llvm.sh
|
||||
chmod +x llvm.sh
|
||||
sudo ./llvm.sh 19
|
||||
sudo ./llvm.sh 20
|
||||
```
|
||||
|
||||
Install LLVM 19 on Fedora Linux 40 or newer:
|
||||
Install LLVM 20 on Fedora Linux 40 or newer:
|
||||
|
||||
```sh
|
||||
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
|
||||
sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
|
||||
```
|
||||
|
||||
### macOS
|
||||
|
||||
Install LLVM 19 with [Homebrew](https://brew.sh):
|
||||
Install LLVM 20 with [Homebrew](https://brew.sh):
|
||||
|
||||
```sh
|
||||
brew install llvm@19
|
||||
brew install llvm@20
|
||||
```
|
||||
|
||||
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
|
||||
@@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri
|
||||
|
||||
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
|
||||
|
||||
Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
|
||||
Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
|
||||
|
||||
Alternatively, you can use [chocolatey](https://chocolatey.org):
|
||||
|
||||
```sh
|
||||
choco install llvm --version=19.1.0
|
||||
choco install llvm --version=20.1.8
|
||||
```
|
||||
|
||||
### Dev Containers
|
||||
|
||||
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
|
||||
need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
|
||||
need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
|
||||
|
||||
## Building
|
||||
|
||||
|
||||
@@ -11,8 +11,8 @@ import typing
|
||||
import _targets
|
||||
|
||||
|
||||
_LLVM_VERSION = "19"
|
||||
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
|
||||
_LLVM_VERSION = "20"
|
||||
_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
|
||||
|
||||
_P = typing.ParamSpec("_P")
|
||||
_R = typing.TypeVar("_R")
|
||||
|
||||
@@ -253,6 +253,23 @@ class StencilGroup:
|
||||
self._trampolines.add(ordinal)
|
||||
hole.addend = ordinal
|
||||
hole.symbol = None
|
||||
# x86_64 Darwin trampolines for external symbols
|
||||
elif (
|
||||
hole.kind == "X86_64_RELOC_BRANCH"
|
||||
and hole.value is HoleValue.ZERO
|
||||
and hole.symbol not in self.symbols
|
||||
):
|
||||
hole.func = "patch_x86_64_trampoline"
|
||||
hole.need_state = True
|
||||
assert hole.symbol is not None
|
||||
if hole.symbol in known_symbols:
|
||||
ordinal = known_symbols[hole.symbol]
|
||||
else:
|
||||
ordinal = len(known_symbols)
|
||||
known_symbols[hole.symbol] = ordinal
|
||||
self._trampolines.add(ordinal)
|
||||
hole.addend = ordinal
|
||||
hole.symbol = None
|
||||
self.data.pad(8)
|
||||
for stencil in [self.code, self.data]:
|
||||
for hole in stencil.holes:
|
||||
|
||||
@@ -166,10 +166,6 @@ class _Target(typing.Generic[_S, _R]):
|
||||
"-fno-asynchronous-unwind-tables",
|
||||
# Don't call built-in functions that we can't find or patch:
|
||||
"-fno-builtin",
|
||||
# Emit relaxable 64-bit calls/jumps, so we don't have to worry about
|
||||
# about emitting in-range trampolines for out-of-range targets.
|
||||
# We can probably remove this and emit trampolines in the future:
|
||||
"-fno-plt",
|
||||
# Don't call stack-smashing canaries that we can't find or patch:
|
||||
"-fno-stack-protector",
|
||||
"-std=c11",
|
||||
@@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
|
||||
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
|
||||
host = "aarch64-pc-windows-msvc"
|
||||
condition = "defined(_M_ARM64)"
|
||||
args = ["-fms-runtime-lib=dll", "-fplt"]
|
||||
args = ["-fms-runtime-lib=dll"]
|
||||
optimizer = _optimizers.OptimizerAArch64
|
||||
target = _COFF64(host, condition, args=args, optimizer=optimizer)
|
||||
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
|
||||
host = "aarch64-unknown-linux-gnu"
|
||||
condition = "defined(__aarch64__) && defined(__linux__)"
|
||||
# -mno-outline-atomics: Keep intrinsics from being emitted.
|
||||
args = ["-fpic", "-mno-outline-atomics"]
|
||||
args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
|
||||
optimizer = _optimizers.OptimizerAArch64
|
||||
target = _ELF(host, condition, args=args, optimizer=optimizer)
|
||||
elif re.fullmatch(r"i686-pc-windows-msvc", host):
|
||||
@@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
|
||||
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
|
||||
host = "x86_64-unknown-linux-gnu"
|
||||
condition = "defined(__x86_64__) && defined(__linux__)"
|
||||
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
|
||||
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
|
||||
optimizer = _optimizers.OptimizerX86
|
||||
target = _ELF(host, condition, args=args, optimizer=optimizer)
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user