GH-136895: Update JIT builds to use LLVM 20 (#140329)

Co-authored-by: Emma Harper Smith <emma@emmatyping.dev>
This commit is contained in:
Savannah Ostrowski
2025-11-03 10:01:44 -08:00
committed by GitHub
parent b373d3494c
commit 4e2ff4ac4c
9 changed files with 151 additions and 50 deletions

View File

@@ -68,7 +68,7 @@ jobs:
- true
- false
llvm:
- 19
- 20
include:
- target: i686-pc-windows-msvc/msvc
architecture: Win32
@@ -138,7 +138,7 @@ jobs:
fail-fast: false
matrix:
llvm:
- 19
- 20
steps:
- uses: actions/checkout@v4
with:
@@ -166,7 +166,7 @@ jobs:
fail-fast: false
matrix:
llvm:
- 19
- 20
steps:
- uses: actions/checkout@v4
with:

View File

@@ -0,0 +1 @@
Update JIT compilation to use LLVM 20 at build time.

View File

@@ -3,6 +3,7 @@
import argparse
import os
import pathlib
import shutil
import sys
import time
import urllib.error
@@ -22,15 +23,13 @@ def retrieve_with_retries(download_location, output_path, reporthook,
)
except (urllib.error.URLError, ConnectionError) as ex:
if attempt == max_retries:
msg = f"Download from {download_location} failed."
raise OSError(msg) from ex
raise OSError(f'Download from {download_location} failed.') from ex
time.sleep(2.25**attempt)
else:
return resp
def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
repo = f'cpython-{"bin" if binary else "source"}-deps'
repo = 'cpython-bin-deps' if binary else 'cpython-source-deps'
url = f'https://github.com/{org}/{repo}/archive/{commit_hash}.zip'
reporthook = None
if verbose:
@@ -44,6 +43,23 @@ def fetch_zip(commit_hash, zip_dir, *, org='python', binary=False, verbose):
return filename
def fetch_release(tag, tarball_dir, *, org='python', verbose=False):
    """Download the ``<tag>.tar.xz`` release asset of ``cpython-bin-deps``.

    The asset URL is built from *org* and *tag*; the file is saved as
    ``tarball_dir / '<tag>.tar.xz'`` (the directory is created if missing).
    When *verbose* is true, ``print`` is handed to ``retrieve_with_retries``
    as its report hook.

    Returns the path the tarball was written to.
    """
    url = f'https://github.com/{org}/cpython-bin-deps/releases/download/{tag}/{tag}.tar.xz'
    reporthook = print if verbose else None

    # Make sure the download directory exists before fetching into it.
    tarball_dir.mkdir(parents=True, exist_ok=True)
    output_path = tarball_dir / f'{tag}.tar.xz'

    retrieve_with_retries(url, output_path, reporthook)
    return output_path
def extract_tarball(externals_dir, tarball_path, tag):
    """Unpack *tarball_path* into ``externals_dir / tag`` and return that path.

    The archive format is left for ``shutil.unpack_archive`` to infer from
    the file name.
    """
    destination = externals_dir / tag
    shutil.unpack_archive(
        filename=os.fspath(tarball_path),
        extract_dir=os.fspath(destination),
    )
    return destination
def extract_zip(externals_dir, zip_path):
with zipfile.ZipFile(os.fspath(zip_path)) as zf:
zf.extractall(os.fspath(externals_dir))
@@ -55,6 +71,8 @@ def parse_args():
p.add_argument('-v', '--verbose', action='store_true')
p.add_argument('-b', '--binary', action='store_true',
help='Is the dependency in the binary repo?')
p.add_argument('-r', '--release', action='store_true',
help='Download from GitHub release assets instead of branch')
p.add_argument('-O', '--organization',
help='Organization owning the deps repos', default='python')
p.add_argument('-e', '--externals-dir', type=pathlib.Path,
@@ -67,15 +85,36 @@ def parse_args():
def main():
args = parse_args()
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
final_name = args.externals_dir / args.tag
extracted = extract_zip(args.externals_dir, zip_path)
# Check if the dependency already exists in externals/ directory
# (either already downloaded/extracted, or checked into the git tree)
if final_name.exists():
if args.verbose:
print(f'{args.tag} already exists at {final_name}, skipping download.')
return
# Determine download method: release artifacts for large deps (like LLVM),
# otherwise zip download from GitHub branches
if args.release:
tarball_path = fetch_release(
args.tag,
args.externals_dir / 'tarballs',
org=args.organization,
verbose=args.verbose,
)
extracted = extract_tarball(args.externals_dir, tarball_path, args.tag)
else:
# Use zip download from GitHub branches
# (cpython-bin-deps if --binary, cpython-source-deps otherwise)
zip_path = fetch_zip(
args.tag,
args.externals_dir / 'zips',
org=args.organization,
binary=args.binary,
verbose=args.verbose,
)
extracted = extract_zip(args.externals_dir, zip_path)
for wait in [1, 2, 3, 5, 8, 0]:
try:
extracted.replace(final_name)

View File

@@ -82,7 +82,7 @@ if NOT "%IncludeLibffi%"=="false" set binaries=%binaries% libffi-3.4.4
if NOT "%IncludeSSL%"=="false" set binaries=%binaries% openssl-bin-3.0.18
if NOT "%IncludeTkinter%"=="false" set binaries=%binaries% tcltk-8.6.15.0
if NOT "%IncludeSSLSrc%"=="false" set binaries=%binaries% nasm-2.11.06
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-19.1.7.0
if NOT "%IncludeLLVM%"=="false" set binaries=%binaries% llvm-20.1.8.0
for %%b in (%binaries%) do (
if exist "%EXTERNALS_DIR%\%%b" (
@@ -92,7 +92,11 @@ for %%b in (%binaries%) do (
git clone --depth 1 https://github.com/%ORG%/cpython-bin-deps --branch %%b "%EXTERNALS_DIR%\%%b"
) else (
echo.Fetching %%b...
%PYTHON% -E "%PCBUILD%\get_external.py" -b -O %ORG% -e "%EXTERNALS_DIR%" %%b
if "%%b"=="llvm-20.1.8.0" (
%PYTHON% -E "%PCBUILD%\get_external.py" --release --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
) else (
%PYTHON% -E "%PCBUILD%\get_external.py" --binary --organization %ORG% --externals-dir "%EXTERNALS_DIR%" %%b
)
)
)

View File

@@ -444,17 +444,42 @@ patch_x86_64_32rx(unsigned char *location, uint64_t value)
}
void patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state);
void patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state);
#include "jit_stencils.h"
#if defined(__aarch64__) || defined(_M_ARM64)
#define TRAMPOLINE_SIZE 16
#define DATA_ALIGN 8
#elif defined(__x86_64__) && defined(__APPLE__)
// LLVM 20 on macOS x86_64 debug builds: GOT entries may exceed ±2GB PC-relative
// range.
#define TRAMPOLINE_SIZE 16 // 14 bytes + 2 bytes padding for alignment
#define DATA_ALIGN 8
#else
#define TRAMPOLINE_SIZE 0
#define DATA_ALIGN 1
#endif
// Get the trampoline memory location for a given symbol ordinal.
//
// state->trampolines.mask is read as an array of 32-bit words: bit
// (ordinal % 32) of word (ordinal / 32) must be set for a symbol that has a
// trampoline. The slot index is the number of set bits that precede that bit
// across the whole mask, and each slot is TRAMPOLINE_SIZE bytes wide.
static unsigned char *
get_trampoline_slot(int ordinal, jit_state *state)
{
    // Shift an unsigned 1: "1 << 31" on a 32-bit signed int shifts into the
    // sign bit, which is undefined behavior in C.
    const uint32_t symbol_mask = (uint32_t)1 << (ordinal % 32);
    const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
    assert(symbol_mask & trampoline_mask);

    // Count the number of set bits in the trampoline mask lower than ordinal:
    // first within the symbol's own word, then in every earlier word.
    int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
    for (int i = 0; i < ordinal / 32; i++) {
        index += _Py_popcount32(state->trampolines.mask[i]);
    }

    // Check the slot lies inside the trampoline area before forming a
    // pointer to it.
    assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
    return state->trampolines.mem + index * TRAMPOLINE_SIZE;
}
// Generate and patch AArch64 trampolines. The symbols to jump to are stored
// in the jit_stencils.h in the symbols_map.
void
@@ -471,20 +496,8 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
return;
}
// Masking is done modulo 32 as the mask is stored as an array of uint32_t
const uint32_t symbol_mask = 1 << (ordinal % 32);
const uint32_t trampoline_mask = state->trampolines.mask[ordinal / 32];
assert(symbol_mask & trampoline_mask);
// Count the number of set bits in the trampoline mask lower than ordinal,
// this gives the index into the array of trampolines.
int index = _Py_popcount32(trampoline_mask & (symbol_mask - 1));
for (int i = 0; i < ordinal / 32; i++) {
index += _Py_popcount32(state->trampolines.mask[i]);
}
uint32_t *p = (uint32_t*)(state->trampolines.mem + index * TRAMPOLINE_SIZE);
assert((size_t)(index + 1) * TRAMPOLINE_SIZE <= state->trampolines.size);
// Out of range - need a trampoline
uint32_t *p = (uint32_t *)get_trampoline_slot(ordinal, state);
/* Generate the trampoline
@@ -501,6 +514,37 @@ patch_aarch64_trampoline(unsigned char *location, int ordinal, jit_state *state)
patch_aarch64_26r(location, (uintptr_t)p);
}
// Generate and patch x86_64 trampolines.
//
// Looks up the target address in symbols_map[ordinal]. If the target is
// reachable from `location` with a signed 32-bit relative displacement, the
// call site is patched to reference it directly; otherwise a trampoline is
// written into this symbol's slot and the call site is patched to reference
// the trampoline instead.
void
patch_x86_64_trampoline(unsigned char *location, int ordinal, jit_state *state)
{
    // jmp *(%rip): FF 25 followed by a zero 32-bit displacement, so the jump
    // target is read from the 8 bytes placed right after the instruction.
    // Reference: https://wiki.osdev.org/X86-64_Instruction_Encoding#FF (JMP r/m64)
    static const unsigned char jmp_rip_indirect[6] = {
        0xFF, 0x25, 0x00, 0x00, 0x00, 0x00,
    };

    uint64_t target = (uintptr_t)symbols_map[ordinal];
    int64_t displacement = (int64_t)target - 4 - (int64_t)location;

    // If we are in range of 32 signed bits, we can patch directly
    if (displacement >= -(1LL << 31) && displacement < (1LL << 31)) {
        patch_32r(location, target - 4);
        return;
    }

    // Out of range - need a trampoline
    unsigned char *slot = get_trampoline_slot(ordinal, state);

    /* Trampoline layout (14 bytes, padded to 16 by TRAMPOLINE_SIZE):
           0: ff 25 00 00 00 00        jmp *(%rip)
           6: XX XX XX XX XX XX XX XX  (64-bit target address)
    */
    memcpy(slot, jmp_rip_indirect, sizeof(jmp_rip_indirect));
    memcpy(slot + sizeof(jmp_rip_indirect), &target, sizeof(target));

    // Patch the call site to call the trampoline instead
    patch_32r(location, (uintptr_t)slot - 4);
}
static void
combine_symbol_mask(const symbol_mask src, symbol_mask dest)
{

View File

@@ -9,32 +9,32 @@ Python 3.11 or newer is required to build the JIT.
The JIT compiler does not require end users to install any third-party dependencies, but part of it must be *built* using LLVM[^why-llvm]. You are *not* required to build the rest of CPython using LLVM, or even the same version of LLVM (in fact, this is uncommon).
LLVM version 19 is the officially supported version. You can modify if needed using the `LLVM_VERSION` env var during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-19`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
LLVM version 20 is the officially supported version. If needed, you can override it by setting the `LLVM_VERSION` environment variable during configure. Both `clang` and `llvm-readobj` need to be installed and discoverable (version suffixes, like `clang-20`, are okay). It's highly recommended that you also have `llvm-objdump` available, since this allows the build script to dump human-readable assembly for the generated code.
It's easy to install all of the required tools:
### Linux
Install LLVM 19 on Ubuntu/Debian:
Install LLVM 20 on Ubuntu/Debian:
```sh
wget https://apt.llvm.org/llvm.sh
chmod +x llvm.sh
sudo ./llvm.sh 19
sudo ./llvm.sh 20
```
Install LLVM 19 on Fedora Linux 40 or newer:
Install LLVM 20 on Fedora Linux 40 or newer:
```sh
sudo dnf install 'clang(major) = 19' 'llvm(major) = 19'
sudo dnf install 'clang(major) = 20' 'llvm(major) = 20'
```
### macOS
Install LLVM 19 with [Homebrew](https://brew.sh):
Install LLVM 20 with [Homebrew](https://brew.sh):
```sh
brew install llvm@19
brew install llvm@20
```
Homebrew won't add any of the tools to your `$PATH`. That's okay; the build script knows how to find them.
@@ -43,18 +43,18 @@ Homebrew won't add any of the tools to your `$PATH`. That's okay; the build scri
LLVM is downloaded automatically (along with other external binary dependencies) by `PCbuild\build.bat`.
Otherwise, you can install LLVM 19 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=19), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Otherwise, you can install LLVM 20 [by searching for it on LLVM's GitHub releases page](https://github.com/llvm/llvm-project/releases?q=20), clicking on "Assets", downloading the appropriate Windows installer for your platform (likely the file ending with `-win64.exe`), and running it. **When installing, be sure to select the option labeled "Add LLVM to the system PATH".**
Alternatively, you can use [chocolatey](https://chocolatey.org):
```sh
choco install llvm --version=19.1.0
choco install llvm --version=20.1.8
```
### Dev Containers
If you are working on CPython in a [Codespaces instance](https://devguide.python.org/getting-started/setup-building/#using-codespaces), there's no
need to install LLVM as the Fedora 41 base image includes LLVM 19 out of the box.
need to install LLVM as the Fedora 42 base image includes LLVM 20 out of the box.
## Building

View File

@@ -11,8 +11,8 @@ import typing
import _targets
_LLVM_VERSION = "19"
_EXTERNALS_LLVM_TAG = "llvm-19.1.7.0"
_LLVM_VERSION = "20"
_EXTERNALS_LLVM_TAG = "llvm-20.1.8.0"
_P = typing.ParamSpec("_P")
_R = typing.TypeVar("_R")

View File

@@ -253,6 +253,23 @@ class StencilGroup:
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
# x86_64 Darwin trampolines for external symbols
elif (
hole.kind == "X86_64_RELOC_BRANCH"
and hole.value is HoleValue.ZERO
and hole.symbol not in self.symbols
):
hole.func = "patch_x86_64_trampoline"
hole.need_state = True
assert hole.symbol is not None
if hole.symbol in known_symbols:
ordinal = known_symbols[hole.symbol]
else:
ordinal = len(known_symbols)
known_symbols[hole.symbol] = ordinal
self._trampolines.add(ordinal)
hole.addend = ordinal
hole.symbol = None
self.data.pad(8)
for stencil in [self.code, self.data]:
for hole in stencil.holes:

View File

@@ -166,10 +166,6 @@ class _Target(typing.Generic[_S, _R]):
"-fno-asynchronous-unwind-tables",
# Don't call built-in functions that we can't find or patch:
"-fno-builtin",
# Emit relaxable 64-bit calls/jumps, so we don't have to worry about
# about emitting in-range trampolines for out-of-range targets.
# We can probably remove this and emit trampolines in the future:
"-fno-plt",
# Don't call stack-smashing canaries that we can't find or patch:
"-fno-stack-protector",
"-std=c11",
@@ -571,14 +567,14 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"aarch64-pc-windows-msvc", host):
host = "aarch64-pc-windows-msvc"
condition = "defined(_M_ARM64)"
args = ["-fms-runtime-lib=dll", "-fplt"]
args = ["-fms-runtime-lib=dll"]
optimizer = _optimizers.OptimizerAArch64
target = _COFF64(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"aarch64-.*-linux-gnu", host):
host = "aarch64-unknown-linux-gnu"
condition = "defined(__aarch64__) && defined(__linux__)"
# -mno-outline-atomics: Keep intrinsics from being emitted.
args = ["-fpic", "-mno-outline-atomics"]
args = ["-fpic", "-mno-outline-atomics", "-fno-plt"]
optimizer = _optimizers.OptimizerAArch64
target = _ELF(host, condition, args=args, optimizer=optimizer)
elif re.fullmatch(r"i686-pc-windows-msvc", host):
@@ -602,7 +598,7 @@ def get_target(host: str) -> _COFF32 | _COFF64 | _ELF | _MachO:
elif re.fullmatch(r"x86_64-.*-linux-gnu", host):
host = "x86_64-unknown-linux-gnu"
condition = "defined(__x86_64__) && defined(__linux__)"
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0"]
args = ["-fno-pic", "-mcmodel=medium", "-mlarge-data-threshold=0", "-fno-plt"]
optimizer = _optimizers.OptimizerX86
target = _ELF(host, condition, args=args, optimizer=optimizer)
else: