This PR changes the current JIT model from trace projection to trace recording.

Benchmarking:
* About 1.7% geomean improvement on pyperformance versus the current JIT: https://raw.githubusercontent.com/facebookexperimental/free-threading-benchmarking/refs/heads/main/results/bm-20251108-3.15.0a1%2B-7e2bc1d-JIT/bm-20251108-vultr-x86_64-Fidget%252dSpinner-tracing_jit-3.15.0a1%2B-7e2bc1d-vs-base.svg
* Richards, the most improved benchmark, is 100% faster than under the current JIT. The worst benchmark slows down by about 10-15% versus the current JIT.
* **Note: the fastest version isn't the one merged, as it relies on fixing bugs in the specializing interpreter, which is left to another PR.** The speedup in the merged version is about 1.1%: https://raw.githubusercontent.com/facebookexperimental/free-threading-benchmarking/refs/heads/main/results/bm-20251112-3.15.0a1%2B-f8a764a-JIT/bm-20251112-vultr-x86_64-Fidget%252dSpinner-tracing_jit-3.15.0a1%2B-f8a764a-vs-base.svg

Stats: 50% more uops executed, and 30% more traces entered, as of the last stats run. The stats also suggest that our trace lengths are too short for a real trace-recording JIT, as there are a lot of trace-too-long aborts: https://github.com/facebookexperimental/free-threading-benchmarking/blob/main/results/bm-20251023-3.15.0a1%2B-eb73378-CLANG%2CJIT/bm-20251023-vultr-x86_64-Fidget%252dSpinner-tracing_jit-3.15.0a1%2B-eb73378-pystats-vs-base.md

The new JIT frontend is already able to record and execute significantly more instructions than the previous one. With this PR, we can now record through custom dunders, simple object creation, generators, etc. None of these were handled by the old JIT frontend. Some custom-dunder uops were discovered to be broken as part of this work (gh-140277).

The optimizer stack space check is disabled, as it is no longer valid to deal with underflow.

Pros:
* Ignoring the generated tracer code, which is created automatically, this is only about 1k additional lines of code. The maintenance burden is handled by the DSL and code generator.
* `optimizer.c` is now significantly simpler, as we no longer have to do strange things to recover the bytecode from a trace.
* The new JIT frontend is able to handle a lot more control flow than the old one.
* Tracing has very low overhead. We use the tail-calling/computed-goto interpreter to switch between tracing mode and non-tracing mode. I call this mechanism dual dispatch, as we have two dispatch tables dispatching to each other (a sketch follows the design notes below). Specialization is still enabled while tracing.
* Better handling of polymorphism, by leveraging the specializing interpreter.

Cons:
* (For now) requires the tail-calling interpreter or computed gotos. This means no Windows JIT for now :(. Not to fret, tail calling is coming soon to Windows: https://github.com/python/cpython/pull/139962

Design:
* After each instruction, the `record_previous_inst` function/label is executed. It does what the name suggests.
* The tracing interpreter lowers bytecode to uops directly, so that it can obtain "fresh" values at the point of lowering.
* The tracing interpreter behaves nearly identically to the normal interpreter; it even specializes. This allows it to run without much slowdown while tracing: the actual cost of tracing is only a function call and some writes to memory.
* The tracing interpreter uses the specializing interpreter's deopts to naturally form side-exit chains. This lets it chain side exits effectively without repeating much code. We force a re-specialization when tracing through a deopt.
* The tracing interpreter can even handle goto errors/exceptions, but I chose to disable that for now as it is untested.
* Because we do not share interpreter dispatch, there should be no significant slowdown to the original specializing interpreter on the tail-calling and computed-goto builds with the JIT disabled. With the JIT enabled, there might be a slowdown in the form of the JIT trying to trace.
* Anything that could have dynamic instruction-pointer effects is guarded. The guard deopts to a new instruction, `_DYNAMIC_EXIT`.
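To make the dual-dispatch idea concrete, here is a minimal Python sketch. It is hypothetical, not the C implementation in this PR (the real mechanism uses tail calls/computed gotos, and records the *previous* instruction after each handler rather than the current one before it); the `VM` class and its handlers are invented for illustration. Two dispatch tables share the same handlers, and the tracing table wraps each handler with a recording step:

```python
# Hypothetical dual-dispatch sketch: two dispatch tables, one of which
# records each executed instruction before running the shared handler.
from typing import Callable

Handler = Callable[["VM", int], None]

class VM:
    def __init__(self, code: list[tuple[str, int]]) -> None:
        self.code = code
        self.pc = 0
        self.stack: list[int] = []
        self.trace: list[tuple[str, int]] = []
        base: dict[str, Handler] = {
            "PUSH": lambda vm, arg: vm.stack.append(arg),
            "ADD": lambda vm, arg: vm.stack.append(vm.stack.pop() + vm.stack.pop()),
        }
        # Tracing table: the same handlers, each wrapped with a recording step.
        tracing = {op: self._recording(h) for op, h in base.items()}
        self.tables = {"normal": base, "tracing": tracing}
        self.mode = "normal"

    def _recording(self, handler: Handler) -> Handler:
        def wrapped(vm: "VM", arg: int) -> None:
            vm.trace.append(vm.code[vm.pc])  # record, then execute as usual
            handler(vm, arg)
        return wrapped

    def run(self, start_tracing_at: int) -> None:
        while self.pc < len(self.code):
            if self.pc == start_tracing_at:
                self.mode = "tracing"  # flip dispatch tables mid-run
            op, arg = self.code[self.pc]
            self.tables[self.mode][op](self, arg)
            self.pc += 1

vm = VM([("PUSH", 1), ("PUSH", 2), ("ADD", 0)])
vm.run(start_tracing_at=1)
assert vm.stack == [3] and [op for op, _ in vm.trace] == ["PUSH", "ADD"]
```

The point of the design is visible even in the sketch: the handlers themselves (including specialization, in the real interpreter) are unchanged, so the cost of tracing is just the extra indirection and the trace append.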
from dataclasses import dataclass
import itertools
import re
from typing import Optional, Callable

import lexer
import parser
from parser import Stmt, SimpleStmt, BlockStmt, IfStmt, WhileStmt, ForStmt, MacroIfStmt


@dataclass
class EscapingCall:
    stmt: SimpleStmt
    call: lexer.Token
    kills: lexer.Token | None


@dataclass
class Properties:
    escaping_calls: dict[SimpleStmt, EscapingCall]
    escapes: bool
    error_with_pop: bool
    error_without_pop: bool
    deopts: bool
    deopts_periodic: bool
    oparg: bool
    jumps: bool
    eval_breaker: bool
    needs_this: bool
    always_exits: bool
    stores_sp: bool
    uses_co_consts: bool
    uses_co_names: bool
    uses_locals: bool
    has_free: bool
    side_exit: bool
    pure: bool
    uses_opcode: bool
    needs_guard_ip: bool
    unpredictable_jump: bool
    tier: int | None = None
    const_oparg: int = -1
    needs_prev: bool = False
    no_save_ip: bool = False

    def dump(self, indent: str) -> None:
        simple_properties = self.__dict__.copy()
        del simple_properties["escaping_calls"]
        text = "escaping_calls:\n"
        for tkns in self.escaping_calls.values():
            text += f"{indent} {tkns}\n"
        text += ", ".join([f"{key}: {value}" for (key, value) in simple_properties.items()])
        print(indent, text, sep="")

    @staticmethod
    def from_list(properties: list["Properties"]) -> "Properties":
        escaping_calls: dict[SimpleStmt, EscapingCall] = {}
        for p in properties:
            escaping_calls.update(p.escaping_calls)
        return Properties(
            escaping_calls=escaping_calls,
            escapes=any(p.escapes for p in properties),
            error_with_pop=any(p.error_with_pop for p in properties),
            error_without_pop=any(p.error_without_pop for p in properties),
            deopts=any(p.deopts for p in properties),
            deopts_periodic=any(p.deopts_periodic for p in properties),
            oparg=any(p.oparg for p in properties),
            jumps=any(p.jumps for p in properties),
            eval_breaker=any(p.eval_breaker for p in properties),
            needs_this=any(p.needs_this for p in properties),
            always_exits=any(p.always_exits for p in properties),
            stores_sp=any(p.stores_sp for p in properties),
            uses_co_consts=any(p.uses_co_consts for p in properties),
            uses_co_names=any(p.uses_co_names for p in properties),
            uses_locals=any(p.uses_locals for p in properties),
            uses_opcode=any(p.uses_opcode for p in properties),
            has_free=any(p.has_free for p in properties),
            side_exit=any(p.side_exit for p in properties),
            pure=all(p.pure for p in properties),
            needs_prev=any(p.needs_prev for p in properties),
            no_save_ip=all(p.no_save_ip for p in properties),
            needs_guard_ip=any(p.needs_guard_ip for p in properties),
            unpredictable_jump=any(p.unpredictable_jump for p in properties),
        )

    @property
    def infallible(self) -> bool:
        return not self.error_with_pop and not self.error_without_pop


SKIP_PROPERTIES = Properties(
    escaping_calls={},
    escapes=False,
    error_with_pop=False,
    error_without_pop=False,
    deopts=False,
    deopts_periodic=False,
    oparg=False,
    jumps=False,
    eval_breaker=False,
    needs_this=False,
    always_exits=False,
    stores_sp=False,
    uses_co_consts=False,
    uses_co_names=False,
    uses_locals=False,
    uses_opcode=False,
    has_free=False,
    side_exit=False,
    pure=True,
    no_save_ip=False,
    needs_guard_ip=False,
    unpredictable_jump=False,
)


@dataclass
class Skip:
    "Unused cache entry"
    size: int

    @property
    def name(self) -> str:
        return f"unused/{self.size}"

    @property
    def properties(self) -> Properties:
        return SKIP_PROPERTIES


class Flush:
    @property
    def properties(self) -> Properties:
        return SKIP_PROPERTIES

    @property
    def name(self) -> str:
        return "flush"

    @property
    def size(self) -> int:
        return 0


@dataclass
class StackItem:
    name: str
    size: str
    peek: bool = False
    used: bool = False

    def __str__(self) -> str:
        size = f"[{self.size}]" if self.size else ""
        return f"{self.name}{size} {self.peek}"

    def is_array(self) -> bool:
        return self.size != ""

    def get_size(self) -> str:
        return self.size if self.size else "1"


@dataclass
class StackEffect:
    inputs: list[StackItem]
    outputs: list[StackItem]

    def __str__(self) -> str:
        return f"({', '.join([str(i) for i in self.inputs])} -- {', '.join([str(i) for i in self.outputs])})"


@dataclass
class CacheEntry:
    name: str
    size: int

    def __str__(self) -> str:
        return f"{self.name}/{self.size}"


@dataclass
class Uop:
    name: str
    context: parser.Context | None
    annotations: list[str]
    stack: StackEffect
    caches: list[CacheEntry]
    local_stores: list[lexer.Token]
    body: BlockStmt
    properties: Properties
    _size: int = -1
    implicitly_created: bool = False
    replicated = range(0)
    replicates: "Uop | None" = None
    # Size of the instruction(s), only set for uops containing the INSTRUCTION_SIZE macro
    instruction_size: int | None = None

    def dump(self, indent: str) -> None:
        print(
            indent, self.name, ", ".join(self.annotations) if self.annotations else ""
        )
        print(indent, self.stack, ", ".join([str(c) for c in self.caches]))
        self.properties.dump(" " + indent)

    @property
    def size(self) -> int:
        if self._size < 0:
            self._size = sum(c.size for c in self.caches)
        return self._size

    def why_not_viable(self) -> str | None:
        if self.name == "_SAVE_RETURN_OFFSET":
            return None  # Adjusts next_instr, but only in tier 1 code
        if "INSTRUMENTED" in self.name:
            return "is instrumented"
        if "replaced" in self.annotations:
            return "is replaced"
        if self.name in ("INTERPRETER_EXIT", "JUMP_BACKWARD"):
            return "has tier 1 control flow"
        if self.properties.needs_this:
            return "uses the 'this_instr' variable"
        if len([c for c in self.caches if c.name != "unused"]) > 2:
            return "has too many cache entries"
        if self.properties.error_with_pop and self.properties.error_without_pop:
            return "has both popping and not-popping errors"
        return None

    def is_viable(self) -> bool:
        return self.why_not_viable() is None

    def is_super(self) -> bool:
        for tkn in self.body.tokens():
            if tkn.kind == "IDENTIFIER" and tkn.text == "oparg1":
                return True
        return False


class Label:
    size: int = 0
    local_stores: list[lexer.Token] = []
    instruction_size = None

    def __init__(self, name: str, spilled: bool, body: BlockStmt, properties: Properties):
        self.name = name
        self.spilled = spilled
        self.body = body
        self.properties = properties

    def __str__(self) -> str:
        return f"label({self.name})"


Part = Uop | Skip | Flush
CodeSection = Uop | Label


@dataclass
class Instruction:
    where: lexer.Token
    name: str
    parts: list[Part]
    _properties: Properties | None
    is_target: bool = False
    family: Optional["Family"] = None
    opcode: int = -1

    @property
    def properties(self) -> Properties:
        if self._properties is None:
            self._properties = self._compute_properties()
        return self._properties

    def _compute_properties(self) -> Properties:
        return Properties.from_list([part.properties for part in self.parts])

    def dump(self, indent: str) -> None:
        print(indent, self.name, "=", ", ".join([part.name for part in self.parts]))
        self.properties.dump(" " + indent)

    @property
    def size(self) -> int:
        return 1 + sum(part.size for part in self.parts)

    def is_super(self) -> bool:
        if len(self.parts) != 1:
            return False
        uop = self.parts[0]
        if isinstance(uop, Uop):
            return uop.is_super()
        else:
            return False


@dataclass
class PseudoInstruction:
    name: str
    stack: StackEffect
    targets: list[Instruction]
    as_sequence: bool
    flags: list[str]
    opcode: int = -1

    def dump(self, indent: str) -> None:
        print(indent, self.name, "->", " or ".join([t.name for t in self.targets]))

    @property
    def properties(self) -> Properties:
        return Properties.from_list([i.properties for i in self.targets])


@dataclass
class Family:
    name: str
    size: str
    members: list[Instruction]

    def dump(self, indent: str) -> None:
        print(indent, self.name, "= ", ", ".join([m.name for m in self.members]))


@dataclass
class Analysis:
    instructions: dict[str, Instruction]
    uops: dict[str, Uop]
    families: dict[str, Family]
    pseudos: dict[str, PseudoInstruction]
    labels: dict[str, Label]
    opmap: dict[str, int]
    have_arg: int
    min_instrumented: int


def analysis_error(message: str, tkn: lexer.Token) -> SyntaxError:
    # To do -- support file and line output
    # Construct a SyntaxError instance from message and token
    return lexer.make_syntax_error(message, tkn.filename, tkn.line, tkn.column, "")


def override_error(
    name: str,
    context: parser.Context | None,
    prev_context: parser.Context | None,
    token: lexer.Token,
) -> SyntaxError:
    return analysis_error(
        f"Duplicate definition of '{name}' @ {context} "
        f"previous definition @ {prev_context}",
        token,
    )


def convert_stack_item(
    item: parser.StackEffect, replace_op_arg_1: str | None
) -> StackItem:
    return StackItem(item.name, item.size)


def check_unused(stack: list[StackItem], input_names: dict[str, lexer.Token]) -> None:
    "Unused items cannot be on the stack above used, non-peek items"
    seen_unused = False
    for item in reversed(stack):
        if item.name == "unused":
            seen_unused = True
        elif item.peek:
            break
        elif seen_unused:
            raise analysis_error(
                f"Cannot have used input '{item.name}' below an unused value on the stack",
                input_names[item.name],
            )


def analyze_stack(
    op: parser.InstDef | parser.Pseudo, replace_op_arg_1: str | None = None
) -> StackEffect:
    inputs: list[StackItem] = [
        convert_stack_item(i, replace_op_arg_1)
        for i in op.inputs
        if isinstance(i, parser.StackEffect)
    ]
    outputs: list[StackItem] = [
        convert_stack_item(i, replace_op_arg_1) for i in op.outputs
    ]
    # Mark variables with matching names at the base of the stack as "peek"
    modified = False
    input_names: dict[str, lexer.Token] = {
        i.name: i.first_token for i in op.inputs if i.name != "unused"
    }
    for input, output in itertools.zip_longest(inputs, outputs):
        if output is None:
            pass
        elif input is None:
            if output.name in input_names:
                raise analysis_error(
                    f"Reuse of variable '{output.name}' at different stack location",
                    input_names[output.name])
        elif input.name == output.name:
            if not modified:
                input.peek = output.peek = True
        else:
            modified = True
            if output.name in input_names:
                raise analysis_error(
                    f"Reuse of variable '{output.name}' at different stack location",
                    input_names[output.name])
    if isinstance(op, parser.InstDef):
        output_names = [out.name for out in outputs]
        for input in inputs:
            if (
                variable_used(op, input.name)
                or variable_used(op, "DECREF_INPUTS")
                or (not input.peek and input.name in output_names)
            ):
                input.used = True
        for output in outputs:
            if variable_used(op, output.name):
                output.used = True
    check_unused(inputs, input_names)
    return StackEffect(inputs, outputs)


def analyze_caches(inputs: list[parser.InputEffect]) -> list[CacheEntry]:
    caches: list[parser.CacheEffect] = [
        i for i in inputs if isinstance(i, parser.CacheEffect)
    ]
    if caches:
        # Middle entries are allowed to be unused. Check first and last caches.
        for index in (0, -1):
            cache = caches[index]
            if cache.name == "unused":
                position = "First" if index == 0 else "Last"
                msg = f"{position} cache entry in op is unused. Move to enclosing macro."
                raise analysis_error(msg, cache.tokens[0])
    return [CacheEntry(i.name, int(i.size)) for i in caches]


def find_variable_stores(node: parser.InstDef) -> list[lexer.Token]:
    res: list[lexer.Token] = []
    outnames = {out.name for out in node.outputs}
    innames = {out.name for out in node.inputs}

    def find_stores_in_tokens(
        tokens: list[lexer.Token], callback: Callable[[lexer.Token], None]
    ) -> None:
        while tokens and tokens[0].kind == "COMMENT":
            tokens = tokens[1:]
        if len(tokens) < 4:
            return
        if tokens[1].kind == "EQUALS":
            if tokens[0].kind == "IDENTIFIER":
                name = tokens[0].text
                if name in outnames or name in innames:
                    callback(tokens[0])
        # Passing the address of a local is also a definition
        for idx, tkn in enumerate(tokens):
            if tkn.kind == "AND":
                name_tkn = tokens[idx + 1]
                if name_tkn.text in outnames:
                    callback(name_tkn)

    def visit(stmt: Stmt) -> None:
        if isinstance(stmt, IfStmt):
            def error(tkn: lexer.Token) -> None:
                raise analysis_error("Cannot define variable in 'if' condition", tkn)

            find_stores_in_tokens(stmt.condition, error)
        elif isinstance(stmt, SimpleStmt):
            find_stores_in_tokens(stmt.contents, res.append)

    node.block.accept(visit)
    return res


# def analyze_deferred_refs(node: parser.InstDef) -> dict[lexer.Token, str | None]:
#     """Look for PyStackRef_FromPyObjectNew() calls"""
#
#     def in_frame_push(idx: int) -> bool:
#         for tkn in reversed(node.block.tokens[: idx - 1]):
#             if tkn.kind in {"SEMI", "LBRACE", "RBRACE"}:
#                 return False
#             if tkn.kind == "IDENTIFIER" and tkn.text == "_PyFrame_PushUnchecked":
#                 return True
#         return False
#
#     refs: dict[lexer.Token, str | None] = {}
#     for idx, tkn in enumerate(node.block.tokens):
#         if tkn.kind != "IDENTIFIER" or tkn.text != "PyStackRef_FromPyObjectNew":
#             continue
#
#         if idx == 0 or node.block.tokens[idx - 1].kind != "EQUALS":
#             if in_frame_push(idx):
#                 # PyStackRef_FromPyObjectNew() is called in _PyFrame_PushUnchecked()
#                 refs[tkn] = None
#                 continue
#             raise analysis_error("Expected '=' before PyStackRef_FromPyObjectNew", tkn)
#
#         lhs = find_assignment_target(node, idx - 1)
#         if len(lhs) == 0:
#             raise analysis_error(
#                 "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
#             )
#
#         if lhs[0].kind == "TIMES" or any(
#             t.kind == "ARROW" or t.kind == "LBRACKET" for t in lhs[1:]
#         ):
#             # Don't handle: *ptr = ..., ptr->field = ..., or ptr[field] = ...
#             # Assume that they are visible to the GC.
#             refs[tkn] = None
#             continue
#
#         if len(lhs) != 1 or lhs[0].kind != "IDENTIFIER":
#             raise analysis_error(
#                 "PyStackRef_FromPyObjectNew() must be assigned to an output", tkn
#             )
#
#         name = lhs[0].text
#         match = (
#             any(var.name == name for var in node.inputs)
#             or any(var.name == name for var in node.outputs)
#         )
#         if not match:
#             raise analysis_error(
#                 f"PyStackRef_FromPyObjectNew() must be assigned to an input or output, not '{name}'",
#                 tkn,
#             )
#
#         refs[tkn] = name
#
#     return refs


def variable_used(node: parser.CodeDef, name: str) -> bool:
    """Determine whether a variable with a given name is used in a node."""
    return any(
        token.kind == "IDENTIFIER" and token.text == name
        for token in node.block.tokens()
    )


def oparg_used(node: parser.CodeDef) -> bool:
    """Determine whether `oparg` is used in a node."""
    return any(
        token.kind == "IDENTIFIER" and token.text == "oparg" for token in node.tokens
    )


def tier_variable(node: parser.CodeDef) -> int | None:
    """Determine whether a tier variable is used in a node."""
    if isinstance(node, parser.LabelDef):
        return None
    for token in node.tokens:
        if token.kind == "ANNOTATION":
            if token.text == "specializing":
                return 1
            if re.fullmatch(r"tier\d", token.text):
                return int(token.text[-1])
    return None


def has_error_with_pop(op: parser.CodeDef) -> bool:
    return (
        variable_used(op, "ERROR_IF")
        or variable_used(op, "exception_unwind")
    )


def has_error_without_pop(op: parser.CodeDef) -> bool:
    return (
        variable_used(op, "ERROR_NO_POP")
        or variable_used(op, "exception_unwind")
    )


NON_ESCAPING_FUNCTIONS = (
    "PyCFunction_GET_FLAGS",
    "PyCFunction_GET_FUNCTION",
    "PyCFunction_GET_SELF",
    "PyCell_GetRef",
    "PyCell_New",
    "PyCell_SwapTakeRef",
    "PyExceptionInstance_Class",
    "PyException_GetCause",
    "PyException_GetContext",
    "PyException_GetTraceback",
    "PyFloat_AS_DOUBLE",
    "PyFloat_FromDouble",
    "PyFunction_GET_CODE",
    "PyFunction_GET_GLOBALS",
    "PyList_GET_ITEM",
    "PyList_GET_SIZE",
    "PyList_SET_ITEM",
    "PyLong_AsLong",
    "PyLong_FromLong",
    "PyLong_FromSsize_t",
    "PySlice_New",
    "PyStackRef_AsPyObjectBorrow",
    "PyStackRef_AsPyObjectNew",
    "PyStackRef_FromPyObjectNewMortal",
    "PyStackRef_AsPyObjectSteal",
    "PyStackRef_Borrow",
    "PyStackRef_CLEAR",
    "PyStackRef_CLOSE_SPECIALIZED",
    "PyStackRef_DUP",
    "PyStackRef_False",
    "PyStackRef_FromPyObjectBorrow",
    "PyStackRef_FromPyObjectNew",
    "PyStackRef_FromPyObjectSteal",
    "PyStackRef_IsExactly",
    "PyStackRef_FromPyObjectStealMortal",
    "PyStackRef_IsNone",
    "PyStackRef_Is",
    "PyStackRef_IsHeapSafe",
    "PyStackRef_IsTrue",
    "PyStackRef_IsFalse",
    "PyStackRef_IsNull",
    "PyStackRef_MakeHeapSafe",
    "PyStackRef_None",
    "PyStackRef_RefcountOnObject",
    "PyStackRef_TYPE",
    "PyStackRef_True",
    "PyTuple_GET_ITEM",
    "PyTuple_GET_SIZE",
    "PyType_HasFeature",
    "PyUnicode_Concat",
    "PyUnicode_GET_LENGTH",
    "PyUnicode_READ_CHAR",
    "Py_ARRAY_LENGTH",
    "Py_FatalError",
    "Py_INCREF",
    "Py_IS_TYPE",
    "Py_NewRef",
    "Py_REFCNT",
    "Py_SIZE",
    "Py_TYPE",
    "Py_UNREACHABLE",
    "Py_Unicode_GET_LENGTH",
    "_PyCode_CODE",
    "_PyDictValues_AddToInsertionOrder",
    "_PyErr_Occurred",
    "_PyFloat_FromDouble_ConsumeInputs",
    "_PyFrame_GetBytecode",
    "_PyFrame_GetCode",
    "_PyFrame_IsIncomplete",
    "_PyFrame_PushUnchecked",
    "_PyFrame_SetStackPointer",
    "_PyFrame_StackPush",
    "_PyFunction_SetVersion",
    "_PyGen_GetGeneratorFromFrame",
    "_PyInterpreterState_GET",
    "_PyList_AppendTakeRef",
    "_PyList_ITEMS",
    "_PyLong_CompactValue",
    "_PyLong_DigitCount",
    "_PyLong_IsCompact",
    "_PyLong_IsNegative",
    "_PyLong_IsNonNegativeCompact",
    "_PyLong_IsZero",
    "_PyLong_BothAreCompact",
    "_PyCompactLong_Add",
    "_PyCompactLong_Multiply",
    "_PyCompactLong_Subtract",
    "_PyManagedDictPointer_IsValues",
    "_PyObject_GC_IS_SHARED",
    "_PyObject_GC_IS_TRACKED",
    "_PyObject_GC_MAY_BE_TRACKED",
    "_PyObject_GC_TRACK",
    "_PyObject_GetManagedDict",
    "_PyObject_InlineValues",
    "_PyObject_IsUniquelyReferenced",
    "_PyObject_ManagedDictPointer",
    "_PyThreadState_HasStackSpace",
    "_PyTuple_FromStackRefStealOnSuccess",
    "_PyTuple_ITEMS",
    "_PyType_HasFeature",
    "_PyType_NewManagedObject",
    "_PyUnicode_Equal",
    "_PyUnicode_JoinArray",
    "_Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY",
    "_Py_DECREF_NO_DEALLOC",
    "_Py_ID",
    "_Py_IsImmortal",
    "_Py_IsOwnedByCurrentThread",
    "_Py_LeaveRecursiveCallPy",
    "_Py_LeaveRecursiveCallTstate",
    "_Py_NewRef",
    "_Py_SINGLETON",
    "_Py_STR",
    "_Py_TryIncrefCompare",
    "_Py_TryIncrefCompareStackRef",
    "_Py_atomic_compare_exchange_uint8",
    "_Py_atomic_load_ptr_acquire",
    "_Py_atomic_load_uintptr_relaxed",
    "_Py_set_eval_breaker_bit",
    "advance_backoff_counter",
    "assert",
    "backoff_counter_triggers",
    "initial_temperature_backoff_counter",
    "JUMP_TO_LABEL",
    "restart_backoff_counter",
    "_Py_ReachedRecursionLimit",
    "PyStackRef_IsTaggedInt",
    "PyStackRef_TagInt",
    "PyStackRef_UntagInt",
    "PyStackRef_IncrementTaggedIntNoOverflow",
    "PyStackRef_IsNullOrInt",
    "PyStackRef_IsError",
    "PyStackRef_IsValid",
    "PyStackRef_Wrap",
    "PyStackRef_Unwrap",
    "_PyLong_CheckExactAndCompact",
    "_PyExecutor_FromExit",
    "_PyJit_TryInitializeTracing",
    "_Py_unset_eval_breaker_bit",
    "trigger_backoff_counter",
)


def check_escaping_calls(instr: parser.CodeDef, escapes: dict[SimpleStmt, EscapingCall]) -> None:
    error: lexer.Token | None = None
    calls = {e.call for e in escapes.values()}

    def visit(stmt: Stmt) -> None:
        nonlocal error
        if isinstance(stmt, (IfStmt, WhileStmt)):
            for tkn in stmt.condition:
                if tkn in calls:
                    error = tkn
        elif isinstance(stmt, SimpleStmt):
            in_if = 0
            tkn_iter = iter(stmt.contents)
            for tkn in tkn_iter:
                if tkn.kind == "IDENTIFIER" and tkn.text in (
                    "DEOPT_IF", "ERROR_IF", "EXIT_IF", "HANDLE_PENDING_AND_DEOPT_IF", "AT_END_EXIT_IF"
                ):
                    in_if = 1
                    next(tkn_iter)
                elif tkn.kind == "LPAREN":
                    if in_if:
                        in_if += 1
                elif tkn.kind == "RPAREN":
                    if in_if:
                        in_if -= 1
                elif tkn in calls and in_if:
                    error = tkn

    instr.block.accept(visit)
    if error is not None:
        raise analysis_error(f"Escaping call '{error.text}' in condition", error)


def escaping_call_in_simple_stmt(stmt: SimpleStmt, result: dict[SimpleStmt, EscapingCall]) -> None:
    tokens = stmt.contents
    for idx, tkn in enumerate(tokens):
        try:
            next_tkn = tokens[idx + 1]
        except IndexError:
            break
        if next_tkn.kind != lexer.LPAREN:
            continue
        if tkn.kind == lexer.IDENTIFIER:
            if tkn.text.upper() == tkn.text:
                # simple macro
                continue
            # if not tkn.text.startswith(("Py", "_Py", "monitor")):
            #     continue
            if tkn.text.startswith(("sym_", "optimize_", "PyJitRef")):
                # Optimize functions
                continue
            if tkn.text.endswith("Check"):
                continue
            if tkn.text.startswith("Py_Is"):
                continue
            if tkn.text.endswith("CheckExact"):
                continue
            if tkn.text in NON_ESCAPING_FUNCTIONS:
                continue
        elif tkn.kind == "RPAREN":
            prev = tokens[idx - 1]
            if prev.text.endswith("_t") or prev.text == "*" or prev.text == "int":
                # cast
                continue
        elif tkn.kind != "RBRACKET":
            continue
        if tkn.text in ("PyStackRef_CLOSE", "PyStackRef_XCLOSE"):
            if len(tokens) <= idx + 2:
                raise analysis_error("Unexpected end of file", next_tkn)
            kills = tokens[idx + 2]
            if kills.kind != "IDENTIFIER":
                raise analysis_error(f"Expected identifier, got '{kills.text}'", kills)
        else:
            kills = None
        result[stmt] = EscapingCall(stmt, tkn, kills)


def find_escaping_api_calls(instr: parser.CodeDef) -> dict[SimpleStmt, EscapingCall]:
    result: dict[SimpleStmt, EscapingCall] = {}

    def visit(stmt: Stmt) -> None:
        if not isinstance(stmt, SimpleStmt):
            return
        escaping_call_in_simple_stmt(stmt, result)

    instr.block.accept(visit)
    check_escaping_calls(instr, result)
    return result


EXITS = {
    "DISPATCH",
    "Py_UNREACHABLE",
    "DISPATCH_INLINED",
    "DISPATCH_GOTO",
}


def always_exits(op: parser.CodeDef) -> bool:
    depth = 0
    tkn_iter = iter(op.tokens)
    for tkn in tkn_iter:
        if tkn.kind == "LBRACE":
            depth += 1
        elif tkn.kind == "RBRACE":
            depth -= 1
        elif depth > 1:
            continue
        elif tkn.kind == "GOTO" or tkn.kind == "RETURN":
            return True
        elif tkn.kind == "KEYWORD":
            if tkn.text in EXITS:
                return True
        elif tkn.kind == "IDENTIFIER":
            if tkn.text in EXITS:
                return True
            if tkn.text == "DEOPT_IF" or tkn.text == "ERROR_IF":
                next(tkn_iter)  # '('
                t = next(tkn_iter)
                if t.text in ("true", "1"):
                    return True
    return False


def stack_effect_only_peeks(instr: parser.InstDef) -> bool:
    stack_inputs = [s for s in instr.inputs if not isinstance(s, parser.CacheEffect)]
    if len(stack_inputs) != len(instr.outputs):
        return False
    if len(stack_inputs) == 0:
        return False
    return all(
        (s.name == other.name and s.size == other.size)
        for s, other in zip(stack_inputs, instr.outputs)
    )


def stmt_is_simple_exit(stmt: Stmt) -> bool:
    if not isinstance(stmt, SimpleStmt):
        return False
    tokens = stmt.contents
    if len(tokens) < 4:
        return False
    return (
        tokens[0].text in ("ERROR_IF", "DEOPT_IF", "EXIT_IF", "AT_END_EXIT_IF")
        and tokens[1].text == "("
        and tokens[2].text in ("true", "1")
        and tokens[3].text == ")"
    )


def stmt_list_escapes(stmts: list[Stmt]) -> bool:
    if not stmts:
        return False
    if stmt_is_simple_exit(stmts[-1]):
        return False
    for stmt in stmts:
        if stmt_escapes(stmt):
            return True
    return False


def stmt_escapes(stmt: Stmt) -> bool:
    if isinstance(stmt, BlockStmt):
        return stmt_list_escapes(stmt.body)
    elif isinstance(stmt, SimpleStmt):
        for tkn in stmt.contents:
            if tkn.text == "DECREF_INPUTS":
                return True
        d: dict[SimpleStmt, EscapingCall] = {}
        escaping_call_in_simple_stmt(stmt, d)
        return bool(d)
    elif isinstance(stmt, IfStmt):
        if stmt.else_body and stmt_escapes(stmt.else_body):
            return True
        return stmt_escapes(stmt.body)
    elif isinstance(stmt, MacroIfStmt):
        if stmt.else_body and stmt_list_escapes(stmt.else_body):
            return True
        return stmt_list_escapes(stmt.body)
    elif isinstance(stmt, ForStmt):
        return stmt_escapes(stmt.body)
    elif isinstance(stmt, WhileStmt):
        return stmt_escapes(stmt.body)
    else:
        assert False, "Unexpected statement type"


def stmt_has_jump_on_unpredictable_path_body(
    stmts: list[Stmt] | None, branches_seen: int
) -> tuple[bool, int]:
    """Apply stmt_has_jump_on_unpredictable_path to a list of statements,
    aggregating the (unpredictable, branches seen) results."""
    if not stmts:
        return False, branches_seen
    predict = False
    seen = 0
    for st in stmts:
        predict_body, seen_body = stmt_has_jump_on_unpredictable_path(st, branches_seen)
        predict = predict or predict_body
        seen += seen_body
    return predict, seen


def stmt_has_jump_on_unpredictable_path(stmt: Stmt, branches_seen: int) -> tuple[bool, int]:
    """Return a pair: whether `stmt` contains a JUMPBY that executes on only
    some control-flow paths, and the number of branches seen."""
    if isinstance(stmt, BlockStmt):
        return stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen)
    elif isinstance(stmt, SimpleStmt):
        for tkn in stmt.contents:
            if tkn.text == "JUMPBY":
                return True, branches_seen
        return False, branches_seen
    elif isinstance(stmt, IfStmt):
        predict, seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
        if stmt.else_body:
            predict_else, seen_else = stmt_has_jump_on_unpredictable_path(stmt.else_body, branches_seen)
            # Unpredictable if exactly one side of the if/else jumps.
            return predict != predict_else, seen + seen_else + 1
        return predict, seen + 1
    elif isinstance(stmt, MacroIfStmt):
        predict, seen = stmt_has_jump_on_unpredictable_path_body(stmt.body, branches_seen)
        if stmt.else_body:
            predict_else, seen_else = stmt_has_jump_on_unpredictable_path_body(stmt.else_body, branches_seen)
            # A macro #if is resolved at compile time, so no branch is counted.
            return predict != predict_else, seen + seen_else
        return predict, seen
    elif isinstance(stmt, ForStmt):
        unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
        return unpredictable, branches_seen + 1
    elif isinstance(stmt, WhileStmt):
        unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(stmt.body, branches_seen)
        return unpredictable, branches_seen + 1
    else:
        assert False, f"Unexpected statement type {stmt}"


def compute_properties(op: parser.CodeDef) -> Properties:
    escaping_calls = find_escaping_api_calls(op)
    has_free = (
        variable_used(op, "PyCell_New")
        or variable_used(op, "PyCell_GetRef")
        or variable_used(op, "PyCell_SetTakeRef")
        or variable_used(op, "PyCell_SwapTakeRef")
    )
    deopts_if = variable_used(op, "DEOPT_IF")
    exits_if = variable_used(op, "EXIT_IF") or variable_used(op, "AT_END_EXIT_IF")
    deopts_periodic = variable_used(op, "HANDLE_PENDING_AND_DEOPT_IF")
    exits_and_deopts = sum((deopts_if, exits_if, deopts_periodic))
    if exits_and_deopts > 1:
        tkn = op.tokens[0]
        raise lexer.make_syntax_error(
            "Op cannot contain more than one of EXIT_IF, DEOPT_IF and HANDLE_PENDING_AND_DEOPT_IF",
            tkn.filename,
            tkn.line,
            tkn.column,
            op.name,
        )
    error_with_pop = has_error_with_pop(op)
    error_without_pop = has_error_without_pop(op)
    escapes = stmt_escapes(op.block)
    pure = False if isinstance(op, parser.LabelDef) else "pure" in op.annotations
    no_save_ip = False if isinstance(op, parser.LabelDef) else "no_save_ip" in op.annotations
    unpredictable, branches_seen = stmt_has_jump_on_unpredictable_path(op.block, 0)
    unpredictable_jump = False if isinstance(op, parser.LabelDef) else (unpredictable and branches_seen > 0)
    return Properties(
        escaping_calls=escaping_calls,
        escapes=escapes,
        error_with_pop=error_with_pop,
        error_without_pop=error_without_pop,
        deopts=deopts_if,
        deopts_periodic=deopts_periodic,
        side_exit=exits_if,
        oparg=oparg_used(op),
        jumps=variable_used(op, "JUMPBY"),
        eval_breaker="CHECK_PERIODIC" in op.name,
        needs_this=variable_used(op, "this_instr"),
        always_exits=always_exits(op),
        stores_sp=variable_used(op, "SYNC_SP"),
        uses_co_consts=variable_used(op, "FRAME_CO_CONSTS"),
        uses_co_names=variable_used(op, "FRAME_CO_NAMES"),
        uses_locals=variable_used(op, "GETLOCAL") and not has_free,
        uses_opcode=variable_used(op, "opcode"),
        has_free=has_free,
        pure=pure,
        no_save_ip=no_save_ip,
        tier=tier_variable(op),
        needs_prev=variable_used(op, "prev_instr"),
        needs_guard_ip=(
            isinstance(op, parser.InstDef)
            and (unpredictable_jump and "replaced" not in op.annotations)
        )
        or variable_used(op, "LOAD_IP")
        or variable_used(op, "DISPATCH_INLINED"),
        unpredictable_jump=unpredictable_jump,
    )


def expand(items: list[StackItem], oparg: int) -> list[StackItem]:
    # Only replace array item with scalar if no more than one item is an array
    index = -1
    for i, item in enumerate(items):
        if "oparg" in item.size:
            if index >= 0:
                return items
            index = i
    if index < 0:
        return items
    try:
        count = int(eval(items[index].size.replace("oparg", str(oparg))))
    except ValueError:
        return items
    return items[:index] + [
        StackItem(items[index].name + f"_{i}", "", items[index].peek, items[index].used)
        for i in range(count)
    ] + items[index + 1:]


def scalarize_stack(stack: StackEffect, oparg: int) -> StackEffect:
    stack.inputs = expand(stack.inputs, oparg)
    stack.outputs = expand(stack.outputs, oparg)
    return stack


def make_uop(
    name: str,
    op: parser.InstDef,
    inputs: list[parser.InputEffect],
    uops: dict[str, Uop],
) -> Uop:
    result = Uop(
        name=name,
        context=op.context,
        annotations=op.annotations,
        stack=analyze_stack(op),
        caches=analyze_caches(inputs),
        local_stores=find_variable_stores(op),
        body=op.block,
        properties=compute_properties(op),
    )
    for anno in op.annotations:
        if anno.startswith("replicate"):
            text = anno[10:-1]
            start, stop = text.split(":")
            result.replicated = range(int(start), int(stop))
            break
    else:
        return result
    for oparg in result.replicated:
        name_x = name + "_" + str(oparg)
        properties = compute_properties(op)
        properties.oparg = False
        stack = analyze_stack(op)
        if not variable_used(op, "oparg"):
            stack = scalarize_stack(stack, oparg)
        else:
            properties.const_oparg = oparg
        rep = Uop(
            name=name_x,
            context=op.context,
            annotations=op.annotations,
            stack=stack,
            caches=analyze_caches(inputs),
            local_stores=find_variable_stores(op),
            body=op.block,
            properties=properties,
        )
        rep.replicates = result
        uops[name_x] = rep

    return result


def add_op(op: parser.InstDef, uops: dict[str, Uop]) -> None:
    assert op.kind == "op"
    if op.name in uops:
        if "override" not in op.annotations:
            raise override_error(
                op.name, op.context, uops[op.name].context, op.tokens[0]
            )
    uops[op.name] = make_uop(op.name, op, op.inputs, uops)


def add_instruction(
    where: lexer.Token,
    name: str,
    parts: list[Part],
    instructions: dict[str, Instruction],
) -> None:
    instructions[name] = Instruction(where, name, parts, None)


def desugar_inst(
    inst: parser.InstDef, instructions: dict[str, Instruction], uops: dict[str, Uop]
) -> None:
    assert inst.kind == "inst"
    name = inst.name
    op_inputs: list[parser.InputEffect] = []
    parts: list[Part] = []
    uop_index = -1
    # Move unused cache entries to the Instruction, removing them from the Uop.
    for input in inst.inputs:
        if isinstance(input, parser.CacheEffect) and input.name == "unused":
            parts.append(Skip(input.size))
        else:
            op_inputs.append(input)
            if uop_index < 0:
                uop_index = len(parts)
                # Place holder for the uop.
                parts.append(Skip(0))
    uop = make_uop("_" + inst.name, inst, op_inputs, uops)
    uop.implicitly_created = True
    uops[inst.name] = uop
    if uop_index < 0:
        parts.append(uop)
    else:
        parts[uop_index] = uop
    add_instruction(inst.first_token, name, parts, instructions)


def add_macro(
    macro: parser.Macro, instructions: dict[str, Instruction], uops: dict[str, Uop]
) -> None:
    parts: list[Part] = []
    for part in macro.uops:
        match part:
            case parser.OpName():
                if part.name == "flush":
                    parts.append(Flush())
                else:
                    if part.name not in uops:
                        raise analysis_error(
                            f"No Uop named {part.name}", macro.tokens[0]
                        )
                    parts.append(uops[part.name])
            case parser.CacheEffect():
                parts.append(Skip(part.size))
            case _:
                assert False
    assert parts
    add_instruction(macro.first_token, macro.name, parts, instructions)


def add_family(
    pfamily: parser.Family,
    instructions: dict[str, Instruction],
    families: dict[str, Family],
) -> None:
    family = Family(
        pfamily.name,
        pfamily.size,
        [instructions[member_name] for member_name in pfamily.members],
    )
    for member in family.members:
        member.family = family
    # The head of the family is an implicit jump target for DEOPTs
    instructions[family.name].is_target = True
    families[family.name] = family


def add_pseudo(
    pseudo: parser.Pseudo,
    instructions: dict[str, Instruction],
    pseudos: dict[str, PseudoInstruction],
) -> None:
    pseudos[pseudo.name] = PseudoInstruction(
        pseudo.name,
        analyze_stack(pseudo),
        [instructions[target] for target in pseudo.targets],
        pseudo.as_sequence,
        pseudo.flags,
    )


def add_label(
    label: parser.LabelDef,
    labels: dict[str, Label],
) -> None:
    properties = compute_properties(label)
    labels[label.name] = Label(label.name, label.spilled, label.block, properties)


def assign_opcodes(
    instructions: dict[str, Instruction],
    families: dict[str, Family],
    pseudos: dict[str, PseudoInstruction],
) -> tuple[dict[str, int], int, int]:
    """Assigns opcodes, then returns the opmap,
    have_arg and min_instrumented values"""
    instmap: dict[str, int] = {}

    # 0 is reserved for cache entries. This helps debugging.
    instmap["CACHE"] = 0

    # 17 is reserved as it is the initial value for the specializing counter.
    # This helps catch cases where we attempt to execute a cache.
    instmap["RESERVED"] = 17

    # 128 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py
    instmap["RESUME"] = 128

    # This is an historical oddity.
    instmap["BINARY_OP_INPLACE_ADD_UNICODE"] = 3

    instmap["INSTRUMENTED_LINE"] = 254
    instmap["ENTER_EXECUTOR"] = 255

    instrumented = [name for name in instructions if name.startswith("INSTRUMENTED")]

    specialized: set[str] = set()
    no_arg: list[str] = []
    has_arg: list[str] = []

    for family in families.values():
        specialized.update(inst.name for inst in family.members)

    for inst in instructions.values():
        name = inst.name
        if name in specialized:
            continue
        if name in instrumented:
            continue
        if inst.properties.oparg:
            has_arg.append(name)
        else:
            no_arg.append(name)

    # Specialized ops appear in their own section
    # Instrumented opcodes are at the end of the valid range
    min_internal = instmap["RESUME"] + 1
    min_instrumented = 254 - (len(instrumented) - 1)
    assert min_internal + len(specialized) < min_instrumented

    next_opcode = 1

    def add_instruction(name: str) -> None:
        nonlocal next_opcode
        if name in instmap:
            return  # Pre-defined name
        while next_opcode in instmap.values():
            next_opcode += 1
        instmap[name] = next_opcode
        next_opcode += 1

    for name in sorted(no_arg):
        add_instruction(name)
    for name in sorted(has_arg):
        add_instruction(name)
    # For compatibility
    next_opcode = min_internal
    for name in sorted(specialized):
        add_instruction(name)
    next_opcode = min_instrumented
    for name in instrumented:
        add_instruction(name)

    for name in instructions:
        instructions[name].opcode = instmap[name]

    for op, name in enumerate(sorted(pseudos), 256):
        instmap[name] = op
        pseudos[name].opcode = op

    return instmap, len(no_arg), min_instrumented


def get_instruction_size_for_uop(instructions: dict[str, Instruction], uop: Uop) -> int | None:
    """Return the size of the instruction that contains the given uop, or
    `None` if the uop does not contain the `INSTRUCTION_SIZE` macro.

    If there is more than one instruction that contains the uop,
    ensure that they all have the same size.
    """
    for tkn in uop.body.tokens():
        if tkn.text == "INSTRUCTION_SIZE":
            break
    else:
        return None

    size = None
    for inst in instructions.values():
        if uop in inst.parts:
            if size is None:
                size = inst.size
            if size != inst.size:
                raise analysis_error(
                    "All instructions containing a uop with the `INSTRUCTION_SIZE` macro "
                    f"must have the same size: {size} != {inst.size}",
                    tkn
                )
    if size is None:
        raise analysis_error(f"No instruction containing the uop '{uop.name}' was found", tkn)
    return size


def analyze_forest(forest: list[parser.AstNode]) -> Analysis:
    instructions: dict[str, Instruction] = {}
    uops: dict[str, Uop] = {}
    families: dict[str, Family] = {}
    pseudos: dict[str, PseudoInstruction] = {}
    labels: dict[str, Label] = {}
    for node in forest:
        match node:
            case parser.InstDef(name):
                if node.kind == "inst":
                    desugar_inst(node, instructions, uops)
                else:
                    assert node.kind == "op"
                    add_op(node, uops)
            case parser.Macro():
                pass
            case parser.Family():
                pass
            case parser.Pseudo():
                pass
            case parser.LabelDef():
                pass
            case _:
                assert False
    for node in forest:
        if isinstance(node, parser.Macro):
            add_macro(node, instructions, uops)
    for node in forest:
        match node:
            case parser.Family():
                add_family(node, instructions, families)
            case parser.Pseudo():
                add_pseudo(node, instructions, pseudos)
            case parser.LabelDef():
                add_label(node, labels)
            case _:
                pass
    for uop in uops.values():
        uop.instruction_size = get_instruction_size_for_uop(instructions, uop)
    # Special case BINARY_OP_INPLACE_ADD_UNICODE.
    # BINARY_OP_INPLACE_ADD_UNICODE is not a normal family member,
    # as it is the wrong size, but we need it to maintain an
    # historical optimization.
    if "BINARY_OP_INPLACE_ADD_UNICODE" in instructions:
        inst = instructions["BINARY_OP_INPLACE_ADD_UNICODE"]
        inst.family = families["BINARY_OP"]
        families["BINARY_OP"].members.append(inst)
    opmap, first_arg, min_instrumented = assign_opcodes(instructions, families, pseudos)
    return Analysis(
        instructions, uops, families, pseudos, labels, opmap, first_arg, min_instrumented
    )


def analyze_files(filenames: list[str]) -> Analysis:
    return analyze_forest(parser.parse_files(filenames))


def dump_analysis(analysis: Analysis) -> None:
    print("Uops:")
    for u in analysis.uops.values():
        u.dump(" ")
    print("Instructions:")
    for i in analysis.instructions.values():
        i.dump(" ")
    print("Families:")
    for f in analysis.families.values():
        f.dump(" ")
    print("Pseudos:")
    for p in analysis.pseudos.values():
        p.dump(" ")


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 2:
        print("No input")
    else:
        filenames = sys.argv[1:]
        dump_analysis(analyze_files(filenames))
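
# Example usage (hypothetical paths): running this module directly dumps the
# analysis for one or more instruction-definition files, e.g.
#
#     python analyzer.py Python/bytecodes.c
#
# assuming the input is CPython's bytecode definition file, as consumed by
# the rest of the cases generator.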