Data flow improvements in pseudo code generator
This commit is contained in:
@@ -29,6 +29,27 @@ BRANCH_CONDITIONS = {
|
||||
"BLE": "Z || (N != V)",
|
||||
}
|
||||
|
||||
# C-like flag expression that holds when each branch mnemonic is NOT taken.
# _if_candidate_at uses it as the `if (...)` condition for the fall-through
# body that a forward branch would otherwise skip.
# NOTE(review): each entry is expected to be the logical negation of the
# matching BRANCH_CONDITIONS entry -- confirm the two tables cover the same keys.
NEGATED_BRANCH_CONDITIONS = {
    "BRN": "1",
    "BHI": "C || Z",
    "BLS": "!C && !Z",
    "BCC": "C",
    "BCS": "!C",
    "BNE": "Z",
    "BEQ": "!Z",
    "BVC": "V",
    "BVS": "!V",
    "BPL": "N",
    "BMI": "!N",
    "BGE": "N != V",
    "BLT": "N == V",
    "BGT": "Z || (N != V)",
    "BLE": "!Z && (N == V)",
}

# Largest number of instructions _if_candidate_at accepts as an `if` body.
_MAX_STRUCTURED_IF_BODY = 8
# Largest number of instructions _loop_candidate_at accepts as a `do/while` body.
_MAX_STRUCTURED_LOOP_BODY = 24
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class PseudocodeOptions:
|
||||
@@ -37,6 +58,22 @@ class PseudocodeOptions:
|
||||
include_cycles: bool = False
|
||||
emit_declarations: bool = True
|
||||
max_functions: int | None = None
|
||||
structured: bool = True
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _IfCandidate:
    """A forward conditional branch that can be rendered as an `if` block.

    Produced by _if_candidate_at and consumed by _render_structured_block.
    """

    # Index (within the address list being rendered) of the branch target,
    # i.e. the first instruction after the `if` body.
    target_index: int
    # Address of the branch target; its label is suppressed once structured.
    target_address: int
    # Negated branch condition, used as the `if (...)` expression.
    condition: str
    # The branch instruction itself (used for the trailing line comment).
    instruction: JsonObject
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class _LoopCandidate:
    """A backward conditional branch that can be rendered as a `do/while` loop.

    Produced by _loop_candidate_at and consumed by _render_structured_block.
    """

    # Index (within the address list being rendered) of the closing branch;
    # the loop body is everything from the loop start up to this index.
    end_index: int
    # Branch-taken condition, used as the `while (...)` expression.
    condition: str
    # The closing branch instruction (used for the trailing line comment).
    instruction: JsonObject
|
||||
|
||||
|
||||
def generate_pseudocode(
|
||||
@@ -55,7 +92,7 @@ def generate_pseudocode(
|
||||
lines: list[str] = []
|
||||
lines.extend(_file_header(source_name, payload))
|
||||
if opts.emit_declarations:
|
||||
lines.extend(_declarations(instructions, functions, label_names))
|
||||
lines.extend(_declarations(payload, instructions, functions, label_names))
|
||||
|
||||
by_address = {int(ins["address"]): ins for ins in instructions}
|
||||
all_addresses = sorted(by_address)
|
||||
@@ -111,6 +148,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
parser.add_argument("--no-addresses", action="store_true", help="omit instruction addresses from line comments")
|
||||
parser.add_argument("--cycles", action="store_true", help="include cycle estimates when present in JSON")
|
||||
parser.add_argument("--no-declarations", action="store_true", help="omit register/function declarations")
|
||||
parser.add_argument("--no-structure", action="store_true", help="preserve label/goto output without if/loop structuring")
|
||||
parser.add_argument("--max-functions", type=int, default=None, help="emit only the first N functions")
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
@@ -120,6 +158,7 @@ def main(argv: list[str] | None = None) -> int:
|
||||
include_cycles=args.cycles,
|
||||
emit_declarations=not args.no_declarations,
|
||||
max_functions=args.max_functions,
|
||||
structured=not args.no_structure,
|
||||
)
|
||||
write_pseudocode(args.input, args.out, options)
|
||||
print(f"wrote {args.out}")
|
||||
@@ -159,7 +198,12 @@ def _file_header(source_name: str, payload: JsonObject) -> list[str]:
|
||||
]
|
||||
|
||||
|
||||
def _declarations(instructions: list[JsonObject], functions: list[JsonObject], labels: dict[int, str]) -> list[str]:
|
||||
def _declarations(
|
||||
payload: JsonObject,
|
||||
instructions: list[JsonObject],
|
||||
functions: list[JsonObject],
|
||||
labels: dict[int, str],
|
||||
) -> list[str]:
|
||||
lines: list[str] = []
|
||||
registers = _referenced_io_registers(instructions)
|
||||
if registers:
|
||||
@@ -169,6 +213,18 @@ def _declarations(instructions: list[JsonObject], functions: list[JsonObject], l
|
||||
lines.append(f"extern volatile {c_type} {c_identifier(name)}; /* 0x{address:04X} */")
|
||||
lines.append("")
|
||||
|
||||
memory_symbols = _referenced_memory_symbols(payload)
|
||||
if memory_symbols:
|
||||
lines.append("/* RAM/external symbols inferred from instruction references and data tables. */")
|
||||
for symbol in memory_symbols:
|
||||
c_type = "u16" if symbol.get("width") == "word" else "u8"
|
||||
width = symbol.get("width") or "unknown"
|
||||
lines.append(
|
||||
f"extern volatile {c_type} {c_identifier(str(symbol['name']))}; "
|
||||
f"/* 0x{int(symbol['address']):04X} {symbol['kind']} {width} */"
|
||||
)
|
||||
lines.append("")
|
||||
|
||||
if functions:
|
||||
lines.append("/* Function entry points discovered from vectors and call targets. */")
|
||||
for function in functions:
|
||||
@@ -193,6 +249,20 @@ def _referenced_io_registers(instructions: list[JsonObject]) -> dict[str, tuple[
|
||||
return registers
|
||||
|
||||
|
||||
def _referenced_memory_symbols(payload: JsonObject) -> list[JsonObject]:
|
||||
symbols = payload.get("symbols", {}).get("symbols", [])
|
||||
if not isinstance(symbols, list):
|
||||
return []
|
||||
memory_symbols: list[JsonObject] = []
|
||||
for symbol in symbols:
|
||||
if not isinstance(symbol, dict) or symbol.get("kind") == "register":
|
||||
continue
|
||||
if not symbol.get("name") or symbol.get("address") is None:
|
||||
continue
|
||||
memory_symbols.append(symbol)
|
||||
return sorted(memory_symbols, key=lambda symbol: int(symbol["address"]))
|
||||
|
||||
|
||||
def _collect_label_names(payload: JsonObject) -> dict[int, str]:
|
||||
labels: dict[int, str] = {}
|
||||
for vector in payload.get("vectors", []):
|
||||
@@ -258,13 +328,7 @@ def _render_function(
|
||||
if sources:
|
||||
lines.append(f" /* vector sources: {', '.join(str(source) for source in sources)} */")
|
||||
|
||||
for address in addresses:
|
||||
if address in local_targets and address != start:
|
||||
lines.append(f"{labels.get(address, _label_for(address))}:")
|
||||
ins = by_address[address]
|
||||
statement = _translate_instruction(ins, labels)
|
||||
comment = _line_comment(ins, opts)
|
||||
lines.append(f" {statement}{comment}")
|
||||
lines.extend(_render_instruction_block(addresses, by_address, labels, opts, local_targets, function_entry=start))
|
||||
|
||||
lines.append("}")
|
||||
lines.append("")
|
||||
@@ -281,16 +345,271 @@ def _render_orphan_block(
|
||||
local_targets = _local_target_addresses(addresses, by_address) | {
|
||||
address for address in addresses if address in labels
|
||||
}
|
||||
for address in addresses:
|
||||
if address in local_targets:
|
||||
lines.append(f"{labels.get(address, _label_for(address))}:")
|
||||
ins = by_address[address]
|
||||
lines.append(f" {_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
|
||||
lines.extend(_render_instruction_block(addresses, by_address, labels, opts, local_targets, function_entry=None))
|
||||
lines.append("}")
|
||||
lines.append("")
|
||||
return lines
|
||||
|
||||
|
||||
def _render_instruction_block(
    addresses: list[int],
    by_address: dict[int, JsonObject],
    labels: dict[int, str],
    opts: PseudocodeOptions,
    local_targets: set[int],
    *,
    function_entry: int | None,
) -> list[str]:
    """Render the instructions at ``addresses`` as body lines at indent level 1.

    When ``opts.structured`` is set, the structured renderer is used (it may
    fold branches into ``if`` / ``do-while`` blocks); otherwise every
    instruction is emitted linearly with labels and gotos. Both paths start
    with an empty set of suppressed labels.
    """
    if opts.structured:
        return _render_structured_block(
            addresses,
            by_address,
            labels,
            opts,
            local_targets,
            _incoming_local_targets(addresses, by_address),
            function_entry=function_entry,
            suppressed_labels=set(),
            indent=1,
        )

    return _render_linear_block(
        addresses,
        by_address,
        labels,
        opts,
        local_targets,
        function_entry=function_entry,
        suppressed_labels=set(),
        indent=1,
    )
|
||||
|
||||
|
||||
def _render_structured_block(
|
||||
addresses: list[int],
|
||||
by_address: dict[int, JsonObject],
|
||||
labels: dict[int, str],
|
||||
opts: PseudocodeOptions,
|
||||
local_targets: set[int],
|
||||
incoming: dict[int, set[int]],
|
||||
*,
|
||||
function_entry: int | None,
|
||||
suppressed_labels: set[int],
|
||||
indent: int,
|
||||
) -> list[str]:
|
||||
lines: list[str] = []
|
||||
address_to_index = {address: index for index, address in enumerate(addresses)}
|
||||
index = 0
|
||||
while index < len(addresses):
|
||||
loop = _loop_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
|
||||
if loop:
|
||||
start_address = addresses[index]
|
||||
suppressed_labels.add(start_address)
|
||||
lines.append(f"{_indent(indent)}do {{")
|
||||
lines.extend(
|
||||
_render_structured_block(
|
||||
addresses[index : loop.end_index],
|
||||
by_address,
|
||||
labels,
|
||||
opts,
|
||||
local_targets,
|
||||
incoming,
|
||||
function_entry=function_entry,
|
||||
suppressed_labels=suppressed_labels,
|
||||
indent=indent + 1,
|
||||
)
|
||||
)
|
||||
lines.append(f"{_indent(indent)}}} while ({loop.condition});{_line_comment(loop.instruction, opts)}")
|
||||
index = loop.end_index + 1
|
||||
continue
|
||||
|
||||
if_candidate = _if_candidate_at(index, addresses, address_to_index, by_address, local_targets, incoming)
|
||||
if if_candidate:
|
||||
suppressed_labels.add(if_candidate.target_address)
|
||||
lines.append(f"{_indent(indent)}if ({if_candidate.condition}) {{{_line_comment(if_candidate.instruction, opts)}")
|
||||
lines.extend(
|
||||
_render_structured_block(
|
||||
addresses[index + 1 : if_candidate.target_index],
|
||||
by_address,
|
||||
labels,
|
||||
opts,
|
||||
local_targets,
|
||||
incoming,
|
||||
function_entry=function_entry,
|
||||
suppressed_labels=suppressed_labels,
|
||||
indent=indent + 1,
|
||||
)
|
||||
)
|
||||
lines.append(f"{_indent(indent)}}}")
|
||||
index = if_candidate.target_index
|
||||
continue
|
||||
|
||||
address = addresses[index]
|
||||
lines.extend(
|
||||
_render_linear_block(
|
||||
[address],
|
||||
by_address,
|
||||
labels,
|
||||
opts,
|
||||
local_targets,
|
||||
function_entry=function_entry,
|
||||
suppressed_labels=suppressed_labels,
|
||||
indent=indent,
|
||||
)
|
||||
)
|
||||
index += 1
|
||||
return lines
|
||||
|
||||
|
||||
def _render_linear_block(
|
||||
addresses: list[int],
|
||||
by_address: dict[int, JsonObject],
|
||||
labels: dict[int, str],
|
||||
opts: PseudocodeOptions,
|
||||
local_targets: set[int],
|
||||
*,
|
||||
function_entry: int | None,
|
||||
suppressed_labels: set[int],
|
||||
indent: int,
|
||||
) -> list[str]:
|
||||
lines: list[str] = []
|
||||
for address in addresses:
|
||||
if _should_emit_label(address, local_targets, function_entry, suppressed_labels):
|
||||
lines.append(f"{_indent(max(indent - 1, 0))}{labels.get(address, _label_for(address))}:")
|
||||
ins = by_address[address]
|
||||
lines.append(f"{_indent(indent)}{_translate_instruction(ins, labels)}{_line_comment(ins, opts)}")
|
||||
return lines
|
||||
|
||||
|
||||
def _if_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _IfCandidate | None:
    """Return an ``if`` candidate for the branch at ``addresses[index]``, if any.

    The instruction qualifies when all of the following hold:

    * it is a conditional branch with exactly one target,
    * the target lies forward and inside ``addresses``,
    * the skipped body is non-empty, at most ``_MAX_STRUCTURED_IF_BODY``
      instructions long, and contains no branch/jump/return,
    * no body instruction is a local target or has incoming references,
    * the branch is the only instruction referencing the target,
    * a negated condition is known for the mnemonic.

    Returns ``None`` otherwise, in which case the caller emits the branch
    linearly.
    """
    address = addresses[index]
    ins = by_address[address]
    base = _conditional_branch_base(ins)
    if not base:
        return None

    target = _single_target_address(ins)
    if target is None or target <= address or target not in address_to_index:
        return None

    target_index = address_to_index[target]
    body_addresses = addresses[index + 1 : target_index]
    if not body_addresses or len(body_addresses) > _MAX_STRUCTURED_IF_BODY:
        return None
    if not _is_straight_line_span(body_addresses, by_address):
        return None
    if any(body_address in local_targets for body_address in body_addresses):
        return None
    if any(incoming.get(body_address) for body_address in body_addresses):
        return None
    # Only structure when this branch is the sole way to reach the target,
    # because the caller suppresses the target's label.
    if incoming.get(target, set()) != {address}:
        return None

    # A mnemonic accepted by _conditional_branch_base may have no negated
    # form in the table; fall back to unstructured output rather than
    # raising KeyError.
    condition = NEGATED_BRANCH_CONDITIONS.get(base)
    if condition is None:
        return None

    return _IfCandidate(
        target_index=target_index,
        target_address=target,
        condition=condition,
        instruction=ins,
    )
|
||||
|
||||
|
||||
def _loop_candidate_at(
    index: int,
    addresses: list[int],
    address_to_index: dict[int, int],
    by_address: dict[int, JsonObject],
    local_targets: set[int],
    incoming: dict[int, set[int]],
) -> _LoopCandidate | None:
    """Return a ``do/while`` candidate whose body starts at ``addresses[index]``.

    Scans forward up to ``_MAX_STRUCTURED_LOOP_BODY`` instructions for the
    first conditional branch whose single target is the start address. That
    branch closes the loop only if the body is straight-line code, nothing
    after the start (including the branch) is a local target or has incoming
    references, and the branch is the only reference to the start address.
    If the first such back-branch fails these checks, ``None`` is returned
    without looking further.
    """
    head = addresses[index]
    scan_limit = min(len(addresses), index + _MAX_STRUCTURED_LOOP_BODY + 1)

    for closing_index in range(index + 1, scan_limit):
        closing_address = addresses[closing_index]
        closing_branch = by_address[closing_address]

        mnemonic_base = _conditional_branch_base(closing_branch)
        if not mnemonic_base:
            continue
        back_target = _single_target_address(closing_branch)
        if back_target != head or back_target not in address_to_index:
            continue

        body = addresses[index:closing_index]
        # Everything after the loop head, including the closing branch.
        after_head = addresses[index + 1 : closing_index + 1]

        rejected = (
            not body
            or not _is_straight_line_span(body, by_address)
            or any(member in local_targets for member in after_head)
            or incoming.get(head, set()) != {closing_address}
            or any(incoming.get(member) for member in after_head)
        )
        if rejected:
            return None

        return _LoopCandidate(
            end_index=closing_index,
            condition=BRANCH_CONDITIONS[mnemonic_base],
            instruction=closing_branch,
        )

    return None
|
||||
|
||||
|
||||
def _incoming_local_targets(addresses: list[int], by_address: dict[int, JsonObject]) -> dict[int, set[int]]:
|
||||
address_set = set(addresses)
|
||||
incoming: dict[int, set[int]] = {address: set() for address in addresses}
|
||||
for source in addresses:
|
||||
for target in by_address[source].get("targets", []):
|
||||
target_address = int(target)
|
||||
if target_address in address_set:
|
||||
incoming[target_address].add(source)
|
||||
return incoming
|
||||
|
||||
|
||||
def _conditional_branch_base(ins: JsonObject) -> str | None:
|
||||
if str(ins.get("kind", "normal")) != "branch":
|
||||
return None
|
||||
base = _mnemonic_base(str(ins.get("mnemonic", "")))
|
||||
if base == "BRN" or base not in BRANCH_CONDITIONS:
|
||||
return None
|
||||
return base
|
||||
|
||||
|
||||
def _single_target_address(ins: JsonObject) -> int | None:
|
||||
targets = ins.get("targets", [])
|
||||
if len(targets) != 1:
|
||||
return None
|
||||
return int(targets[0])
|
||||
|
||||
|
||||
def _is_straight_line_span(addresses: list[int], by_address: dict[int, JsonObject]) -> bool:
|
||||
for address in addresses:
|
||||
kind = str(by_address[address].get("kind", "normal"))
|
||||
if kind in {"branch", "jump", "return", "rte"}:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def _should_emit_label(
|
||||
address: int,
|
||||
local_targets: set[int],
|
||||
function_entry: int | None,
|
||||
suppressed_labels: set[int],
|
||||
) -> bool:
|
||||
return address in local_targets and address != function_entry and address not in suppressed_labels
|
||||
|
||||
|
||||
def _indent(level: int) -> str:
|
||||
return " " * level
|
||||
|
||||
|
||||
def _local_target_addresses(addresses: list[int], by_address: dict[int, JsonObject]) -> set[int]:
|
||||
address_set = set(addresses)
|
||||
targets: set[int] = set()
|
||||
@@ -433,6 +752,9 @@ def _branch_or_jump_statement(ins: JsonObject, labels: dict[int, str], ops: list
|
||||
if base in {"BRA", "JMP", "PJMP"}:
|
||||
if target:
|
||||
return f"goto {target};"
|
||||
table_expr = _indirect_table_call_args(ins)
|
||||
if table_expr:
|
||||
return f"goto_indirect_table({table_expr});"
|
||||
expr = _format_operand(ops[0], "") if ops else "unknown_target"
|
||||
return f"goto_indirect({expr});"
|
||||
if base.startswith("SCB/"):
|
||||
@@ -447,10 +769,26 @@ def _call_statement(ins: JsonObject, labels: dict[int, str], ops: list[str]) ->
|
||||
target = _target_label(ins, labels)
|
||||
if target:
|
||||
return f"{target}();"
|
||||
table_expr = _indirect_table_call_args(ins)
|
||||
if table_expr:
|
||||
return f"call_indirect_table({table_expr});"
|
||||
expr = _format_operand(ops[0], "") if ops else "unknown_target"
|
||||
return f"call_indirect({expr});"
|
||||
|
||||
|
||||
def _indirect_table_call_args(ins: JsonObject) -> str:
|
||||
indirect = ins.get("indirect_flow")
|
||||
if not isinstance(indirect, dict):
|
||||
return ""
|
||||
table = indirect.get("table")
|
||||
if not isinstance(table, dict) or table.get("base") is None:
|
||||
return ""
|
||||
base = int(table["base"])
|
||||
index_register = c_identifier(str(table.get("index_register") or "index"))
|
||||
target_register = c_identifier(str(table.get("target_register") or indirect.get("target_register") or "target"))
|
||||
return f"0x{base:04X}, {index_register}, {target_register}"
|
||||
|
||||
|
||||
def _target_label(ins: JsonObject, labels: dict[int, str]) -> str:
|
||||
targets = ins.get("targets", [])
|
||||
if targets:
|
||||
@@ -562,6 +900,30 @@ def _metadata_comments(ins: JsonObject) -> list[str]:
|
||||
if isinstance(inference, dict) and inference.get("comment"):
|
||||
comments.append(str(inference["comment"]))
|
||||
|
||||
indirect = ins.get("indirect_flow")
|
||||
if isinstance(indirect, dict) and indirect.get("summary"):
|
||||
comments.append(str(indirect["summary"]))
|
||||
|
||||
dataflow = ins.get("dataflow")
|
||||
if isinstance(dataflow, dict):
|
||||
changes = dataflow.get("changes")
|
||||
if isinstance(changes, list):
|
||||
known_changes = [_dataflow_change_comment(change) for change in changes if isinstance(change, dict)]
|
||||
known_changes = [change for change in known_changes if change]
|
||||
if known_changes:
|
||||
suffix = " ..." if len(known_changes) > 4 else ""
|
||||
comments.append("dataflow " + ", ".join(known_changes[:4]) + suffix)
|
||||
|
||||
refs = []
|
||||
for ref in ins.get("references", []):
|
||||
if not isinstance(ref, dict):
|
||||
continue
|
||||
symbol = ref.get("symbol") or ref.get("name")
|
||||
if symbol:
|
||||
refs.append(str(symbol))
|
||||
if refs:
|
||||
comments.append("refs " + ", ".join(refs))
|
||||
|
||||
for access in ins.get("peripheral_access", []):
|
||||
if not isinstance(access, dict):
|
||||
continue
|
||||
@@ -574,6 +936,16 @@ def _metadata_comments(ins: JsonObject) -> list[str]:
|
||||
return comments
|
||||
|
||||
|
||||
def _dataflow_change_comment(change: JsonObject) -> str:
|
||||
after = change.get("after")
|
||||
if not isinstance(after, dict) or not after.get("known"):
|
||||
return ""
|
||||
width = int(after.get("width", 16))
|
||||
value = int(after["value"])
|
||||
digits = 2 if width <= 8 else 4
|
||||
return f"{change['name']}=0x{value:0{digits}X}"
|
||||
|
||||
|
||||
def _instruction_text(ins: JsonObject) -> str:
|
||||
mnemonic = str(ins.get("mnemonic", ""))
|
||||
operands = str(ins.get("operands", ""))
|
||||
|
||||
Reference in New Issue
Block a user