1
0

Data flow improvements in pseudo code generator

This commit is contained in:
Aiden
2026-05-25 14:40:55 +10:00
parent 80819448cf
commit 1d7f00e59c
16 changed files with 105891 additions and 5141 deletions

108
tests/test_dataflow.py Normal file
View File

@@ -0,0 +1,108 @@
import unittest
from h8536.dataflow import analyze_dataflow, state_for_instruction
from h8536.model import Instruction
def reg_after(analysis, address, register):
    """Return the recorded state of ``register`` after the instruction at ``address``.

    The value is read from
    ``analysis["instructions"][address]["after"]["registers"][register]``;
    a missing key at any level propagates as ``KeyError``.
    """
    record = analysis["instructions"][address]
    return record["after"]["registers"][register]
def reg_before(analysis, address, register):
    """Return the recorded state of ``register`` before the instruction at ``address``.

    The value is read from
    ``analysis["instructions"][address]["before"]["registers"][register]``;
    a missing key at any level propagates as ``KeyError``.
    """
    record = analysis["instructions"][address]
    return record["before"]["registers"][register]
def control_after(analysis, address, register):
    """Return the recorded state of control register ``register`` after ``address``.

    The value is read from
    ``analysis["instructions"][address]["after"]["control"][register]``;
    a missing key at any level propagates as ``KeyError``.
    """
    record = analysis["instructions"][address]
    return record["after"]["control"][register]
class DataflowTest(unittest.TestCase):
    """Exercises ``analyze_dataflow`` on small hand-built instruction maps."""

    def test_tracks_immediate_load_copy_and_simple_arithmetic(self):
        # Word immediate into R0, copy into R1, then +1 and -2 applied to R1.
        program = {
            0x0100: Instruction(0x0100, b"\x58\x02\x00", "MOV:I.W", "#H'0200, R0"),
            0x0103: Instruction(0x0103, b"\xA0\x81", "MOV:G.W", "R0, R1"),
            0x0105: Instruction(0x0105, b"\xA1\x08", "ADD:Q.W", "#1, R1"),
            0x0107: Instruction(0x0107, b"\x0C\x00\x02\x31", "SUB.W", "#H'0002, R1"),
        }
        result = analyze_dataflow(program)

        r0_loaded = reg_after(result, 0x0100, "R0")
        self.assertEqual(r0_loaded["value"], 0x0200)
        self.assertEqual(r0_loaded["width"], 16)
        self.assertEqual(reg_before(result, 0x0103, "R0")["value"], 0x0200)
        # Expected R1 value after each of the three instructions that write it.
        for address, expected in ((0x0103, 0x0200), (0x0105, 0x0201), (0x0107, 0x01FF)):
            self.assertEqual(reg_after(result, address, "R1")["value"], expected)

    def test_tracks_byte_immediates_without_promising_word_width(self):
        program = {
            0x0200: Instruction(0x0200, b"\x52\x7F", "MOV:E.B", "#H'7F, R2"),
            0x0202: Instruction(0x0202, b"\xA2\x83", "MOV:G.B", "R2, R3"),
            0x0204: Instruction(0x0204, b"\x58\x20\x00", "MOV:I.W", "#H'2000, R0"),
            0x0207: Instruction(0x0207, b"\xD0\x84", "MOV:G.W", "@R0, R4"),
        }
        result = analyze_dataflow(program)

        # Both the byte load and the byte copy must report an 8-bit value.
        for address, register in ((0x0200, "R2"), (0x0202, "R3")):
            state = reg_after(result, address, register)
            self.assertEqual(state["value"], 0x7F)
            self.assertEqual(state["width"], 8)

        self.assertEqual(reg_after(result, 0x0207, "R0")["value"], 0x2000)
        # The destination of the load through @R0 is reported as unknown.
        loaded = reg_after(result, 0x0207, "R4")
        self.assertFalse(loaded["known"])
        self.assertEqual(loaded["reason"], "memory_load")

    def test_calls_and_ambiguous_branches_do_not_leak_known_state(self):
        program = {
            0x0300: Instruction(0x0300, b"\x58\x12\x34", "MOV:I.W", "#H'1234, R0"),
            0x0303: Instruction(0x0303, b"\x26\x03", "BNE", "loc_0308", kind="branch", targets=[0x0308]),
            0x0305: Instruction(0x0305, b"\xA0\x08", "ADD:Q.W", "#1, R0"),
            0x0308: Instruction(0x0308, b"\xA0\x08", "ADD:Q.W", "#1, R0"),
            0x030A: Instruction(0x030A, b"\x18\x04\x00", "JSR", "@loc_0400", kind="call", targets=[0x0400]),
            0x030D: Instruction(0x030D, b"\xA0\x08", "ADD:Q.W", "#1, R0"),
        }
        result = analyze_dataflow(program)

        # Both the fall-through and the branch target start with R0 unknown.
        for address in (0x0305, 0x0308):
            entry = reg_before(result, address, "R0")
            self.assertFalse(entry["known"])
            self.assertEqual(entry["reason"], "block_entry")

        after_call = reg_after(result, 0x030A, "R0")
        self.assertFalse(after_call["known"])
        self.assertEqual(after_call["reason"], "call")
        self.assertFalse(reg_before(result, 0x030D, "R0")["known"])

    def test_tracks_control_register_loads_and_stc_copies(self):
        load_br = Instruction(
            0x0400,
            b"\x04\xFE\x89",
            "LDC.B",
            "#H'FE, BR",
            writes_br=True,
            br_value=0xFE,
        )
        program = {
            0x0400: load_br,
            0x0403: Instruction(0x0403, b"\xA0\x99", "STC.B", "BR, R1"),
            0x0405: Instruction(0x0405, b"\x04\x01\x48", "ORC.B", "#H'01, CCR"),
        }
        result = analyze_dataflow(program)

        br_state = control_after(result, 0x0400, "BR")
        self.assertEqual(br_state["value"], 0xFE)
        self.assertEqual(br_state["width"], 8)
        self.assertEqual(reg_after(result, 0x0403, "R1")["value"], 0xFE)
        self.assertFalse(control_after(result, 0x0405, "CCR")["known"])

    def test_state_lookup_helper_returns_instruction_record(self):
        program = {
            0x0500: Instruction(0x0500, b"\x58\x00\x01", "MOV:I.W", "#H'0001, R0"),
        }
        result = analyze_dataflow(program)

        record = state_for_instruction(result, 0x0500)
        self.assertEqual(record["after"]["registers"]["R0"]["value"], 1)
        # An address with no instruction yields an empty record.
        self.assertEqual(state_for_instruction(result, 0x9999), {})


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,72 @@
import unittest
from h8536.indirect import analyze_indirect_flow, indirect_comment_for_instruction
from h8536.model import Instruction
from h8536.render import format_listing, write_json
from h8536.rom import Rom
import json
import tempfile
from pathlib import Path
class IndirectFlowTest(unittest.TestCase):
    """Exercises ``analyze_indirect_flow`` and its rendering in listing/JSON output."""

    def test_detects_indexed_pointer_table_before_indirect_jump(self):
        # 0xFF-filled image with three 16-bit words placed at H'0200.
        image = bytearray([0xFF] * 0x240)
        image[0x0200:0x0206] = bytes.fromhex("01200300FFFF")
        program = {
            0x0100: Instruction(0x0100, b"", "MOV:G.W", "@(H'0200,R4), R1"),
            0x0104: Instruction(0x0104, b"", "JMP", "@R1", kind="jump", fallthrough=False),
            0x0120: Instruction(0x0120, b"\x19", "RTS", kind="return", fallthrough=False),
            0x0300: Instruction(0x0300, b"\x19", "RTS", kind="return", fallthrough=False),
        }
        labels = {0x0120: "loc_0120"}

        result = analyze_indirect_flow(Rom(bytes(image)), program, labels)

        first_site = result["sites"][0]
        self.assertEqual(first_site["address"], 0x0104)
        self.assertEqual(first_site["target_register"], "R1")
        table = first_site["table"]
        self.assertEqual(table["base"], 0x0200)
        self.assertEqual(table["entry_count"], 2)
        self.assertEqual(table["decoded_target_count"], 2)
        comment = indirect_comment_for_instruction(result, 0x0104)
        self.assertIn("pointer table H'0200", comment)

    def test_records_unknown_indirect_call_without_prior_table_load(self):
        program = {
            0x0100: Instruction(0x0100, b"", "JSR", "@R0", kind="call"),
        }

        result = analyze_indirect_flow(Rom(bytes([0xFF] * 0x200)), program)

        first_site = result["sites"][0]
        self.assertEqual(first_site["confidence"], "unknown")
        self.assertIn("target not resolved", first_site["summary"])

    def test_listing_and_json_include_indirect_flow_metadata(self):
        program = {
            0x0100: Instruction(0x0100, b"", "JSR", "@R0", kind="call"),
        }
        blank_image = bytes([0xFF] * 0x200)
        result = analyze_indirect_flow(Rom(blank_image), program)

        # A fresh Rom is built for the renderer, as in the analysis call above.
        listing = format_listing(
            Path("rom.bin"),
            Rom(blank_image),
            program,
            {},
            {},
            "min",
            traced=True,
            indirect_flow=result,
        )
        self.assertIn("target not resolved", listing)

        with tempfile.TemporaryDirectory() as tmp:
            out_path = Path(tmp) / "out.json"
            write_json(out_path, program, {}, {}, indirect_flow=result)
            # Read back while the temporary directory still exists.
            payload = json.loads(out_path.read_text(encoding="utf-8"))

            self.assertEqual(payload["indirect_flow"]["sites"][0]["address"], 0x0100)
            self.assertEqual(payload["instructions"][0]["indirect_flow"]["confidence"], "unknown")


if __name__ == "__main__":
    unittest.main()

View File

@@ -109,7 +109,7 @@ class PseudocodeTest(unittest.TestCase):
],
}
text = generate_pseudocode(payload, options=PseudocodeOptions())
text = generate_pseudocode(payload, options=PseudocodeOptions(structured=False))
self.assertIn("void vec_reset_0100(void)", text)
self.assertIn("P1DDR = (uint8_t)(0xFF);", text)

View File

@@ -0,0 +1,132 @@
import unittest
from h8536.pseudocode import PseudocodeOptions, generate_pseudocode
def _instruction(
address,
mnemonic,
operands="",
*,
kind="normal",
targets=None,
text=None,
):
return {
"address": address,
"text": text or f"{mnemonic} {operands}".strip(),
"mnemonic": mnemonic,
"operands": operands,
"kind": kind,
"targets": list(targets or []),
"references": [],
"comment": "",
}
def _payload(instructions):
start = min(ins["address"] for ins in instructions)
end = max(ins["address"] for ins in instructions)
return {
"vectors": [],
"call_graph": {
"nodes": [
{
"start": start,
"end": end,
"label": f"loc_{start:04X}",
"sources": [],
"instruction_count": len(instructions),
"calls": [],
}
],
"edges": [],
},
"instructions": instructions,
}
def _options(**overrides):
    """Build ``PseudocodeOptions`` with the test defaults, then apply ``overrides``.

    The defaults switch off ``include_asm``, ``include_addresses`` and
    ``emit_declarations``; any keyword in ``overrides`` replaces or extends
    them before the options object is constructed.
    """
    defaults = dict(
        include_asm=False,
        include_addresses=False,
        emit_declarations=False,
    )
    return PseudocodeOptions(**{**defaults, **overrides})
class PseudocodeStructuringTest(unittest.TestCase):
    """Checks how ``generate_pseudocode`` structures conditional branches.

    Each test feeds a single-node payload built by ``_payload`` and inspects
    the generated text for structured constructs or ``goto`` fallbacks.
    """

    def test_backward_conditional_branch_becomes_do_while(self):
        payload = _payload(
            [
                _instruction(0x0100, "MOV.B", "#H'00, R0"),
                _instruction(0x0102, "ADD.B", "#H'01, R0"),
                _instruction(0x0104, "CMP.B", "#H'03, R0"),
                _instruction(0x0106, "BNE", "loc_0102", kind="branch", targets=[0x0102]),
                _instruction(0x0108, "RTS", kind="return"),
            ]
        )

        text = generate_pseudocode(payload, options=_options())

        self.assertIn("do {", text)
        self.assertIn("} while (!Z);", text)
        # Neither the goto nor its label should remain once the loop is structured.
        self.assertNotIn("goto loc_0102;", text)
        self.assertNotIn("loc_0102:", text)

    def test_forward_conditional_branch_over_small_span_becomes_if(self):
        payload = _payload(
            [
                _instruction(0x0100, "CMP.B", "#H'00, R0"),
                _instruction(0x0102, "BEQ", "loc_0108", kind="branch", targets=[0x0108]),
                _instruction(0x0104, "MOV.B", "#H'01, R1"),
                _instruction(0x0106, "ADD.B", "#H'02, R1"),
                _instruction(0x0108, "RTS", kind="return"),
            ]
        )

        text = generate_pseudocode(payload, options=_options())

        # The skipped span is guarded by the inverted branch condition.
        self.assertIn("if (!Z) {", text)
        self.assertIn("R1 = (uint8_t)(0x01);", text)
        self.assertIn("R1 += (uint8_t)(0x02);", text)
        self.assertNotIn("goto loc_0108;", text)
        self.assertNotIn("loc_0108:", text)

    def test_structuring_can_be_disabled(self):
        payload = _payload(
            [
                _instruction(0x0100, "CMP.B", "#H'00, R0"),
                _instruction(0x0102, "BEQ", "loc_0108", kind="branch", targets=[0x0108]),
                _instruction(0x0104, "MOV.B", "#H'01, R1"),
                _instruction(0x0108, "RTS", kind="return"),
            ]
        )

        text = generate_pseudocode(payload, options=_options(structured=False))

        self.assertIn("if (Z) goto loc_0108;", text)
        self.assertIn("loc_0108:", text)
        self.assertNotIn("if (!Z) {", text)

    def test_ambiguous_forward_branch_keeps_goto_fallback(self):
        payload = _payload(
            [
                _instruction(0x0100, "BEQ", "loc_0108", kind="branch", targets=[0x0108]),
                _instruction(0x0102, "MOV.B", "#H'01, R1"),
                _instruction(0x0104, "BRA", "loc_0108", kind="jump", targets=[0x0108]),
                _instruction(0x0108, "RTS", kind="return"),
            ]
        )

        text = generate_pseudocode(payload, options=_options())

        self.assertIn("if (Z) goto loc_0108;", text)
        # "goto loc_0108;" is a substring of the conditional form asserted
        # above, so a plain assertIn could never fail on its own.  Require a
        # second occurrence so the unconditional jump is checked as well.
        # NOTE(review): assumes the BRA is rendered as its own goto — confirm.
        self.assertGreaterEqual(text.count("goto loc_0108;"), 2)
        self.assertIn("loc_0108:", text)
        self.assertNotIn("if (!Z) {", text)


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,59 @@
import json
import tempfile
import unittest
from pathlib import Path
from h8536.dataflow import analyze_dataflow
from h8536.model import Instruction
from h8536.render import format_listing, write_json
from h8536.rom import Rom
from h8536.symbols import discover_symbols
class RenderAnalysisIntegrationTest(unittest.TestCase):
    """Checks that dataflow and symbol analyses appear in listing and JSON output."""

    def test_listing_and_json_include_symbols_and_compact_dataflow(self):
        # Load an immediate into R0, then store R0 to the absolute address H'F680.
        store_to_ram = Instruction(
            0x0103,
            b"\x1D\xF6\x80\x90",
            "MOV:G.W",
            "R0, @H'F680",
            references=[0xF680],
        )
        program = {
            0x0100: Instruction(0x0100, b"\x58\x12\x34", "MOV:I.W", "#H'1234, R0"),
            0x0103: store_to_ram,
        }
        flow = analyze_dataflow(program)
        discovered = discover_symbols(program)
        blank_rom = Rom(bytes([0xFF] * 0x200))

        listing = format_listing(
            Path("rom.bin"),
            blank_rom,
            program,
            {},
            {},
            "min",
            traced=True,
            dataflow=flow,
            symbols=discovered,
        )
        for fragment in ("; Symbols", "ram_F680", "dataflow R0=H'1234"):
            self.assertIn(fragment, listing)

        with tempfile.TemporaryDirectory() as tmp:
            out_path = Path(tmp) / "out.json"
            write_json(out_path, program, {}, {}, dataflow=flow, symbols=discovered)
            # Read back while the temporary directory still exists.
            payload = json.loads(out_path.read_text(encoding="utf-8"))

            self.assertEqual(payload["symbols"]["symbols"][0]["name"], "ram_F680")
            emitted = payload["instructions"]
            self.assertEqual(emitted[1]["references"][0]["symbol"], "ram_F680")
            compact = emitted[0]["dataflow"]
            self.assertEqual(compact["changes"][0]["name"], "R0")
            self.assertEqual(compact["known_after"]["registers"]["R0"]["value"], 0x1234)
            # The JSON form must not carry a "before" entry.
            self.assertNotIn("before", compact)


if __name__ == "__main__":
    unittest.main()

123
tests/test_symbols.py Normal file
View File

@@ -0,0 +1,123 @@
import unittest
from h8536.model import Instruction
from h8536.symbols import discover_symbols, instruction_accesses, symbol_for_address
def ins(address, mnemonic, operands="", references=None):
    """Build an ``Instruction`` for symbol tests with a one-byte placeholder encoding.

    ``references`` is copied into a fresh list; ``None`` or any other falsy
    value becomes an empty list.
    """
    refs = list(references) if references else []
    return Instruction(address, b"\x00", mnemonic, operands, references=refs)
class SymbolDiscoveryTest(unittest.TestCase):
    """Exercises ``discover_symbols``, ``symbol_for_address`` and ``instruction_accesses``."""

    def test_discovers_ram_symbol_counts_direction_and_widths(self):
        # Three accesses to H'F680: a byte move into it, a byte compare, a word add.
        program = {
            0x1000: ins(0x1000, "MOV:G.B", "#H'12, @H'F680", [0xF680]),
            0x1004: ins(0x1004, "CMP:G.B", "#H'01, @H'F680", [0xF680]),
            0x1008: ins(0x1008, "ADD:Q.W", "#1, @H'F680", [0xF680]),
        }

        result = discover_symbols(program)

        found = result["symbols"]
        self.assertEqual(len(found), 1)
        entry = found[0]
        self.assertEqual(entry["address"], 0xF680)
        self.assertEqual(entry["name"], "ram_F680")
        self.assertEqual(entry["region"], "on_chip_ram")
        self.assertEqual(entry["kind"], "ram")
        self.assertEqual(entry["access_count"], 3)
        self.assertEqual(entry["read_count"], 2)
        self.assertEqual(entry["write_count"], 2)
        self.assertEqual(entry["unknown_count"], 0)
        self.assertEqual(entry["width_hints"], ["byte", "word"])
        self.assertEqual(entry["width"], "mixed")
        self.assertEqual(entry["first_access"], 0x1000)
        self.assertEqual(entry["last_access"], 0x1008)
        self.assertEqual(symbol_for_address(result, 0xF680), "ram_F680")

    def test_names_program_or_external_memory_and_excludes_registers_by_default(self):
        program = [
            ins(0x2000, "MOV:G.W", "@H'1234, R1", [0x1234]),
            ins(0x2004, "MOV:G.B", "#H'80, @RAMCR", [0xFF11]),
        ]

        result = discover_symbols(program)

        names = [entry["name"] for entry in result["symbols"]]
        self.assertEqual(names, ["mem_1234"])
        entry = result["symbols"][0]
        self.assertEqual(entry["region"], "program_or_external")
        self.assertEqual(entry["kind"], "memory")
        self.assertEqual(entry["read_count"], 1)
        # The reference to H'FF11 (operand @RAMCR) gets no symbol by default.
        self.assertIsNone(symbol_for_address(result, 0xFF11))

    def test_can_include_io_register_symbols_when_requested(self):
        program = [
            ins(0x2004, "MOV:G.B", "#H'80, @RAMCR", [0xFF11]),
        ]

        result = discover_symbols(program, include_registers=True)

        self.assertEqual(len(result["symbols"]), 1)
        entry = result["symbols"][0]
        self.assertEqual(entry["address"], 0xFF11)
        self.assertEqual(entry["name"], "RAMCR")
        self.assertEqual(entry["region"], "register_field")
        self.assertEqual(entry["kind"], "register")
        self.assertEqual(entry["write_count"], 1)

    def test_bit_and_clear_operations_use_conservative_directions(self):
        program = [
            ins(0x3000, "BSET.B", "#4, @H'F690", [0xF690]),
            ins(0x3002, "BCLR.B", "#4, @H'F690", [0xF690]),
            ins(0x3004, "TST.B", "@H'F690", [0xF690]),
            ins(0x3006, "CLR.B", "@H'F690", [0xF690]),
        ]

        result = discover_symbols(program)

        entry = result["symbols"][0]
        self.assertEqual(entry["read_count"], 3)
        self.assertEqual(entry["write_count"], 3)
        # One direction per instruction, in program order.
        directions = [access["direction"] for access in entry["accesses"]]
        self.assertEqual(directions, ["read_write", "read_write", "read", "write"])

    def test_optional_pointer_table_candidates_add_xrefs_without_io_pollution(self):
        program = [
            ins(0x4000, "MOV:G.B", "@H'F680, R0", [0xF680]),
        ]
        pointer_table = {
            "address": 0x0200,
            "targets": [0xF680, 0x1234, 0xFF11],
        }
        candidates = {"pointer_tables": [pointer_table]}

        result = discover_symbols(program, data_candidates=candidates)

        by_name = {entry["name"]: entry for entry in result["symbols"]}
        self.assertEqual(by_name["ram_F680"]["xref_count"], 1)
        # H'1234 is only named by the table, never accessed by an instruction.
        self.assertEqual(by_name["mem_1234"]["access_count"], 0)
        self.assertEqual(by_name["mem_1234"]["xref_count"], 1)
        self.assertNotIn("RAMCR", by_name)

    def test_instruction_accesses_handles_comma_inside_displacement_operand(self):
        # The source operand contains a comma inside its parentheses.
        move = ins(0x5000, "MOV:G.B", "@(H'0010,R1), @H'F682", [0xF682])

        accesses = instruction_accesses(move)

        first = accesses[0]
        self.assertEqual(first["direction"], "write")
        self.assertEqual(first["operand"], "@H'F682")


if __name__ == "__main__":
    unittest.main()