commit fffc6cf92f77e2c48aa1394e91885ddc0c535c84
parent 2a542b20b82b7efe260b909d8c95413d87e87b0f
Author: Drew DeVault <sir@cmpwn.com>
Date: Tue, 2 Jan 2024 12:42:40 +0100
debug::dwarf: new module
This introduces the debug::dwarf module, which provides a partial
implementation of the DWARF debugging information format. It targets
DWARF v4 and is designed to support the future debug:: module.
Signed-off-by: Drew DeVault <sir@cmpwn.com>
Diffstat:
9 files changed, 1692 insertions(+), 0 deletions(-)
diff --git a/debug/dwarf/README b/debug/dwarf/README
@@ -0,0 +1,6 @@
+debug::dwarf includes an implementation of the DWARF Debugging Information
+Format. The implementation is incomplete and, while it may be useful to
+third parties, is mainly designed to support the needs of [[debug::]]'s runtime
+debugging features.
+
+This module implements DWARF version 4.
diff --git a/debug/dwarf/abbrev.ha b/debug/dwarf/abbrev.ha
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use debug::image;
+use errors;
+use format::elf;
+use io;
+use memio;
+use sort;
+
+// A table of abbreviations, as returned by [[load_abbrevs]]. Look entries up
+// with [[get_abbrev]] and release the table with [[abbrev_table_finish]].
+export type abbrev_table = struct {
+ items: []abbrev,
+};
+
+// A single abbreviated tag from a .debug_abbrev section.
+export type abbrev = struct {
+ // Abbreviation code; this is the key matched by [[get_abbrev]].
+ code: u64,
+ // Tag number read from the abbreviation declaration.
+ tag: u32,
+ // True if the "children" byte of the declaration was non-zero.
+ has_children: bool,
+ // Attribute specifications, in the order they were read. Heap allocated;
+ // freed by [[abbrev_table_finish]].
+ fields: []afield,
+};
+
+// A field in a .debug_abbrev section
+export type afield = struct {
+ // Attribute name code (first ULEB128 of the pair, cast to u32).
+ attr: u32,
+ // Attribute form code (second ULEB128 of the pair, cast to u32).
+ form: u32,
+};
+
+// Loads an abbreviation table from the .debug_abbrev section, loading the table
+// at the provided offset from the start of the ELF section.
+//
+// Returns void if the image has no .debug_abbrev section, and
+// [[errors::invalid]] if the offset cannot be seeked to or if an entry cannot
+// be read. No memory is retained when an error is returned.
+//
+// Pass the result to [[abbrev_table_finish]] to free resources associated with
+// the table when you're done with it.
+export fn load_abbrevs(
+	image: *image::image,
+	offs: u64,
+) (abbrev_table | void | errors::invalid) = {
+	const sec = match (image::section_byname(image, ".debug_abbrev")) {
+	case let sec: *elf::section64 =>
+		yield sec;
+	case null =>
+		return;
+	};
+
+	const rd = image::section_reader(image, sec);
+	// The offset comes from the file being parsed, so a bad value is reported
+	// as invalid input rather than aborting the program.
+	match (io::seek(&rd, offs: io::off, io::whence::SET)) {
+	case io::off => void;
+	case io::error =>
+		return errors::invalid;
+	};
+	const rd = new_table_reader(&rd, false)! as table_reader;
+
+	let abbrevs: []abbrev = [];
+	for (true) {
+		match (read_abbrev(&rd)) {
+		case io::EOF => break;
+		case io::error =>
+			// Release everything collected so far; the caller never
+			// sees this partial table and so cannot free it.
+			for (let i = 0z; i < len(abbrevs); i += 1) {
+				free(abbrevs[i].fields);
+			};
+			free(abbrevs);
+			return errors::invalid;
+		case let ab: abbrev =>
+			append(abbrevs, ab);
+		};
+	};
+
+	return abbrev_table {
+		items = abbrevs,
+	};
+};
+
+// Reads an entry from an abbreviation table.
+//
+// Returns [[io::EOF]] when the abbreviation code read is zero. On success the
+// caller owns the returned entry's "fields" slice. If reading fails part-way
+// through the attribute list, the partially built slice is freed before the
+// error is returned.
+fn read_abbrev(
+	rd: *table_reader,
+) (abbrev | io::EOF | io::error) = {
+	const code = read_uleb128(rd)?;
+	if (code == 0) {
+		return io::EOF;
+	};
+	const tag = read_uleb128(rd)?;
+	const children = read_ubyte(rd)? != 0;
+
+	let fields: []afield = [];
+	// Cleared only on the success path so that every early return via "?"
+	// below releases the attributes gathered so far.
+	let ok = false;
+	defer if (!ok) free(fields);
+
+	for (true) {
+		const name = read_uleb128(rd)?;
+		const form = read_uleb128(rd)?;
+		// A (0, 0) pair terminates the attribute list
+		if (name == 0 && form == 0) {
+			break;
+		};
+		append(fields, afield {
+			attr = name: u32,
+			form = form: u32,
+		});
+	};
+
+	ok = true;
+	return abbrev {
+		code = code,
+		tag = tag: u32,
+		has_children = children,
+		fields = fields,
+	};
+};
+
+// Releases the memory held by an [[abbrev_table]]: the field list of every
+// abbreviation, followed by the list of abbreviations itself.
+export fn abbrev_table_finish(table: *abbrev_table) void = {
+	const items = table.items;
+	for (let n = 0z; n < len(items); n += 1) {
+		free(items[n].fields);
+	};
+	free(items);
+};
+
+// Looks up an abbreviation in an [[abbrev_table]] by its abbreviation code.
+// Returns null if no entry in the table has that code.
+export fn get_abbrev(table: *abbrev_table, code: u64) const nullable *abbrev = {
+	// TODO: Sort the list and do this faster
+	for (let idx = 0z; idx < len(table.items); idx += 1) {
+		const candidate = &table.items[idx];
+		if (candidate.code != code) {
+			continue;
+		};
+		return candidate;
+	};
+	return null;
+};
diff --git a/debug/dwarf/addr_to_line.ha b/debug/dwarf/addr_to_line.ha
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use debug::image;
+use io;
+use path;
+
+// Determines the file path, line number, and column number of a given address
+// in the program image. Returns void if unknown: that is, if no address range
+// covers the address, if the debug info or line program cannot be loaded, or
+// if no suitable line program row is found.
+//
+// The return value is statically allocated: the path string borrows from a
+// static buffer and is overwritten by the next call to this function.
+export fn addr_to_line(
+	image: *image::image,
+	addr: uintptr,
+) ((const str, uint, uint) | void | io::error) = {
+	const dinfo_offs = match (arange_lookup(image, addr)) {
+	case let offs: u64 =>
+		yield offs;
+	case =>
+		return; // XXX: We could walk .debug_info I guess
+	};
+	const dinfo = match (read_debug_info(image, dinfo_offs)?) {
+	case let rd: debug_info_reader =>
+		yield rd;
+	case =>
+		return;
+	};
+	defer debug_info_finish(&dinfo);
+
+	// Scan for a compile unit entry carrying DW_AT_stmt_list (the offset
+	// handed to exec_line_program below); note DW_AT_comp_dir along the way.
+	let comp_dir = "";
+	let stmt_list = 0u64, found = false;
+	for (!found) {
+		const entry = match (debug_info_next(&dinfo)) {
+		case io::EOF =>
+			return;
+		case let ent: entry =>
+			yield ent;
+		};
+		defer entry_finish(&entry);
+
+		if (entry.tag != DW_TAG_compile_unit) {
+			continue;
+		};
+
+		for (let i = 0z; i < len(entry.fields); i += 1) {
+			const field = &entry.fields[i];
+			switch (field.attr) {
+			case DW_AT_stmt_list =>
+				stmt_list = field.constant;
+				found = true;
+			case DW_AT_comp_dir =>
+				// NOTE(review): comp_dir outlives entry_finish; this
+				// assumes field.string is not owned by the entry.
+				comp_dir = field.string;
+			case => yield;
+			};
+		};
+	};
+
+	const prog = match (exec_line_program(image, stmt_list)) {
+	case let prog: line_program =>
+		yield prog;
+	case =>
+		return;
+	};
+	defer line_program_finish(&prog);
+
+	let last = line_state { ... };
+	for (true) {
+		// Must be a mutable binding: it may be replaced by "last" below.
+		let state = match (line_next(&prog)?) {
+		case let state: line_state =>
+			yield state;
+		case io::EOF =>
+			break;
+		};
+		// Runs at the end of every iteration, including "continue"
+		defer last = state;
+
+		if (state.file == 1) {
+			continue;
+		};
+		if (state.addr < addr) {
+			continue;
+		};
+
+		// This is the first row at or beyond addr. If a previous row was
+		// recorded (taken here to be indicated by a non-zero vm_loc), use
+		// that row instead; otherwise this is the first row seen, so use
+		// it as-is.
+		if (last.vm_loc != 0) {
+			state = last;
+		};
+
+		if (state.file == 0) {
+			return;
+		};
+
+		const file = &prog.head.files[state.file - 1];
+		static let path = path::buffer { ... };
+
+		// Clear whatever the previous call left in the static buffer
+		path::set(&path)!;
+
+		// Relative file names are prefixed with their directory entry,
+		// which is itself prefixed with comp_dir if it is relative.
+		if (!path::abs(file.name)) {
+			let dir = "";
+			if (file.dir != 0) {
+				dir = prog.head.dirs[file.dir - 1];
+				if (!path::abs(dir) && comp_dir != "") {
+					path::set(&path, comp_dir, dir)!;
+				} else {
+					path::set(&path, dir)!;
+				};
+			} else if (comp_dir != "") {
+				path::set(&path, comp_dir)!;
+			};
+		};
+
+		path::push(&path, file.name)!;
+		return (path::string(&path), state.line, state.column);
+	};
+};
diff --git a/debug/dwarf/aranges.ha b/debug/dwarf/aranges.ha
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bufio;
+use debug::image;
+use errors;
+use encoding::hex;
+use format::elf;
+use io;
+use memio;
+
+// Supported version of .debug_aranges decoder
+def ARANGES_VERSION: u16 = 2;
+
+// Returns the debug_info offset for the DIE that corresponds to this address,
+// if known, or void if unknown (including when the image has no
+// .debug_aranges section). Returns [[errors::invalid]] if a table in the
+// section cannot be read.
+export fn arange_lookup(
+	image: *image::image,
+	addr: uintptr,
+) (u64 | void | errors::invalid) = {
+	const aranges = match (image::section_byname(image, ".debug_aranges")) {
+	case let sec: *elf::section64 =>
+		yield sec;
+	case null =>
+		return;
+	};
+
+	const rd = image::section_reader(image, aranges);
+	// Try each table in the section in turn until one matches the address
+	for (true) {
+		const rd = match (new_table_reader(&rd, true)) {
+		case io::EOF => break;
+		case let rd: table_reader =>
+			yield rd;
+		case =>
+			// A table header that cannot be read is reported the same
+			// way as a table body that cannot be read.
+			return errors::invalid;
+		};
+
+		match (arange_match(&rd, addr)) {
+		case void => void;
+		case let u: u64 =>
+			return u;
+		case io::error =>
+			return errors::invalid;
+		};
+	};
+};
+
+// Scans a single .debug_aranges table for a range containing addr. Returns
+// the table's debug_info offset if one is found, or void otherwise.
+//
+// Aborts if the table's version is not [[ARANGES_VERSION]], if its segment
+// size is non-zero, or if its address size is not 8.
+fn arange_match(rd: *table_reader, addr: uintptr) (u64 | void | io::error) = {
+	const ver = read_uhalf(rd)?;
+	const info_offset = read_secword(rd)?;
+	const asize = read_ubyte(rd)?;
+	const ssize = read_ubyte(rd)?;
+	assert(ver == ARANGES_VERSION, "debug::dwarf: unsupported .debug_aranges version");
+	assert(ssize == 0, "debug::dwarf: unsupported segmented target for .debug_aranges");
+	assert(asize == 8, "debug::dwarf: unsupported address size for .debug_aranges");
+
+	// Skip padding up to a multiple of twice the address size
+	read_align(rd, asize * 2)?;
+
+	const au64 = addr: u64;
+	for (!read_iseof(rd)) {
+		const min = read_ulong(rd)?;
+		const length = read_ulong(rd)?;
+		// A (0, 0) pair terminates the list and must be the last thing
+		// in the table.
+		if (min == 0 && length == 0) {
+			if (!read_iseof(rd)) {
+				return errors::invalid;
+			};
+			break;
+		};
+		// Same test as min <= addr < min + length, written so that a
+		// range reaching the top of the address space cannot wrap around.
+		if (au64 >= min && au64 - min < length) {
+			return info_offset;
+		};
+	};
+};
diff --git a/debug/dwarf/constant.ha b/debug/dwarf/constant.ha
@@ -0,0 +1,651 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+// DWARF constant definitions
+// Updated as of DWARF 5
+
+export def DW_TAG_array_type: u32 = 0x01;
+export def DW_TAG_class_type: u32 = 0x02;
+export def DW_TAG_entry_point: u32 = 0x03;
+export def DW_TAG_enumeration_type: u32 = 0x04;
+export def DW_TAG_formal_parameter: u32 = 0x05;
+// 0x06, 0x07: reserved
+export def DW_TAG_imported_declaration: u32 = 0x08;
+// 0x09: reserved
+export def DW_TAG_label: u32 = 0x0a;
+export def DW_TAG_lexical_block: u32 = 0x0b;
+// 0x0c: reserved
+export def DW_TAG_member: u32 = 0x0d;
+// 0x0e: reserved
+export def DW_TAG_pointer_type: u32 = 0x0f;
+export def DW_TAG_reference_type: u32 = 0x10;
+export def DW_TAG_compile_unit: u32 = 0x11;
+export def DW_TAG_string_type: u32 = 0x12;
+export def DW_TAG_structure_type: u32 = 0x13;
+// 0x14: reserved
+export def DW_TAG_subroutine_type: u32 = 0x15;
+export def DW_TAG_typedef: u32 = 0x16;
+export def DW_TAG_union_type: u32 = 0x17;
+// "paramters" is a misspelling retained for compatibility; prefer
+// DW_TAG_unspecified_parameters.
+export def DW_TAG_unspecified_paramters: u32 = 0x18;
+export def DW_TAG_unspecified_parameters: u32 = 0x18;
+export def DW_TAG_variant: u32 = 0x19;
+export def DW_TAG_common_block: u32 = 0x1a;
+export def DW_TAG_common_inclusion: u32 = 0x1b;
+export def DW_TAG_inheritance: u32 = 0x1c;
+export def DW_TAG_inlined_subroutine: u32 = 0x1d;
+export def DW_TAG_module: u32 = 0x1e;
+export def DW_TAG_ptr_to_member_type: u32 = 0x1f;
+export def DW_TAG_set_type: u32 = 0x20;
+export def DW_TAG_subrange_type: u32 = 0x21;
+export def DW_TAG_with_stmt: u32 = 0x22;
+export def DW_TAG_access_declaration: u32 = 0x23;
+export def DW_TAG_base_type: u32 = 0x24;
+export def DW_TAG_catch_block: u32 = 0x25;
+export def DW_TAG_const_type: u32 = 0x26;
+export def DW_TAG_constant: u32 = 0x27;
+export def DW_TAG_enumerator: u32 = 0x28;
+export def DW_TAG_file_type: u32 = 0x29;
+export def DW_TAG_friend: u32 = 0x2a;
+export def DW_TAG_namelist: u32 = 0x2b;
+export def DW_TAG_namelist_item: u32 = 0x2c;
+export def DW_TAG_packed_type: u32 = 0x2d;
+export def DW_TAG_subprogram: u32 = 0x2e;
+export def DW_TAG_template_type_parameter: u32 = 0x2f;
+export def DW_TAG_template_value_parameter: u32 = 0x30;
+export def DW_TAG_thrown_type: u32 = 0x31;
+export def DW_TAG_try_block: u32 = 0x32;
+export def DW_TAG_variant_part: u32 = 0x33;
+export def DW_TAG_variable: u32 = 0x34;
+export def DW_TAG_volatile_type: u32 = 0x35;
+export def DW_TAG_dwarf_procedure: u32 = 0x36;
+export def DW_TAG_restrict_type: u32 = 0x37;
+export def DW_TAG_interface_type: u32 = 0x38;
+export def DW_TAG_namespace: u32 = 0x39;
+export def DW_TAG_imported_module: u32 = 0x3a;
+export def DW_TAG_unspecified_type: u32 = 0x3b;
+export def DW_TAG_partial_unit: u32 = 0x3c;
+export def DW_TAG_imported_unit: u32 = 0x3d;
+// 0x3e: reserved
+export def DW_TAG_condition: u32 = 0x3f;
+export def DW_TAG_shared_type: u32 = 0x40;
+export def DW_TAG_type_unit: u32 = 0x41;
+export def DW_TAG_rvalue_reference_type: u32 = 0x42;
+export def DW_TAG_template_alias: u32 = 0x43;
+export def DW_TAG_coarray_type: u32 = 0x44;
+export def DW_TAG_generic_subrange: u32 = 0x45;
+export def DW_TAG_dynamic_type: u32 = 0x46;
+export def DW_TAG_atomic_type: u32 = 0x47;
+export def DW_TAG_call_site: u32 = 0x48;
+export def DW_TAG_call_site_parameter: u32 = 0x49;
+export def DW_TAG_skeleton_unit: u32 = 0x4a;
+export def DW_TAG_immutable_type: u32 = 0x4b;
+export def DW_TAG_lo_user: u32 = 0x4080;
+export def DW_TAG_hi_user: u32 = 0xffff;
+
+export def DW_CHILDREN_no: uint = 0x00;
+export def DW_CHILDREN_yes: uint = 0x01;
+
+export def DW_AT_sibling: u32 = 0x01;
+export def DW_AT_location: u32 = 0x02;
+export def DW_AT_name: u32 = 0x03;
+// 0x04-0x08: reserved
+export def DW_AT_ordering: u32 = 0x09;
+// 0x0a: reserved
+export def DW_AT_byte_size: u32 = 0x0b;
+// 0x0c: reserved
+export def DW_AT_bit_size: u32 = 0x0d;
+// 0x0e, 0x0f: reserved
+export def DW_AT_stmt_list: u32 = 0x10;
+export def DW_AT_low_pc: u32 = 0x11;
+export def DW_AT_high_pc: u32 = 0x12;
+export def DW_AT_language: u32 = 0x13;
+// 0x14: reserved
+export def DW_AT_discr: u32 = 0x15;
+export def DW_AT_discr_value: u32 = 0x16;
+export def DW_AT_visibility: u32 = 0x17;
+export def DW_AT_import: u32 = 0x18;
+export def DW_AT_string_length: u32 = 0x19;
+export def DW_AT_common_reference: u32 = 0x1a;
+export def DW_AT_comp_dir: u32 = 0x1b;
+export def DW_AT_const_value: u32 = 0x1c;
+export def DW_AT_containing_type: u32 = 0x1d;
+export def DW_AT_default_value: u32 = 0x1e;
+// 0x1f: reserved
+export def DW_AT_inline: u32 = 0x20;
+export def DW_AT_is_optional: u32 = 0x21;
+// DWARF names attribute 0x22 DW_AT_lower_bound; the "is_" spelling is retained
+// for compatibility.
+export def DW_AT_is_lower_bound: u32 = 0x22;
+export def DW_AT_lower_bound: u32 = 0x22;
+// 0x23, 0x24: reserved
+export def DW_AT_producer: u32 = 0x25;
+// 0x26: reserved
+export def DW_AT_prototyped: u32 = 0x27;
+// 0x28, 0x29: reserved
+export def DW_AT_return_addr: u32 = 0x2a;
+// 0x2b: reserved
+export def DW_AT_start_scope: u32 = 0x2c;
+// 0x2d: reserved
+export def DW_AT_bit_stride: u32 = 0x2e;
+export def DW_AT_upper_bound: u32 = 0x2f;
+// 0x30: reserved
+export def DW_AT_abstract_origin: u32 = 0x31;
+export def DW_AT_accessibility: u32 = 0x32;
+export def DW_AT_address_class: u32 = 0x33;
+export def DW_AT_artificial: u32 = 0x34;
+export def DW_AT_base_types: u32 = 0x35;
+export def DW_AT_calling_convention: u32 = 0x36;
+export def DW_AT_count: u32 = 0x37;
+export def DW_AT_data_member_location: u32 = 0x38;
+export def DW_AT_decl_column: u32 = 0x39;
+export def DW_AT_decl_file: u32 = 0x3a;
+export def DW_AT_decl_line: u32 = 0x3b;
+export def DW_AT_declaration: u32 = 0x3c;
+export def DW_AT_discr_list: u32 = 0x3d;
+export def DW_AT_encoding: u32 = 0x3e;
+export def DW_AT_external: u32 = 0x3f;
+export def DW_AT_frame_base: u32 = 0x40;
+export def DW_AT_friend: u32 = 0x41;
+export def DW_AT_identifier_case: u32 = 0x42;
+// 0x43: reserved
+export def DW_AT_namelist_item: u32 = 0x44;
+export def DW_AT_priority: u32 = 0x45;
+export def DW_AT_segment: u32 = 0x46;
+export def DW_AT_specification: u32 = 0x47;
+export def DW_AT_static_link: u32 = 0x48;
+export def DW_AT_type: u32 = 0x49;
+export def DW_AT_use_location: u32 = 0x4a;
+export def DW_AT_variable_parameter: u32 = 0x4b;
+export def DW_AT_virtuality: u32 = 0x4c;
+export def DW_AT_vtable_elem_location: u32 = 0x4d;
+export def DW_AT_allocated: u32 = 0x4e;
+export def DW_AT_associated: u32 = 0x4f;
+export def DW_AT_data_location: u32 = 0x50;
+export def DW_AT_byte_stride: u32 = 0x51;
+export def DW_AT_entry_pc: u32 = 0x52;
+export def DW_AT_use_UTF8: u32 = 0x53;
+export def DW_AT_extension: u32 = 0x54;
+export def DW_AT_ranges: u32 = 0x55;
+export def DW_AT_trampoline: u32 = 0x56;
+export def DW_AT_call_column: u32 = 0x57;
+export def DW_AT_call_file: u32 = 0x58;
+export def DW_AT_call_line: u32 = 0x59;
+export def DW_AT_description: u32 = 0x5a;
+export def DW_AT_binary_scale: u32 = 0x5b;
+export def DW_AT_decimal_scale: u32 = 0x5c;
+export def DW_AT_small: u32 = 0x5d;
+export def DW_AT_decimal_sign: u32 = 0x5e;
+export def DW_AT_digit_count: u32 = 0x5f;
+export def DW_AT_picture_string: u32 = 0x60;
+export def DW_AT_mutable: u32 = 0x61;
+export def DW_AT_threads_scaled: u32 = 0x62;
+export def DW_AT_explicit: u32 = 0x63;
+export def DW_AT_object_pointer: u32 = 0x64;
+export def DW_AT_endianity: u32 = 0x65;
+export def DW_AT_elemental: u32 = 0x66;
+export def DW_AT_pure: u32 = 0x67;
+export def DW_AT_recursive: u32 = 0x68;
+export def DW_AT_signature: u32 = 0x69;
+export def DW_AT_main_subprogram: u32 = 0x6a;
+export def DW_AT_data_bit_offset: u32 = 0x6b;
+export def DW_AT_const_expr: u32 = 0x6c;
+export def DW_AT_enum_class: u32 = 0x6d;
+export def DW_AT_linkage_name: u32 = 0x6e;
+export def DW_AT_string_length_bit_size: u32 = 0x6f;
+export def DW_AT_string_length_byte_size: u32 = 0x70;
+export def DW_AT_rank: u32 = 0x71;
+export def DW_AT_str_offsets_base: u32 = 0x72;
+export def DW_AT_addr_base: u32 = 0x73;
+export def DW_AT_rnglists_base: u32 = 0x74;
+// 0x75: reserved
+export def DW_AT_dwo_name: u32 = 0x76;
+export def DW_AT_reference: u32 = 0x77;
+export def DW_AT_rvalue_reference: u32 = 0x78;
+export def DW_AT_macros: u32 = 0x79;
+export def DW_AT_call_all_calls: u32 = 0x7a;
+export def DW_AT_call_all_source_calls: u32 = 0x7b;
+export def DW_AT_call_all_tail_calls: u32 = 0x7c;
+export def DW_AT_call_return_pc: u32 = 0x7d;
+export def DW_AT_call_value: u32 = 0x7e;
+export def DW_AT_call_origin: u32 = 0x7f;
+export def DW_AT_call_parameter: u32 = 0x80;
+export def DW_AT_call_pc: u32 = 0x81;
+export def DW_AT_call_tail_call: u32 = 0x82;
+export def DW_AT_call_target: u32 = 0x83;
+export def DW_AT_call_target_clobbered: u32 = 0x84;
+export def DW_AT_call_data_location: u32 = 0x85;
+export def DW_AT_call_data_value: u32 = 0x86;
+export def DW_AT_noreturn: u32 = 0x87;
+export def DW_AT_alignment: u32 = 0x88;
+export def DW_AT_export_symbols: u32 = 0x89;
+export def DW_AT_deleted: u32 = 0x8a;
+export def DW_AT_defaulted: u32 = 0x8b;
+export def DW_AT_loclists_base: u32 = 0x8c;
+export def DW_AT_lo_user: u32 = 0x2000;
+export def DW_AT_hi_user: u32 = 0x3fff;
+
+export def DW_FORM_addr: u32 = 0x01;
+// 0x02: reserved
+export def DW_FORM_block2: u32 = 0x03;
+export def DW_FORM_block4: u32 = 0x04;
+export def DW_FORM_data2: u32 = 0x05;
+export def DW_FORM_data4: u32 = 0x06;
+export def DW_FORM_data8: u32 = 0x07;
+export def DW_FORM_string: u32 = 0x08;
+export def DW_FORM_block: u32 = 0x09;
+export def DW_FORM_block1: u32 = 0x0a;
+export def DW_FORM_data1: u32 = 0x0b;
+export def DW_FORM_flag: u32 = 0x0c;
+export def DW_FORM_sdata: u32 = 0x0d;
+export def DW_FORM_strp: u32 = 0x0e;
+export def DW_FORM_udata: u32 = 0x0f;
+export def DW_FORM_ref_addr: u32 = 0x10;
+export def DW_FORM_ref1: u32 = 0x11;
+export def DW_FORM_ref2: u32 = 0x12;
+export def DW_FORM_ref4: u32 = 0x13;
+export def DW_FORM_ref8: u32 = 0x14;
+export def DW_FORM_ref_udata: u32 = 0x15;
+export def DW_FORM_indirect: u32 = 0x16;
+export def DW_FORM_sec_offset: u32 = 0x17;
+export def DW_FORM_exprloc: u32 = 0x18;
+export def DW_FORM_flag_present: u32 = 0x19;
+export def DW_FORM_strx: u32 = 0x1a;
+export def DW_FORM_addrx: u32 = 0x1b;
+export def DW_FORM_ref_sup4: u32 = 0x1c;
+export def DW_FORM_strp_sup: u32 = 0x1d;
+export def DW_FORM_data16: u32 = 0x1e;
+export def DW_FORM_line_strp: u32 = 0x1f;
+export def DW_FORM_ref_sig8: u32 = 0x20;
+export def DW_FORM_implicit_const: u32 = 0x21;
+export def DW_FORM_loclistx: u32 = 0x22;
+export def DW_FORM_rnglistx: u32 = 0x23;
+export def DW_FORM_ref_sup8: u32 = 0x24;
+export def DW_FORM_strx1: u32 = 0x25;
+export def DW_FORM_strx2: u32 = 0x26;
+export def DW_FORM_strx3: u32 = 0x27;
+export def DW_FORM_strx4: u32 = 0x28;
+export def DW_FORM_addrx1: u32 = 0x29;
+export def DW_FORM_addrx2: u32 = 0x2a;
+export def DW_FORM_addrx3: u32 = 0x2b;
+export def DW_FORM_addrx4: u32 = 0x2c;
+
+// 0x01, 0x02: reserved
+export def DW_OP_addr: u8 = 0x03;
+// 0x04, 0x05: reserved
+export def DW_OP_deref: u8 = 0x06;
+// 0x07: reserved
+export def DW_OP_const1u: u8 = 0x08;
+export def DW_OP_const1s: u8 = 0x09;
+export def DW_OP_const2u: u8 = 0x0a;
+export def DW_OP_const2s: u8 = 0x0b;
+export def DW_OP_const4u: u8 = 0x0c;
+export def DW_OP_const4s: u8 = 0x0d;
+export def DW_OP_const8u: u8 = 0x0e;
+export def DW_OP_const8s: u8 = 0x0f;
+export def DW_OP_constu: u8 = 0x10;
+export def DW_OP_consts: u8 = 0x11;
+export def DW_OP_dup: u8 = 0x12;
+export def DW_OP_drop: u8 = 0x13;
+export def DW_OP_over: u8 = 0x14;
+export def DW_OP_pick: u8 = 0x15;
+export def DW_OP_swap: u8 = 0x16;
+export def DW_OP_rot: u8 = 0x17;
+// DWARF names operation 0x18 DW_OP_xderef; the "xdref" spelling is retained
+// for compatibility.
+export def DW_OP_xdref: u8 = 0x18;
+export def DW_OP_xderef: u8 = 0x18;
+export def DW_OP_abs: u8 = 0x19;
+export def DW_OP_and: u8 = 0x1a;
+export def DW_OP_div: u8 = 0x1b;
+export def DW_OP_minus: u8 = 0x1c;
+export def DW_OP_mod: u8 = 0x1d;
+export def DW_OP_mul: u8 = 0x1e;
+export def DW_OP_neg: u8 = 0x1f;
+export def DW_OP_not: u8 = 0x20;
+export def DW_OP_or: u8 = 0x21;
+export def DW_OP_plus: u8 = 0x22;
+export def DW_OP_plus_uconst: u8 = 0x23;
+export def DW_OP_shl: u8 = 0x24;
+export def DW_OP_shr: u8 = 0x25;
+export def DW_OP_shra: u8 = 0x26;
+export def DW_OP_xor: u8 = 0x27;
+export def DW_OP_bra: u8 = 0x28;
+export def DW_OP_eq: u8 = 0x29;
+export def DW_OP_ge: u8 = 0x2a;
+export def DW_OP_gt: u8 = 0x2b;
+export def DW_OP_le: u8 = 0x2c;
+export def DW_OP_lt: u8 = 0x2d;
+export def DW_OP_ne: u8 = 0x2e;
+export def DW_OP_skip: u8 = 0x2f;
+export def DW_OP_lit0: u8 = 0x30;
+export def DW_OP_lit1: u8 = 0x31;
+export def DW_OP_lit2: u8 = 0x32;
+export def DW_OP_lit3: u8 = 0x33;
+export def DW_OP_lit4: u8 = 0x34;
+export def DW_OP_lit5: u8 = 0x35;
+export def DW_OP_lit6: u8 = 0x36;
+export def DW_OP_lit7: u8 = 0x37;
+export def DW_OP_lit8: u8 = 0x38;
+export def DW_OP_lit9: u8 = 0x39;
+export def DW_OP_lit10: u8 = 0x3a;
+export def DW_OP_lit11: u8 = 0x3b;
+export def DW_OP_lit12: u8 = 0x3c;
+export def DW_OP_lit13: u8 = 0x3d;
+export def DW_OP_lit14: u8 = 0x3e;
+export def DW_OP_lit15: u8 = 0x3f;
+export def DW_OP_lit16: u8 = 0x40;
+export def DW_OP_lit17: u8 = 0x41;
+export def DW_OP_lit18: u8 = 0x42;
+export def DW_OP_lit19: u8 = 0x43;
+export def DW_OP_lit20: u8 = 0x44;
+export def DW_OP_lit21: u8 = 0x45;
+export def DW_OP_lit22: u8 = 0x46;
+export def DW_OP_lit23: u8 = 0x47;
+export def DW_OP_lit24: u8 = 0x48;
+export def DW_OP_lit25: u8 = 0x49;
+export def DW_OP_lit26: u8 = 0x4a;
+export def DW_OP_lit27: u8 = 0x4b;
+export def DW_OP_lit28: u8 = 0x4c;
+export def DW_OP_lit29: u8 = 0x4d;
+export def DW_OP_lit30: u8 = 0x4e;
+export def DW_OP_lit31: u8 = 0x4f;
+export def DW_OP_reg0: u8 = 0x50;
+export def DW_OP_reg1: u8 = 0x51;
+export def DW_OP_reg2: u8 = 0x52;
+export def DW_OP_reg3: u8 = 0x53;
+export def DW_OP_reg4: u8 = 0x54;
+export def DW_OP_reg5: u8 = 0x55;
+export def DW_OP_reg6: u8 = 0x56;
+export def DW_OP_reg7: u8 = 0x57;
+export def DW_OP_reg8: u8 = 0x58;
+export def DW_OP_reg9: u8 = 0x59;
+export def DW_OP_reg10: u8 = 0x5a;
+export def DW_OP_reg11: u8 = 0x5b;
+export def DW_OP_reg12: u8 = 0x5c;
+export def DW_OP_reg13: u8 = 0x5d;
+export def DW_OP_reg14: u8 = 0x5e;
+export def DW_OP_reg15: u8 = 0x5f;
+export def DW_OP_reg16: u8 = 0x60;
+export def DW_OP_reg17: u8 = 0x61;
+export def DW_OP_reg18: u8 = 0x62;
+export def DW_OP_reg19: u8 = 0x63;
+export def DW_OP_reg20: u8 = 0x64;
+export def DW_OP_reg21: u8 = 0x65;
+export def DW_OP_reg22: u8 = 0x66;
+export def DW_OP_reg23: u8 = 0x67;
+export def DW_OP_reg24: u8 = 0x68;
+export def DW_OP_reg25: u8 = 0x69;
+export def DW_OP_reg26: u8 = 0x6a;
+export def DW_OP_reg27: u8 = 0x6b;
+export def DW_OP_reg28: u8 = 0x6c;
+export def DW_OP_reg29: u8 = 0x6d;
+export def DW_OP_reg30: u8 = 0x6e;
+export def DW_OP_reg31: u8 = 0x6f;
+export def DW_OP_breg0: u8 = 0x70;
+export def DW_OP_breg1: u8 = 0x71;
+export def DW_OP_breg2: u8 = 0x72;
+export def DW_OP_breg3: u8 = 0x73;
+export def DW_OP_breg4: u8 = 0x74;
+export def DW_OP_breg5: u8 = 0x75;
+export def DW_OP_breg6: u8 = 0x76;
+export def DW_OP_breg7: u8 = 0x77;
+export def DW_OP_breg8: u8 = 0x78;
+export def DW_OP_breg9: u8 = 0x79;
+export def DW_OP_breg10: u8 = 0x7a;
+export def DW_OP_breg11: u8 = 0x7b;
+export def DW_OP_breg12: u8 = 0x7c;
+export def DW_OP_breg13: u8 = 0x7d;
+export def DW_OP_breg14: u8 = 0x7e;
+export def DW_OP_breg15: u8 = 0x7f;
+export def DW_OP_breg16: u8 = 0x80;
+export def DW_OP_breg17: u8 = 0x81;
+export def DW_OP_breg18: u8 = 0x82;
+export def DW_OP_breg19: u8 = 0x83;
+export def DW_OP_breg20: u8 = 0x84;
+export def DW_OP_breg21: u8 = 0x85;
+export def DW_OP_breg22: u8 = 0x86;
+export def DW_OP_breg23: u8 = 0x87;
+export def DW_OP_breg24: u8 = 0x88;
+export def DW_OP_breg25: u8 = 0x89;
+export def DW_OP_breg26: u8 = 0x8a;
+export def DW_OP_breg27: u8 = 0x8b;
+export def DW_OP_breg28: u8 = 0x8c;
+export def DW_OP_breg29: u8 = 0x8d;
+export def DW_OP_breg30: u8 = 0x8e;
+export def DW_OP_breg31: u8 = 0x8f;
+export def DW_OP_regx: u8 = 0x90;
+export def DW_OP_fbreg: u8 = 0x91;
+export def DW_OP_bregx: u8 = 0x92;
+export def DW_OP_piece: u8 = 0x93;
+// DWARF names these operations DW_OP_deref_size and DW_OP_xderef_size; the
+// "dref" spellings are retained for compatibility.
+export def DW_OP_dref_size: u8 = 0x94;
+export def DW_OP_deref_size: u8 = 0x94;
+export def DW_OP_xdref_size: u8 = 0x95;
+export def DW_OP_xderef_size: u8 = 0x95;
+export def DW_OP_nop: u8 = 0x96;
+export def DW_OP_push_object_address: u8 = 0x97;
+export def DW_OP_call2: u8 = 0x98;
+export def DW_OP_call4: u8 = 0x99;
+export def DW_OP_call_ref: u8 = 0x9a;
+export def DW_OP_form_tls_address: u8 = 0x9b;
+export def DW_OP_call_frame_cfa: u8 = 0x9c;
+export def DW_OP_bit_piece: u8 = 0x9d;
+export def DW_OP_implicit_value: u8 = 0x9e;
+export def DW_OP_stack_value: u8 = 0x9f;
+export def DW_OP_implicit_pointer: u8 = 0xa0;
+export def DW_OP_addrx: u8 = 0xa1;
+export def DW_OP_constx: u8 = 0xa2;
+export def DW_OP_entry_value: u8 = 0xa3;
+export def DW_OP_const_type: u8 = 0xa4;
+export def DW_OP_regval_type: u8 = 0xa5;
+export def DW_OP_deref_type: u8 = 0xa6;
+// DWARF names operation 0xa7 DW_OP_xderef_type; the "xdref" spelling is
+// retained for compatibility.
+export def DW_OP_xdref_type: u8 = 0xa7;
+export def DW_OP_xderef_type: u8 = 0xa7;
+export def DW_OP_convert: u8 = 0xa8;
+export def DW_OP_reinterpret: u8 = 0xa9;
+export def DW_OP_lo_user: u8 = 0xe0;
+export def DW_OP_hi_user: u8 = 0xff;
+
+export def DW_LLE_end_of_list: u8 = 0x00;
+export def DW_LLE_base_addressx: u8 = 0x01;
+export def DW_LLE_startx_endx: u8 = 0x02;
+export def DW_LLE_startx_length: u8 = 0x03;
+export def DW_LLE_offset_pair: u8 = 0x04;
+export def DW_LLE_default_location: u8 = 0x05;
+export def DW_LLE_base_address: u8 = 0x06;
+export def DW_LLE_start_end: u8 = 0x07;
+export def DW_LLE_start_length: u8 = 0x08;
+
+export def DW_ATE_address: u8 = 0x01;
+export def DW_ATE_boolean: u8 = 0x02;
+export def DW_ATE_complex_float: u8 = 0x03;
+export def DW_ATE_float: u8 = 0x04;
+export def DW_ATE_signed: u8 = 0x05;
+export def DW_ATE_signed_char: u8 = 0x06;
+export def DW_ATE_unsigned: u8 = 0x07;
+export def DW_ATE_unsigned_char: u8 = 0x08;
+export def DW_ATE_imaginary_float: u8 = 0x09;
+export def DW_ATE_packed_decimal: u8 = 0x0a;
+export def DW_ATE_numeric_string: u8 = 0x0b;
+export def DW_ATE_edited: u8 = 0x0c;
+export def DW_ATE_signed_fixed: u8 = 0x0d;
+export def DW_ATE_unsigned_fixed: u8 = 0x0e;
+export def DW_ATE_decimal_float: u8 = 0x0f;
+export def DW_ATE_UTF: u8 = 0x10;
+export def DW_ATE_UCS: u8 = 0x11;
+export def DW_ATE_ASCII: u8 = 0x12;
+export def DW_ATE_lo_user: u8 = 0x80;
+export def DW_ATE_hi_user: u8 = 0xff;
+
+export def DW_DS_unsigned: u8 = 0x01;
+export def DW_DS_leading_overpunch: u8 = 0x02;
+export def DW_DS_trailing_overpunch: u8 = 0x03;
+export def DW_DS_leading_separate: u8 = 0x04;
+export def DW_DS_trailing_separate: u8 = 0x05;
+
+export def DW_END_default: u8 = 0x00;
+export def DW_END_big: u8 = 0x01;
+export def DW_END_little: u8 = 0x02;
+export def DW_END_lo_user: u8 = 0x40;
+export def DW_END_hi_user: u8 = 0xff;
+
+export def DW_ACCESS_public: u8 = 0x01;
+export def DW_ACCESS_protected: u8 = 0x02;
+export def DW_ACCESS_private: u8 = 0x03;
+
+export def DW_VIS_local: u8 = 0x01;
+export def DW_VIS_exported: u8 = 0x02;
+export def DW_VIS_qualified: u8 = 0x03;
+
+export def DW_VIRTUALITY_none: u8 = 0x00;
+export def DW_VIRTUALITY_virtual: u8 = 0x01;
+export def DW_VIRTUALITY_pure_virtual: u8 = 0x02;
+
+export def DW_LANG_C89: u16 = 0x0001;
+export def DW_LANG_C: u16 = 0x0002;
+export def DW_LANG_Ada83: u16 = 0x0003;
+export def DW_LANG_C_plus_plus: u16 = 0x0004;
+export def DW_LANG_Cobol74: u16 = 0x0005;
+export def DW_LANG_Cobol85: u16 = 0x0006;
+export def DW_LANG_Fortran77: u16 = 0x0007;
+export def DW_LANG_Fortran90: u16 = 0x0008;
+export def DW_LANG_Pascal83: u16 = 0x0009;
+export def DW_LANG_Modula2: u16 = 0x000a;
+export def DW_LANG_Java: u16 = 0x000b;
+export def DW_LANG_C99: u16 = 0x000c;
+export def DW_LANG_Ada95: u16 = 0x000d;
+export def DW_LANG_Fortran95: u16 = 0x000e;
+export def DW_LANG_PLI: u16 = 0x000f;
+export def DW_LANG_ObjC: u16 = 0x0010;
+export def DW_LANG_ObjC_plus_plus: u16 = 0x0011;
+export def DW_LANG_UPC: u16 = 0x0012;
+export def DW_LANG_D: u16 = 0x0013;
+export def DW_LANG_Python: u16 = 0x0014;
+export def DW_LANG_OpenCL: u16 = 0x0015;
+export def DW_LANG_Go: u16 = 0x0016;
+export def DW_LANG_Modula3: u16 = 0x0017;
+export def DW_LANG_Haskell: u16 = 0x0018;
+export def DW_LANG_C_plus_plus_03: u16 = 0x0019;
+export def DW_LANG_C_plus_plus_11: u16 = 0x001a;
+export def DW_LANG_OCaml: u16 = 0x001b;
+export def DW_LANG_Rust: u16 = 0x001c;
+// DWARF names this language code DW_LANG_C11; the lowercase spelling is
+// retained for compatibility.
+export def DW_LANG_c11: u16 = 0x001d;
+export def DW_LANG_C11: u16 = 0x001d;
+export def DW_LANG_Swift: u16 = 0x001e;
+export def DW_LANG_Julia: u16 = 0x001f;
+export def DW_LANG_Dylan: u16 = 0x0020;
+export def DW_LANG_C_plus_plus_14: u16 = 0x0021;
+export def DW_LANG_Fortran03: u16 = 0x0022;
+export def DW_LANG_Fortran08: u16 = 0x0023;
+export def DW_LANG_RenderScript: u16 = 0x0024;
+export def DW_LANG_BLISS: u16 = 0x0025;
+export def DW_LANG_lo_user: u16 = 0x8000;
+export def DW_LANG_hi_user: u16 = 0xffff;
+
+export def DW_ADDR_none: uint = 0;
+
+export def DW_ID_case_sensitive: u8 = 0x00;
+export def DW_ID_up_case: u8 = 0x01;
+export def DW_ID_down_case: u8 = 0x02;
+export def DW_ID_case_insensitive: u8 = 0x03;
+
+export def DW_CC_normal: u8 = 0x01;
+export def DW_CC_program: u8 = 0x02;
+export def DW_CC_nocall: u8 = 0x03;
+export def DW_CC_pass_by_reference: u8 = 0x04;
+export def DW_CC_pass_by_value: u8 = 0x05;
+export def DW_CC_lo_user: u8 = 0x40;
+export def DW_CC_hi_user: u8 = 0xff;
+
+export def DW_INL_not_inlined: u8 = 0x00;
+export def DW_INL_inlined: u8 = 0x01;
+export def DW_INL_declared_not_inlined: u8 = 0x02;
+export def DW_INL_declared_inlined: u8 = 0x03;
+
+export def DW_ORD_row_major: u8 = 0x00;
+export def DW_ORD_col_major: u8 = 0x01;
+
+export def DW_DSC_label: u8 = 0x00;
+export def DW_DSC_range: u8 = 0x01;
+
+export def DW_IDX_compile_unit: u16 = 0x01;
+export def DW_IDX_type_unit: u16 = 0x02;
+export def DW_IDX_die_offset: u16 = 0x03;
+export def DW_IDX_parent: u16 = 0x04;
+export def DW_IDX_type_hash: u16 = 0x05;
+export def DW_IDX_lo_user: u16 = 0x2000;
+export def DW_IDX_hi_user: u16 = 0x3fff;
+
+export def DW_DEFAULTED_no: u8 = 0x00;
+export def DW_DEFAULTED_in_class: u8 = 0x01;
+export def DW_DEFAULTED_out_of_class: u8 = 0x02;
+
+export def DW_LNS_copy: u8 = 0x01;
+export def DW_LNS_advance_pc: u8 = 0x02;
+export def DW_LNS_advance_line: u8 = 0x03;
+export def DW_LNS_set_file: u8 = 0x04;
+export def DW_LNS_set_column: u8 = 0x05;
+export def DW_LNS_negate_stmt: u8 = 0x06;
+export def DW_LNS_set_basic_block: u8 = 0x07;
+export def DW_LNS_const_add_pc: u8 = 0x08;
+export def DW_LNS_fixed_advance_pc: u8 = 0x09;
+export def DW_LNS_set_prologue_end: u8 = 0x0a;
+export def DW_LNS_set_epilogue_begin: u8 = 0x0b;
+export def DW_LNS_isa: u8 = 0x0c;
+
+export def DW_LNE_end_sequence: u8 = 0x01;
+export def DW_LNE_set_address: u8 = 0x02;
+export def DW_LNE_define_file: u8 = 0x03;
+export def DW_LNE_set_discriminator: u8 = 0x04;
+export def DW_LNE_lo_user: u8 = 0x80;
+export def DW_LNE_hi_user: u8 = 0xff;
+
+export def DW_LNCT_path: u16 = 0x01;
+export def DW_LNCT_directory_index: u16 = 0x02;
+export def DW_LNCT_timestamp: u16 = 0x03;
+export def DW_LNCT_size: u16 = 0x04;
+export def DW_LNCT_MD5: u16 = 0x05;
+export def DW_LNCT_lo_user: u16 = 0x2000;
+export def DW_LNCT_hi_user: u16 = 0x3fff;
+
+export def DW_MACRO_define: u8 = 0x01;
+export def DW_MACRO_undef: u8 = 0x02;
+export def DW_MACRO_start_file: u8 = 0x03;
+export def DW_MACRO_end_file: u8 = 0x04;
+export def DW_MACRO_define_strp: u8 = 0x05;
+export def DW_MACRO_undef_strp: u8 = 0x06;
+export def DW_MACRO_import: u8 = 0x07;
+export def DW_MACRO_define_sup: u8 = 0x08;
+export def DW_MACRO_undef_sup: u8 = 0x09;
+export def DW_MACRO_import_sup: u8 = 0x0a;
+export def DW_MACRO_define_strx: u8 = 0x0b;
+export def DW_MACRO_undef_strx: u8 = 0x0c;
+export def DW_MACRO_lo_user: u8 = 0xe0;
+export def DW_MACRO_hi_user: u8 = 0xff;
+
+// The following instructions are omitted:
+// DW_CFA_advance_loc
+// DW_CFA_offset
+// DW_CFA_restore
+export def DW_CFA_nop: u8 = 0x00;
+export def DW_CFA_set_loc: u8 = 0x01;
+export def DW_CFA_advance_loc1: u8 = 0x02;
+export def DW_CFA_advance_loc2: u8 = 0x03;
+// DWARF names instruction 0x04 DW_CFA_advance_loc4; the "loc3" spelling is
+// retained for compatibility.
+export def DW_CFA_advance_loc3: u8 = 0x04;
+export def DW_CFA_advance_loc4: u8 = 0x04;
+export def DW_CFA_offset_extended: u8 = 0x05;
+export def DW_CFA_restore_extended: u8 = 0x06;
+export def DW_CFA_undefined: u8 = 0x07;
+export def DW_CFA_same_value: u8 = 0x08;
+export def DW_CFA_register: u8 = 0x09;
+export def DW_CFA_remember_state: u8 = 0x0a;
+export def DW_CFA_restore_state: u8 = 0x0b;
+export def DW_CFA_def_cfa: u8 = 0x0c;
+export def DW_CFA_def_cfa_register: u8 = 0x0d;
+export def DW_CFA_def_cfa_offset: u8 = 0x0e;
+export def DW_CFA_def_cfa_expression: u8 = 0x0f;
+export def DW_CFA_expression: u8 = 0x10;
+export def DW_CFA_offset_extended_sf: u8 = 0x11;
+export def DW_CFA_def_cfa_sf: u8 = 0x12;
+export def DW_CFA_def_cfa_offset_sf: u8 = 0x13;
+export def DW_CFA_val_offset: u8 = 0x14;
+export def DW_CFA_val_offset_sf: u8 = 0x15;
+export def DW_CFA_val_expression: u8 = 0x16;
+export def DW_CFA_lo_user: u8 = 0x1c;
+export def DW_CFA_hi_user: u8 = 0x3f;
+
+export def DW_RLE_end_of_list: u8 = 0x00;
+export def DW_RLE_base_addressx: u8 = 0x01;
+export def DW_RLE_startx_endx: u8 = 0x02;
+export def DW_RLE_startx_length: u8 = 0x03;
+export def DW_RLE_offset_pair: u8 = 0x04;
+export def DW_RLE_base_address: u8 = 0x05;
+export def DW_RLE_start_end: u8 = 0x06;
+export def DW_RLE_start_length: u8 = 0x07;
diff --git a/debug/dwarf/info.ha b/debug/dwarf/info.ha
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use errors;
+use debug::image;
+use format::elf;
+use io;
+use memio;
+
+// The highest .debug_info unit header version accepted by
+// [[read_debug_info]].
+def INFO_VERSION: u16 = 4;
+
+// Reader state for walking the debug info entries (DIEs) of one table in
+// .debug_info. Obtain one from [[read_debug_info]], step through it with
+// [[debug_info_next]], and release it with [[debug_info_finish]].
+export type debug_info_reader = struct {
+ // Image whose sections are being read
+ image: *image::image,
+ // Abbreviation table named by the unit header
+ abbrev: abbrev_table,
+ // .debug_str contents, or void if the image has no such section
+ strings: (string_table | void),
+ // Heap-allocated stream over the .debug_info section
+ mem: *memio::stream,
+ // Heap-allocated table reader layered on top of "mem"
+ rd: *table_reader,
+};
+
+// Reads the debug info from a DWARF image, starting with the unit header found
+// at "offs" bytes into .debug_info. Returns a [[debug_info_reader]]; call
+// [[debug_info_next]] to retrieve the next DIE.
+//
+// Returns void if the image has no .debug_info section, if there is no unit
+// header at the given offset, or if the abbreviation table is unavailable.
+// Truncated or unreadable headers are reported as an [[io::error]].
+//
+// Pass the return value to [[debug_info_finish]] after you're done with it.
+export fn read_debug_info(
+	image: *image::image,
+	offs: u64,
+) (debug_info_reader | void | io::error) = {
+	const sec = match (image::section_byname(image, ".debug_info")) {
+	case let sec: *elf::section64 =>
+		yield sec;
+	case null =>
+		return;
+	};
+
+	// Load the string table first: it only references section data, so
+	// there is nothing to unwind if a later step fails.
+	const strtab = load_strings(image)?;
+
+	// Everything allocated below is released again unless we make it all
+	// the way to the final return.
+	let ok = false;
+	const memrd = alloc(image::section_reader(image, sec));
+	defer if (!ok) free(memrd);
+	io::seek(memrd, offs: io::off, io::whence::SET)?;
+
+	const rd = match (new_table_reader(memrd, true)?) {
+	case let rd: table_reader =>
+		yield alloc(rd);
+	case io::EOF =>
+		return;
+	};
+	defer if (!ok) free(rd);
+
+	// Unit header: version, abbreviation table offset, address size
+	const ver = read_uhalf(rd)?;
+	const abbrev_offs = read_secword(rd)?;
+	const asize = read_ubyte(rd)?;
+	assert(ver <= INFO_VERSION, "debug::dwarf: unsupported .debug_info version");
+	assert(asize == 8, "debug::dwarf: unsupported address size in .debug_info");
+
+	const abbrevs = match (load_abbrevs(image, abbrev_offs)?) {
+	case void =>
+		return;
+	case let tab: abbrev_table =>
+		yield tab;
+	};
+
+	ok = true;
+	return debug_info_reader {
+		image = image,
+		abbrev = abbrevs,
+		strings = strtab,
+		mem = memrd,
+		rd = rd,
+	};
+};
+
+// Fetches the next debug info [[entry]] (DIE) from a [[debug_info_reader]],
+// skipping over null entries (abbreviation code zero). Returns [[io::EOF]]
+// once the table has been consumed. Aborts if the data is malformed.
+//
+// Pass the return value to [[entry_finish]] when done.
+export fn debug_info_next(di: *debug_info_reader) (entry | io::EOF) = {
+	// A zero code is a null entry; keep reading until a real one shows up
+	// or the table runs out.
+	let code = 0u64;
+	for (code == 0) {
+		if (read_iseof(di.rd)) {
+			return io::EOF;
+		};
+		code = read_uleb128(di.rd)!;
+	};
+
+	const ab = get_abbrev(&di.abbrev, code);
+	assert(ab != null, "debug::dwarf: unknown abbreviated tag");
+	const ent = read_die(di, di.rd, ab as *abbrev)!;
+	return ent;
+};
+
+// Frees resources associated with a [[debug_info_reader]], including the
+// abbreviation table which [[read_debug_info]] loaded on its behalf.
+export fn debug_info_finish(di: *debug_info_reader) void = {
+	// The reader owns its abbreviation table; without this it is leaked.
+	abbrev_table_finish(&di.abbrev);
+	free(di.mem);
+	free(di.rd);
+};
+
+// A debug entry (DIE), as returned by [[debug_info_next]]. Pass it to
+// [[entry_finish]] when done.
+export type entry = struct {
+ // Tag copied from the entry's abbreviation
+ tag: u32,
+ // Copied from the abbreviation's "has_children" flag
+ children: bool,
+ // One field per attribute listed in the abbreviation, in order
+ fields: []field,
+};
+
+// Frees resources associated with an [[entry]]. Only the field list itself is
+// freed; data referenced by individual fields is left untouched.
+export fn entry_finish(ent: *entry) void = {
+ free(ent.fields);
+};
+
+// A debug [[entry]] field. "attr" and "form" are taken from the abbreviation;
+// which member of the union is meaningful depends on the form that was read.
+export type field = struct {
+ attr: u32,
+ form: u32,
+ union {
+ // DW_FORM_addr
+ address: uintptr,
+ // DW_FORM_block, block1, block2, block4
+ block: []u8,
+ // DW_FORM_data1, data2, data4, data8, udata, sdata
+ constant: u64,
+ // DW_FORM_string, strp
+ string: const str,
+ // DW_FORM_flag, flag_present
+ flag: bool,
+ // DW_FORM_ref_addr, ref1, ref2, ref4, ref8, ref_udata, ref_sig8,
+ // sec_offset
+ reference: u64,
+ // DW_FORM_exprloc
+ exprloc: []u8,
+ // NOTE(review): not assigned by the DIE reader in this file
+ ptr: u64,
+ },
+};
+
+// Reads the attribute values of a single DIE, as laid out by its abbreviation,
+// and returns them as an [[entry]]. The caller must already have consumed the
+// abbreviation code.
+//
+// The "form" of each returned field is the form that was actually decoded,
+// i.e. DW_FORM_indirect is resolved to the form stored in the data. Returns
+// [[errors::unsupported]] for forms which are not implemented; nothing is
+// leaked on error.
+fn read_die(
+	ir: *debug_info_reader,
+	rd: *table_reader,
+	abbrev: *abbrev,
+) (entry | io::error) = {
+	let fields: []field = [];
+	// Release the partially built field list if we bail out early.
+	let ok = false;
+	defer if (!ok) free(fields);
+
+	for (let i = 0z; i < len(abbrev.fields); i += 1) {
+		const abf = &abbrev.fields[i];
+
+		// DW_FORM_indirect stores the real form inline as a ULEB128
+		let form = abf.form;
+		for (form == DW_FORM_indirect) {
+			form = read_uleb128(rd)?: u32;
+		};
+
+		let f = field {
+			attr = abf.attr,
+			form = form,
+			...
+		};
+
+		// NOTE: Only supports up to DWARF 4 forms for now
+		switch (form) {
+		case DW_FORM_addr =>
+			f.address = read_ulong(rd)?: uintptr;
+		case DW_FORM_block =>
+			f.block = read_slice(rd, read_uleb128(rd)?)?;
+		case DW_FORM_block1 =>
+			f.block = read_slice(rd, read_ubyte(rd)?)?;
+		case DW_FORM_block2 =>
+			f.block = read_slice(rd, read_uhalf(rd)?)?;
+		case DW_FORM_block4 =>
+			f.block = read_slice(rd, read_uword(rd)?)?;
+		case DW_FORM_data1 =>
+			f.constant = read_ubyte(rd)?;
+		case DW_FORM_data2 =>
+			f.constant = read_uhalf(rd)?;
+		case DW_FORM_data4 =>
+			f.constant = read_uword(rd)?;
+		case DW_FORM_data8 =>
+			f.constant = read_ulong(rd)?;
+		case DW_FORM_udata =>
+			f.constant = read_uleb128(rd)?;
+		case DW_FORM_sdata =>
+			f.constant = read_sleb128(rd)?: u64;
+		case DW_FORM_string =>
+			f.string = read_string(rd)?;
+		case DW_FORM_strp =>
+			// Offset into the string table, if the image has one
+			const offs = read_secword(rd)?;
+			match (ir.strings) {
+			case let tab: string_table =>
+				f.string = get_strp(&tab, offs);
+			case void =>
+				f.string = "(unknown)";
+			};
+		case DW_FORM_flag =>
+			f.flag = read_ubyte(rd)? != 0;
+		case DW_FORM_flag_present =>
+			// The attribute's presence is the value; no data
+			f.flag = true;
+		case DW_FORM_ref_addr =>
+			f.reference = read_secword(rd)?;
+		case DW_FORM_ref1 =>
+			f.reference = read_ubyte(rd)?;
+		case DW_FORM_ref2 =>
+			f.reference = read_uhalf(rd)?;
+		case DW_FORM_ref4 =>
+			f.reference = read_uword(rd)?;
+		case DW_FORM_ref8 =>
+			f.reference = read_ulong(rd)?;
+		case DW_FORM_ref_udata =>
+			f.reference = read_uleb128(rd)?;
+		case DW_FORM_ref_sig8 =>
+			f.reference = read_ulong(rd)?;
+		case DW_FORM_sec_offset =>
+			f.reference = read_secword(rd)?;
+		case DW_FORM_exprloc =>
+			f.exprloc = read_slice(rd, read_uleb128(rd)?)?;
+		case DW_FORM_indirect =>
+			// Resolved by the loop above
+			abort();
+		case =>
+			return errors::unsupported;
+		};
+
+		append(fields, f);
+	};
+
+	ok = true;
+	return entry {
+		tag = abbrev.tag,
+		children = abbrev.has_children,
+		fields = fields,
+	};
+};
diff --git a/debug/dwarf/line.ha b/debug/dwarf/line.ha
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use debug::image;
+use errors;
+use format::elf;
+use io;
+use memio;
+
+// The .debug_line program version this module targets.
+// [[exec_line_program]] asserts that the program header carries version 3.
+def LINE_VERSION: u16 = 3;
+
+// Boolean flags for the line number state machine. These are combined as a
+// bit set in [[line_state]].
+export type line_flag = enum uint {
+ NONE = 0,
+ // Initialised from the header default; toggled by DW_LNS_negate_stmt
+ IS_STMT = 1 << 0,
+ // Set by DW_LNS_set_basic_block; cleared after DW_LNS_copy
+ BASIC_BLOCK = 1 << 1,
+ // NOTE(review): no instruction handler in this file sets this flag
+ END_SEQUENCE = 1 << 2,
+ // Set by DW_LNS_set_prologue_end; cleared after DW_LNS_copy
+ PROLOGUE_END = 1 << 3,
+ // Set by DW_LNS_set_epilogue_begin; cleared after DW_LNS_copy
+ EPILOGUE_BEGIN = 1 << 4,
+};
+
+// Line number program state: the registers of the line number state machine,
+// as updated by [[line_step]].
+export type line_state = struct {
+ // Reader position recorded just before the latest instruction was read
+ vm_loc: u64,
+ // Current address register
+ addr: uintptr,
+ op_index: uint,
+ // Current file number; reset to 1 at the start of each sequence
+ file: uint,
+ // Current line number; reset to 1 at the start of each sequence
+ line: uint,
+ column: uint,
+ // Bit set of [[line_flag]] values
+ flags: line_flag,
+ isa: uint,
+ discriminator: uint,
+};
+
+// A file with associated line numbers, read either from the program header or
+// from a DW_LNE_define_file instruction. "dir", "mtime" and "length" are
+// stored exactly as decoded from the program (three ULEB128 values following
+// the name).
+export type line_file = struct {
+ name: str,
+ dir: u64,
+ mtime: u64,
+ length: u64,
+};
+
+// Header information for a .debug_line program.
+export type line_header = struct {
+ min_instr_length: u8,
+ // Not read from the header; always set to 1 by [[exec_line_program]]
+ max_ops_per_instr: u8,
+ // Initial value of the IS_STMT flag for each sequence
+ default_isstmt: bool,
+ line_base: i8,
+ line_range: u8,
+ // Opcodes at or above this value are special opcodes
+ opcode_base: u8,
+ // Number of ULEB128 operands taken by each standard opcode; the entry
+ // for opcode N is stored at index N - 1
+ opcode_lengths: []u8,
+ dirs: []str,
+ files: []line_file,
+};
+
+// Line number program: a reader positioned in .debug_line together with the
+// parsed header and the current state machine registers. Created by
+// [[exec_line_program]]; release with [[line_program_finish]].
+export type line_program = struct {
+ // Heap-allocated stream over the .debug_line section
+ mem: *memio::stream,
+ // Heap-allocated table reader layered on top of "mem"
+ rd: *table_reader,
+ state: line_state,
+ head: line_header,
+};
+
+// Initializes a new line number state machine to run the line number program at
+// the specified offset in .debug_line.
+//
+// Returns void if the image has no .debug_line section or if there is no
+// program at the given offset. A truncated header is reported as an
+// [[io::error]], and a header with a zero opcode_base or line_range is rejected
+// with [[errors::invalid]]. Nothing is leaked on failure.
+//
+// Use [[line_step]] to step the state machine, and pass the result to
+// [[line_program_finish]] to free resources associated with the state machine
+// when done using it.
+export fn exec_line_program(
+	image: *image::image,
+	offs: u64,
+) (line_program | void | io::error) = {
+	const sec = match (image::section_byname(image, ".debug_line")) {
+	case let sec: *elf::section64 =>
+		yield sec;
+	case null =>
+		return;
+	};
+
+	// Everything allocated below is released again unless we make it all
+	// the way to the final return.
+	let ok = false;
+	const memrd = alloc(image::section_reader(image, sec));
+	defer if (!ok) free(memrd);
+	io::seek(memrd, offs: io::off, io::whence::SET)?;
+
+	const rd = match (new_table_reader(memrd, true)?) {
+	case let rd: table_reader =>
+		yield alloc(rd);
+	case io::EOF =>
+		return;
+	};
+	defer if (!ok) free(rd);
+
+	// Read program header
+	const ver = read_uhalf(rd)?;
+	assert(ver == LINE_VERSION,
+		"debug::dwarf: unsupported .debug_line version");
+
+	let head = line_header { ... };
+	defer if (!ok) {
+		free(head.opcode_lengths);
+		free(head.dirs);
+		free(head.files);
+	};
+
+	// header_length; not needed since the header is parsed in full
+	read_secword(rd)?;
+	head.min_instr_length = read_ubyte(rd)?;
+	head.max_ops_per_instr = 1; // Non-VLIW architectures only
+	head.default_isstmt = read_ubyte(rd)? != 0;
+	head.line_base = read_sbyte(rd)?;
+	head.line_range = read_ubyte(rd)?;
+	head.opcode_base = read_ubyte(rd)?;
+
+	// opcode_base counts from one, and line_range is used as a divisor
+	// when executing the program; zero is malformed for either.
+	if (head.opcode_base == 0 || head.line_range == 0) {
+		return errors::invalid;
+	};
+
+	// Opcode lengths, one per standard opcode (1 .. opcode_base - 1)
+	for (let i = 1u8; i < head.opcode_base; i += 1) {
+		const op = read_ubyte(rd)?;
+		append(head.opcode_lengths, op);
+	};
+
+	// Directories, terminated by an empty string
+	for (true) {
+		const dir = read_string(rd)?;
+		if (len(dir) == 0) {
+			break;
+		};
+		append(head.dirs, dir);
+	};
+
+	// Files, terminated by an empty name
+	for (true) {
+		const name = read_string(rd)?;
+		if (len(name) == 0) {
+			break;
+		};
+		const dir = read_uleb128(rd)?;
+		const mtime = read_uleb128(rd)?;
+		const length = read_uleb128(rd)?;
+		append(head.files, line_file {
+			name = name,
+			dir = dir,
+			mtime = mtime,
+			length = length,
+		});
+	};
+
+	let prog = line_program {
+		mem = memrd,
+		rd = rd,
+		state = line_state { ... },
+		head = head,
+	};
+	line_prog_reset(&prog);
+	ok = true;
+	return prog;
+};
+
+// Puts the state machine registers back to their start-of-sequence values:
+// file and line are 1, IS_STMT follows the header default, and every other
+// register is zero.
+fn line_prog_reset(prog: *line_program) void = {
+	let flags = line_flag::NONE;
+	if (prog.head.default_isstmt) {
+		flags = line_flag::IS_STMT;
+	};
+	prog.state = line_state {
+		file = 1,
+		line = 1,
+		flags = flags,
+		...
+	};
+};
+
+// Releases the header tables, table reader and section stream owned by a
+// [[line_program]].
+export fn line_program_finish(prog: *line_program) void = {
+	const head = &prog.head;
+	free(head.files);
+	free(head.dirs);
+	free(head.opcode_lengths);
+	free(prog.rd);
+	free(prog.mem);
+};
+
+// Keeps stepping the line number state machine until [[line_step]] produces a
+// [[line_state]], which is returned, or until the program is exhausted, in
+// which case [[io::EOF]] is returned.
+export fn line_next(prog: *line_program) (line_state | io::EOF | io::error) = {
+	for (true) {
+		match (line_step(prog)?) {
+		case void =>
+			// Instruction produced nothing to report; keep going
+			continue;
+		case io::EOF =>
+			return io::EOF;
+		case let row: line_state =>
+			return row;
+		};
+	};
+};
+
+// Copies the current registers as a new row, then clears the registers which
+// are reset every time a row is appended to the line number matrix.
+fn line_emit_row(state: *line_state) line_state = {
+	const row = *state;
+	state.discriminator = 0;
+	state.flags &= ~(
+		line_flag::BASIC_BLOCK |
+		line_flag::PROLOGUE_END |
+		line_flag::EPILOGUE_BEGIN);
+	return row;
+};
+
+// Step the line number state machine by one instruction. Returns a
+// [[line_state]] whenever the instruction appends a row to the line number
+// matrix (DW_LNS_copy, any special opcode, or DW_LNE_end_sequence, whose row
+// has [[line_flag::END_SEQUENCE]] set), [[io::EOF]] at the end of the program,
+// or void otherwise. Malformed programs are rejected with [[errors::invalid]].
+export fn line_step(
+	prog: *line_program,
+) (line_state | void | io::EOF | io::error) = {
+	let state = &prog.state;
+	const head = &prog.head;
+	if (read_iseof(prog.rd)) {
+		return io::EOF;
+	};
+	state.vm_loc = read_tell(prog.rd);
+
+	const opcode = read_ubyte(prog.rd)?;
+	if (opcode == 0) {
+		// Extended opcode: a ULEB128 length which covers the
+		// sub-opcode byte and its operands, then the sub-opcode.
+		const length = read_uleb128(prog.rd)?;
+		const ext = read_ubyte(prog.rd)?;
+		switch (ext) {
+		case DW_LNE_end_sequence =>
+			let row = *state;
+			row.flags |= line_flag::END_SEQUENCE;
+			line_prog_reset(prog);
+			return row;
+		case DW_LNE_set_address =>
+			state.addr = read_ulong(prog.rd)?: uintptr;
+		case DW_LNE_define_file =>
+			const name = read_string(prog.rd)?;
+			const dir = read_uleb128(prog.rd)?;
+			const mtime = read_uleb128(prog.rd)?;
+			const flen = read_uleb128(prog.rd)?;
+			append(head.files, line_file {
+				name = name,
+				dir = dir,
+				mtime = mtime,
+				length = flen,
+			});
+			state.file = len(head.files): uint;
+		case DW_LNE_set_discriminator =>
+			state.discriminator = read_uleb128(prog.rd)?: uint;
+		case =>
+			// Unknown opcode, skip its operands. The length must
+			// at least account for the sub-opcode byte itself.
+			if (length == 0) {
+				return errors::invalid;
+			};
+			read_slice(prog.rd, length - 1)?;
+		};
+	} else if (opcode < head.opcode_base) {
+		// Standard opcode
+		switch (opcode) {
+		case DW_LNS_copy =>
+			return line_emit_row(state);
+		case DW_LNS_advance_pc =>
+			const op_adv = read_uleb128(prog.rd)?;
+			state.addr += head.min_instr_length: uintptr
+				* op_adv: uintptr;
+		case DW_LNS_advance_line =>
+			const offs = read_sleb128(prog.rd)?;
+			state.line = (state.line: i64 + offs): uint;
+		case DW_LNS_set_file =>
+			state.file = read_uleb128(prog.rd)?: uint;
+		case DW_LNS_set_column =>
+			state.column = read_uleb128(prog.rd)?: uint;
+		case DW_LNS_negate_stmt =>
+			state.flags ^= line_flag::IS_STMT;
+		case DW_LNS_set_basic_block =>
+			state.flags |= line_flag::BASIC_BLOCK;
+		case DW_LNS_const_add_pc =>
+			// Advances the address as special opcode 255 would,
+			// without touching the line or emitting a row.
+			if (head.line_range == 0) {
+				return errors::invalid;
+			};
+			const adj = 255 - head.opcode_base;
+			const op_adv = adj / head.line_range;
+			// Widen before multiplying so the product cannot
+			// wrap around in u8.
+			state.addr += head.min_instr_length: uintptr
+				* op_adv: uintptr;
+		case DW_LNS_fixed_advance_pc =>
+			state.addr += read_uhalf(prog.rd)?: uintptr;
+			state.op_index = 0;
+		case DW_LNS_set_prologue_end =>
+			state.flags |= line_flag::PROLOGUE_END;
+		case DW_LNS_set_epilogue_begin =>
+			state.flags |= line_flag::EPILOGUE_BEGIN;
+		case DW_LNS_isa =>
+			state.isa = read_uleb128(prog.rd)?: uint;
+		case =>
+			// Unknown opcode, skip as many ULEB128 operands as
+			// the header says it takes.
+			let nargs = head.opcode_lengths[opcode - 1];
+			for (nargs != 0; nargs -= 1) {
+				read_uleb128(prog.rd)?;
+			};
+		};
+	} else {
+		// Special opcode: advances both address and line, then
+		// appends a row to the matrix.
+		if (head.line_range == 0) {
+			return errors::invalid;
+		};
+		const adj = opcode - head.opcode_base;
+		const op_adv = adj / head.line_range;
+		state.addr += head.min_instr_length: uintptr * op_adv: uintptr;
+		const delta = head.line_base: int +
+			(adj: int % head.line_range: int);
+		state.line = (state.line: int + delta): uint;
+		return line_emit_row(state);
+	};
+};
diff --git a/debug/dwarf/reader.ha b/debug/dwarf/reader.ha
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use bufio;
+use endian;
+use errors;
+use io;
+use memio;
+use strings;
+use types;
+
+// Reads primitive DWARF values from a single table, keeping track of how many
+// bytes of the table remain.
+export type table_reader = struct {
+ // Underlying stream the values are read from
+ src: *memio::stream,
+ // Value of "length" when the reader was created
+ orig_length: size,
+ // Number of bytes left before the end of the table
+ length: size,
+ // Whether the table uses the 64-bit DWARF format (8-byte section words)
+ is64: bool,
+};
+
+// Creates a new DWARF table reader on top of the given stream.
+//
+// When "read_length" is true, the table's initial length field is consumed
+// first and used to bound the reader; an initial word of 0xffffffff selects
+// the 64-bit format, whose real length follows as a 64-bit value. If the
+// stream does not hold enough data for the length field, [[io::EOF]] is
+// returned. Reserved length values yield [[errors::invalid]]. When
+// "read_length" is false the reader is effectively unbounded.
+//
+// The reader will return [[io::underread]] if the DWARF table is truncated.
+fn new_table_reader(
+	in: *memio::stream,
+	read_length: bool,
+) (table_reader | io::EOF | io::error) = {
+	let rd = table_reader {
+		src = in,
+		orig_length = types::SIZE_MAX,
+		length = types::SIZE_MAX,
+		is64 = false,
+	};
+	if (!read_length) {
+		return rd;
+	};
+
+	const word = match (read_uword(&rd)) {
+	case let word: u32 =>
+		yield word;
+	case let err: io::error =>
+		if (err is io::underread) {
+			return io::EOF;
+		};
+		return err;
+	};
+
+	if (word < 0xfffffff0) {
+		// Plain 32-bit length
+		rd.length = word: size;
+	} else if (word != 0xffffffff) {
+		// Reserved value
+		return errors::invalid;
+	} else {
+		// Escape code: 64-bit format, real length follows
+		rd.is64 = true;
+		const long = match (read_ulong(&rd)) {
+		case let long: u64 =>
+			yield long;
+		case let err: io::error =>
+			if (err is io::underread) {
+				return io::EOF;
+			};
+			return err;
+		};
+		rd.length = long: size;
+	};
+
+	rd.orig_length = rd.length;
+	return rd;
+};
+
+// Returns true once every byte of the table has been consumed.
+fn read_iseof(rd: *table_reader) bool = rd.length == 0;
+
+// Deducts "nbyte" from the number of bytes remaining in the table, or returns
+// [[io::underread]] (leaving the count unchanged) if fewer bytes remain.
+fn read_advance(rd: *table_reader, nbyte: size) (void | io::error) = {
+	if (nbyte <= rd.length) {
+		rd.length -= nbyte;
+		return;
+	};
+	return 0: io::underread;
+};
+
+// Aligns the reader on a given alignment, skipping padding bytes as needed;
+// does nothing if the reader is already aligned. The alignment must be
+// non-zero and at most 128. Returns [[io::underread]] if the table ends before
+// the aligned position.
+//
+// This function is needed because both binutils and LLVM inexplicably add
+// padding to .debug_aranges to align the first tuple on the address size * 2,
+// despite the fact that this is mentioned nowhere in the DWARF specification
+// and in fact section 7.25 specifically states that DWARF data is not aligned.
+// It took me 6 hours to figure this out.
+fn read_align(rd: *table_reader, alignment: size) (void | io::error) = {
+	let buf: [128]u8 = [0...];
+	assert(alignment != 0 && alignment <= len(buf),
+		"debug::dwarf: unsupported alignment");
+
+	// Position relative to the start of the table, counting the initial
+	// length field which precedes the data tracked by the reader.
+	let cur = rd.orig_length - rd.length + size(u32);
+	if (rd.is64) {
+		cur += size(u64);
+	};
+
+	const rem = cur % alignment;
+	if (rem == 0) {
+		// Already aligned; do not skip a whole alignment unit
+		return;
+	};
+	const pad = alignment - rem;
+
+	read_advance(rd, pad)?;
+	match (io::readall(rd.src, buf[..pad])?) {
+	case io::EOF =>
+		return 0: io::underread;
+	case size =>
+		return;
+	};
+};
+
+// Returns the reader's current position: the number of bytes consumed so far
+// plus the size of the initial length field (4 bytes, or 12 bytes for the
+// 64-bit format).
+fn read_tell(rd: *table_reader) size = {
+	const lenfield = if (rd.is64) size(u32) + size(u64) else size(u32);
+	const consumed = rd.orig_length - rd.length;
+	return consumed + lenfield;
+};
+
+// Reads one byte and reinterprets it as a signed 8-bit integer.
+fn read_sbyte(rd: *table_reader) (i8 | io::error) = {
+	const byte = read_ubyte(rd)?;
+	return byte: i8;
+};
+
+// Reads one unsigned byte. Returns [[io::underread]] if the table or the
+// underlying stream is exhausted.
+fn read_ubyte(rd: *table_reader) (u8 | io::error) = {
+	read_advance(rd, size(u8))?;
+
+	const res = bufio::read_byte(rd.src)?;
+	if (res is io::EOF) {
+		return 0: io::underread;
+	};
+	return res as u8;
+};
+
+// Reads an unsigned 16-bit integer in host byte order. Returns
+// [[io::underread]] if the table or the underlying stream is exhausted.
+fn read_uhalf(rd: *table_reader) (u16 | io::error) = {
+	read_advance(rd, size(u16))?;
+
+	let raw: [size(u16)]u8 = [0...];
+	if (io::readall(rd.src, raw)? is io::EOF) {
+		return 0: io::underread;
+	};
+	return endian::host.getu16(raw);
+};
+
+// Reads an unsigned 32-bit integer in host byte order. Returns
+// [[io::underread]] if the table or the underlying stream is exhausted.
+fn read_uword(rd: *table_reader) (u32 | io::error) = {
+	read_advance(rd, size(u32))?;
+
+	let raw: [size(u32)]u8 = [0...];
+	if (io::readall(rd.src, raw)? is io::EOF) {
+		return 0: io::underread;
+	};
+	return endian::host.getu32(raw);
+};
+
+// Reads an unsigned 64-bit integer in host byte order. Returns
+// [[io::underread]] if the table or the underlying stream is exhausted.
+fn read_ulong(rd: *table_reader) (u64 | io::error) = {
+	read_advance(rd, size(u64))?;
+
+	let raw: [size(u64)]u8 = [0...];
+	if (io::readall(rd.src, raw)? is io::EOF) {
+		return 0: io::underread;
+	};
+	return endian::host.getu64(raw);
+};
+
+// Reads a section-offset-sized word: 64 bits wide when the table uses the
+// 64-bit DWARF format, and 32 bits (widened to u64) otherwise.
+fn read_secword(rd: *table_reader) (u64 | io::error) = {
+	if (!rd.is64) {
+		return read_uword(rd)?: u64;
+	};
+	return read_ulong(rd)?;
+};
+
+// Reads an unsigned LEB128-encoded integer: seven value bits per byte, least
+// significant group first, with the high bit of each byte set when another
+// byte follows. Returns [[errors::invalid]] if the encoding runs past 64 bits.
+fn read_uleb128(rd: *table_reader) (u64 | io::error) = {
+	let val = 0u64;
+	for (let bits = 0u64; true; bits += 7) {
+		if (bits >= 64) {
+			// Over-long encoding; shifting further is meaningless
+			return errors::invalid;
+		};
+		const x = read_ubyte(rd)?;
+		// The shift is carried out in 64 bits since "bits" is a u64
+		val |= (x & 0x7f) << bits;
+		if (x & 0x80 == 0) break;
+	};
+	return val;
+};
+
+// Reads a signed LEB128-encoded integer. The encoding matches
+// [[read_uleb128]], except that bit 6 of the final byte is the sign bit and is
+// extended through the remaining high bits of the result. Returns
+// [[errors::invalid]] if the encoding runs past 64 bits.
+fn read_sleb128(rd: *table_reader) (i64 | io::error) = {
+	let bits = 0u64, uval = 0u64;
+	for (true) {
+		if (bits >= 64) {
+			// Over-long encoding; shifting further is meaningless
+			return errors::invalid;
+		};
+		const x = read_ubyte(rd)?;
+		// The shift is carried out in 64 bits since "bits" is a u64
+		uval |= (x & 0x7f) << bits;
+		bits += 7;
+		if (x & 0x80 == 0) break;
+	};
+
+	let val = uval: i64;
+	// Sign-extend from the top bit of the last group. A ten byte encoding
+	// already fills all 64 bits, in which case there is nothing left to
+	// extend (and the shift counts below would be out of range).
+	if (bits < 64 && (uval & (1u64 << (bits - 1))) != 0) {
+		val |= -1i64 << (bits: i64);
+	};
+	return val;
+};
+
+// Reads "amt" bytes. The returned slice is borrowed from the underlying
+// source and must not be freed. Returns [[io::underread]] if fewer than "amt"
+// bytes remain in the table or in the underlying stream.
+fn read_slice(rd: *table_reader, amt: size) ([]u8 | io::error) = {
+	// Check against the table bounds first, like the other readers do, so
+	// that the remaining length can never wrap around.
+	read_advance(rd, amt)?;
+
+	match (memio::borrowedread(rd.src, amt)) {
+	case let sl: []u8 =>
+		return sl;
+	case io::EOF =>
+		return 0: io::underread;
+	};
+};
+
+// Reads a NUL-terminated string. Returns [[io::underread]] if the table or the
+// underlying stream ends first, and [[errors::invalid]] if the data is not
+// valid UTF-8.
+//
+// XXX: The returned string is heap-allocated by [[bufio::read_tok]] and none
+// of the callers free it, so it leaks. It should probably be borrowed from
+// memio instead.
+fn read_string(rd: *table_reader) (const str | io::error) = {
+	match (bufio::read_tok(rd.src, 0)?) {
+	case let data: []u8 =>
+		// Account for the string plus its terminator, making sure the
+		// remaining length cannot wrap around.
+		const consumed = len(data) + 1;
+		if (rd.length < consumed) {
+			free(data);
+			return 0: io::underread;
+		};
+		rd.length -= consumed;
+
+		match (strings::fromutf8(data)) {
+		case let s: str =>
+			return s;
+		case =>
+			free(data);
+			return errors::invalid;
+		};
+	case io::EOF =>
+		return 0: io::underread;
+	};
+};
diff --git a/debug/dwarf/strings.ha b/debug/dwarf/strings.ha
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use debug::image;
+use format::elf;
+use io;
+use types::c;
+
+// A DWARF string table, as loaded by [[load_strings]]. Look up strings in it
+// with [[get_strp]].
+export type string_table = struct {
+ // Contents of the .debug_str section
+ data: []u8,
+};
+
+// Loads the DWARF string table from the image's .debug_str section. Returns
+// void if the image has no such section.
+export fn load_strings(
+	image: *image::image,
+) (string_table | void | io::error) = {
+	match (image::section_byname(image, ".debug_str")) {
+	case null =>
+		return;
+	case let sec: *elf::section64 =>
+		const data = image::section_data(image, sec);
+		return string_table {
+			data = data,
+		};
+	};
+};
+
+// Looks up the NUL-terminated string which starts "offs" bytes into the
+// string table. Aborts if the offset lies outside of the table or if the
+// string is not valid UTF-8.
+export fn get_strp(table: *string_table, offs: u64) const str = {
+	const start = &table.data[offs];
+	return c::tostr(start: *const c::char)!;
+};