// 8bitworkshop/src/common/binutils.ts
//
// 1253 lines
// 40 KiB
// TypeScript

/*
* Copyright (c) 2024 Steven E. Hugg
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
 * Reads a NUL-terminated byte string from `view`, starting at `offset`.
 *
 * Every byte becomes one character via `String.fromCharCode`; no multi-byte
 * decoding is attempted. The terminating zero byte is not part of the result.
 * If no terminator exists before the end of the view, the underlying
 * `getUint8` call throws a RangeError.
 *
 * @param view   bytes to read from
 * @param offset index of the first character inside `view`
 * @returns the decoded text (empty when the first byte is zero)
 */
function getASCII(view: DataView, offset: number): string {
  let text = '';
  for (let pos = offset, code = view.getUint8(pos); code !== 0; code = view.getUint8(++pos)) {
    text += String.fromCharCode(code);
  }
  return text;
}
// https://blog.k3170makan.com/2018/09/introduction-to-elf-format-elf-header.html
// https://chromium.googlesource.com/breakpad/breakpad/+/linux-dwarf/src/common/dwarf/dwarf2reader.cc
// https://wiki.osdev.org/DWARF
// https://dwarfstd.org/doc/dwarf-2.0.0.pdf
/**
 * Minimal reader for 32-bit little-endian ELF images.
 *
 * The constructor validates the identification bytes, records the entry
 * point, wraps every section header and, when both a `.strtab` and a
 * `.symtab` section exist, wraps every symbol table entry.
 *
 * @throws Error when the image is too short, the magic number, class,
 *         version or byte order is not supported, or the section name
 *         table index does not refer to an existing section.
 */
export class ELFParser {
  /** View over the whole ELF image; file offsets index directly into it. */
  readonly dataView: DataView;
  /** One wrapper per section header, in file order. */
  readonly sectionHeaders: ElfSectionHeader[];
  /** Entries of `.symtab` (empty when the file has no symbol table). */
  readonly symbolTable: ElfSymbolTableEntry[];
  /** Entry point address read from header offset 24. */
  readonly entry: number;

  constructor(data: Uint8Array) {
    const ELF32_HEADER_SIZE = 52;
    const SYMBOL_ENTRY_SIZE = 16;

    // Other classes in this file build views from `dataView.buffer` using
    // file-relative offsets, so the image has to start at byte 0 of its
    // ArrayBuffer. When `data` is a window into a larger buffer (for example
    // the result of `subarray`), copy it into a buffer of its own.
    const image = data.byteOffset === 0 ? data : new Uint8Array(data);
    this.dataView = new DataView(image.buffer, 0, image.byteLength);
    this.sectionHeaders = [];
    this.symbolTable = [];

    // Report a truncated file as an ELF problem instead of a bare RangeError.
    if (this.dataView.byteLength < ELF32_HEADER_SIZE) {
      throw new Error('Invalid ELF header');
    }
    const elfHeader = new DataView(this.dataView.buffer, 0, ELF32_HEADER_SIZE);
    // check magic # ("\x7fELF" read as a little-endian word)
    const magic = elfHeader.getInt32(0, true);
    if (magic !== 0x464c457f) {
      throw new Error('Invalid ELF header');
    }
    // only 32 bit supported
    if (elfHeader.getUint8(4) !== 1) {
      throw new Error('Only 32-bit ELF supported');
    }
    // check version = 1
    if (elfHeader.getUint8(6) !== 1) {
      throw new Error('Invalid ELF version');
    }
    // get endianness (1 = little endian)
    const endian = elfHeader.getUint8(5) === 1;
    if (!endian) {
      throw new Error('Big endian not supported');
    }
    // get entryPoint
    this.entry = elfHeader.getUint32(24, endian);
    // file offset of the section header table
    const sectionHeaderOffset = this.dataView.getUint32(32, endian);
    // size of one section header
    const sectionHeaderSize = this.dataView.getUint16(46, endian);
    // number of section headers
    const sectionHeaderCount = this.dataView.getUint16(48, endian);
    // index of the section holding the section names
    const sectionNameIndex = this.dataView.getUint16(50, endian);

    // Wrap every section header.
    for (let i = 0; i < sectionHeaderCount; i++) {
      const offset = sectionHeaderOffset + i * sectionHeaderSize;
      this.sectionHeaders.push(new ElfSectionHeader(this.dataView, offset));
    }

    // Attach the section name table so that `section.name` can be resolved.
    const sectionNameSection = this.sectionHeaders[sectionNameIndex];
    if (!sectionNameSection) {
      throw new Error('Invalid ELF section name table');
    }
    const sectionNameView = sectionNameSection.contents;
    for (const section of this.sectionHeaders) {
      section.stringView = sectionNameView;
    }

    // Symbols are only read when both the string table and the symbol table exist.
    const stringTableSection = this.getSection('.strtab', ElfSectionType.STRTAB);
    if (stringTableSection) {
      const stringView = stringTableSection.contents;
      const symbolTableSection = this.getSection('.symtab', ElfSectionType.SYMTAB);
      if (symbolTableSection) {
        const symbolTableOffset = symbolTableSection.offset;
        // Ignore a trailing partial entry rather than reading past the table.
        const symbolTableEntryCount = Math.floor(symbolTableSection.size / SYMBOL_ENTRY_SIZE);
        for (let i = 0; i < symbolTableEntryCount; i++) {
          const offset = symbolTableOffset + i * SYMBOL_ENTRY_SIZE;
          this.symbolTable.push(new ElfSymbolTableEntry(this.dataView, offset, stringView));
        }
      }
    }
  }

  /** Returns the symbol table entries collected by the constructor. */
  getSymbols(): ElfSymbolTableEntry[] {
    return this.symbolTable;
  }

  /**
   * Finds the first section with the given name.
   *
   * @param name section name, e.g. `.text`
   * @param type when a number is given, the section type must match as well
   * @returns the section header, or `null` when nothing matches
   */
  getSection(name: string, type?: number): ElfSectionHeader | null {
    if (typeof type === 'number') {
      return this.sectionHeaders.find((section) => section.name === name && section.type === type) || null;
    } else {
      return this.sectionHeaders.find((section) => section.name === name) || null;
    }
  }
}
/** Section header type codes that ELFParser matches against when locating tables. */
enum ElfSectionType {
  /** Type required of the `.symtab` section. */
  SYMTAB = 0x2,
  /** Type required of the `.strtab` section. */
  STRTAB = 0x3,
}
/**
 * Lazy accessor for one 32-bit ELF section header.
 *
 * Only `type` is read eagerly; every other field is decoded from `dataView`
 * on each access. All fields are read as little-endian 32-bit words at fixed
 * offsets from `headerOffset`.
 */
class ElfSectionHeader {
  /** Section type, read from header offset 0x4. */
  readonly type: number;
  /** Contents of the section name table; must be set before `name` is read. */
  stringView: DataView | null = null;

  /**
   * @param dataView     view over the ELF image
   * @param headerOffset offset of this header inside `dataView`
   */
  constructor(readonly dataView: DataView, readonly headerOffset: number) {
    this.type = this.dataView.getUint32(this.headerOffset + 0x4, true);
  }

  /** Section flags (header offset 0x8). */
  get flags(): number {
    return this.dataView.getUint32(this.headerOffset + 0x8, true);
  }

  /** Address word stored at header offset 0xc. */
  get vmaddr(): number {
    return this.dataView.getUint32(this.headerOffset + 0xc, true);
  }

  /** Offset of the section data, relative to the start of the image (header offset 0x10). */
  get offset(): number {
    return this.dataView.getUint32(this.headerOffset + 0x10, true);
  }

  /** Size of the section data in bytes (header offset 0x14). */
  get size(): number {
    return this.dataView.getUint32(this.headerOffset + 0x14, true);
  }

  /** Offset of this section's name inside the section name table (header offset 0x0). */
  get nameOffset(): number {
    return this.dataView.getUint32(this.headerOffset + 0x0, true);
  }

  /**
   * Section name, looked up in `stringView`.
   * @throws Error when no section name table has been attached yet
   */
  get name(): string {
    if (!this.stringView) {
      throw new Error('Section name table not attached');
    }
    return getASCII(this.stringView, this.nameOffset);
  }

  /**
   * A fresh view over the section's bytes.
   * The section offset is relative to the start of `dataView`, so the view's
   * own `byteOffset` is added before indexing the shared buffer.
   */
  get contents(): DataView {
    return new DataView(this.dataView.buffer, this.dataView.byteOffset + this.offset, this.size);
  }
}
/**
 * Lazy accessor for one 16-byte symbol table entry.
 *
 * Nothing is decoded up front: each getter reads its field from `dataView`
 * (little endian) relative to `entryOffset` every time it is used.
 */
class ElfSymbolTableEntry {
  /** View containing the symbol table. */
  readonly dataView: DataView;
  /** Offset of this entry inside `dataView`. */
  readonly entryOffset: number;
  /** String table used to resolve `name`. */
  readonly stringView: DataView;

  constructor(dataView: DataView, entryOffset: number, stringView: DataView) {
    this.dataView = dataView;
    this.entryOffset = entryOffset;
    this.stringView = stringView;
  }

  /** Offset of the symbol name inside `stringView` (entry bytes 0-3). */
  get nameOffset(): number {
    const { dataView, entryOffset } = this;
    return dataView.getUint32(entryOffset, true);
  }

  /** Symbol name, read as a NUL-terminated string from `stringView`. */
  get name(): string {
    const where = this.nameOffset;
    return getASCII(this.stringView, where);
  }

  /** Symbol value (entry bytes 4-7). */
  get value(): number {
    const { dataView, entryOffset } = this;
    return dataView.getUint32(entryOffset + 4, true);
  }

  /** Symbol size (entry bytes 8-11). */
  get size(): number {
    const { dataView, entryOffset } = this;
    return dataView.getUint32(entryOffset + 8, true);
  }

  /** The `info` byte (entry byte 12). */
  get info(): number {
    const { dataView, entryOffset } = this;
    return dataView.getUint8(entryOffset + 12);
  }

  /** The `other` byte (entry byte 13). */
  get other(): number {
    const { dataView, entryOffset } = this;
    return dataView.getUint8(entryOffset + 13);
  }
}
// https://dwarfstd.org/doc/Debugging%20using%20DWARF-2012.pdf
// https://dwarfstd.org/doc/DWARF5.pdf
// Tag names and codes.
// Used as the type of Abbrev.tag. parseAbbrevs casts the decoded number to
// this enum without validating it, so a tag value that is not listed here
// can still appear at runtime.
enum DwarfTag {
DW_TAG_padding = 0x00,
DW_TAG_array_type = 0x01,
DW_TAG_class_type = 0x02,
DW_TAG_entry_point = 0x03,
DW_TAG_enumeration_type = 0x04,
DW_TAG_formal_parameter = 0x05,
DW_TAG_imported_declaration = 0x08,
DW_TAG_label = 0x0a,
DW_TAG_lexical_block = 0x0b,
DW_TAG_member = 0x0d,
DW_TAG_pointer_type = 0x0f,
DW_TAG_reference_type = 0x10,
DW_TAG_compile_unit = 0x11,
DW_TAG_string_type = 0x12,
DW_TAG_structure_type = 0x13,
DW_TAG_subroutine_type = 0x15,
DW_TAG_typedef = 0x16,
DW_TAG_union_type = 0x17,
DW_TAG_unspecified_parameters = 0x18,
DW_TAG_variant = 0x19,
DW_TAG_common_block = 0x1a,
DW_TAG_common_inclusion = 0x1b,
DW_TAG_inheritance = 0x1c,
DW_TAG_inlined_subroutine = 0x1d,
DW_TAG_module = 0x1e,
DW_TAG_ptr_to_member_type = 0x1f,
DW_TAG_set_type = 0x20,
DW_TAG_subrange_type = 0x21,
DW_TAG_with_stmt = 0x22,
DW_TAG_access_declaration = 0x23,
DW_TAG_base_type = 0x24,
DW_TAG_catch_block = 0x25,
DW_TAG_const_type = 0x26,
DW_TAG_constant = 0x27,
DW_TAG_enumerator = 0x28,
DW_TAG_file_type = 0x29,
DW_TAG_friend = 0x2a,
DW_TAG_namelist = 0x2b,
DW_TAG_namelist_item = 0x2c,
DW_TAG_packed_type = 0x2d,
DW_TAG_subprogram = 0x2e,
DW_TAG_template_type_param = 0x2f,
DW_TAG_template_value_param = 0x30,
DW_TAG_thrown_type = 0x31,
DW_TAG_try_block = 0x32,
DW_TAG_variant_part = 0x33,
DW_TAG_variable = 0x34,
DW_TAG_volatile_type = 0x35,
// DWARF 3.
DW_TAG_dwarf_procedure = 0x36,
DW_TAG_restrict_type = 0x37,
DW_TAG_interface_type = 0x38,
DW_TAG_namespace = 0x39,
DW_TAG_imported_module = 0x3a,
DW_TAG_unspecified_type = 0x3b,
DW_TAG_partial_unit = 0x3c,
DW_TAG_imported_unit = 0x3d,
// SGI/MIPS Extensions.
DW_TAG_MIPS_loop = 0x4081,
// HP extensions. See:
// ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz
DW_TAG_HP_array_descriptor = 0x4090,
// GNU extensions.
DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90.
DW_TAG_function_template = 0x4102, // For C++.
DW_TAG_class_template = 0x4103, // For C++.
DW_TAG_GNU_BINCL = 0x4104,
DW_TAG_GNU_EINCL = 0x4105,
// Extensions for UPC. See: http://upc.gwu.edu/~upc.
DW_TAG_upc_shared_type = 0x8765,
DW_TAG_upc_strict_type = 0x8766,
DW_TAG_upc_relaxed_type = 0x8767,
// PGI (STMicroelectronics) extensions. No documentation available.
DW_TAG_PGI_kanji_type = 0xA000,
DW_TAG_PGI_interface_block = 0xA020
};
/**
 * Named values for the "has children" byte of an abbreviation declaration.
 * Nothing in the visible code refers to this enum; parseAbbrevs compares the
 * raw byte with zero instead.
 */
enum DwarfHasChild {
  DW_children_no = 0x0,
  DW_children_yes = 0x1,
}
// Form names and codes.
// A form tells the reader how an attribute value is encoded in the DIE
// stream. DWARFCompilationUnit.processAttribute switches on these codes and
// throws 'Unsupported form' for any code it has no case for.
enum DwarfForm {
DW_FORM_addr = 0x01,
DW_FORM_block2 = 0x03,
DW_FORM_block4 = 0x04,
DW_FORM_data2 = 0x05,
DW_FORM_data4 = 0x06,
DW_FORM_data8 = 0x07,
DW_FORM_string = 0x08,
DW_FORM_block = 0x09,
DW_FORM_block1 = 0x0a,
DW_FORM_data1 = 0x0b,
DW_FORM_flag = 0x0c,
DW_FORM_sdata = 0x0d,
DW_FORM_strp = 0x0e,
DW_FORM_udata = 0x0f,
DW_FORM_ref_addr = 0x10,
DW_FORM_ref1 = 0x11,
DW_FORM_ref2 = 0x12,
DW_FORM_ref4 = 0x13,
DW_FORM_ref8 = 0x14,
DW_FORM_ref_udata = 0x15,
DW_FORM_indirect = 0x16
};
// Attribute names and codes
// DWARFCompilationUnit.processDIE uses the reverse mapping
// (DwarfAttribute[code]) to turn an attribute code into a property name.
// NOTE: two members share the value 0x2001 (DW_AT_MIPS_fde and
// DW_AT_HP_unmodifiable). With a numeric enum the later declaration wins the
// reverse lookup, so DwarfAttribute[0x2001] is 'DW_AT_HP_unmodifiable'.
// A code that is not listed here has no reverse mapping (lookup yields undefined).
enum DwarfAttribute {
DW_AT_sibling = 0x01,
DW_AT_location = 0x02,
DW_AT_name = 0x03,
DW_AT_ordering = 0x09,
DW_AT_subscr_data = 0x0a,
DW_AT_byte_size = 0x0b,
DW_AT_bit_offset = 0x0c,
DW_AT_bit_size = 0x0d,
DW_AT_element_list = 0x0f,
DW_AT_stmt_list = 0x10,
DW_AT_low_pc = 0x11,
DW_AT_high_pc = 0x12,
DW_AT_language = 0x13,
DW_AT_member = 0x14,
DW_AT_discr = 0x15,
DW_AT_discr_value = 0x16,
DW_AT_visibility = 0x17,
DW_AT_import = 0x18,
DW_AT_string_length = 0x19,
DW_AT_common_reference = 0x1a,
DW_AT_comp_dir = 0x1b,
DW_AT_const_value = 0x1c,
DW_AT_containing_type = 0x1d,
DW_AT_default_value = 0x1e,
DW_AT_inline = 0x20,
DW_AT_is_optional = 0x21,
DW_AT_lower_bound = 0x22,
DW_AT_producer = 0x25,
DW_AT_prototyped = 0x27,
DW_AT_return_addr = 0x2a,
DW_AT_start_scope = 0x2c,
DW_AT_stride_size = 0x2e,
DW_AT_upper_bound = 0x2f,
DW_AT_abstract_origin = 0x31,
DW_AT_accessibility = 0x32,
DW_AT_address_class = 0x33,
DW_AT_artificial = 0x34,
DW_AT_base_types = 0x35,
DW_AT_calling_convention = 0x36,
DW_AT_count = 0x37,
DW_AT_data_member_location = 0x38,
DW_AT_decl_column = 0x39,
DW_AT_decl_file = 0x3a,
DW_AT_decl_line = 0x3b,
DW_AT_declaration = 0x3c,
DW_AT_discr_list = 0x3d,
DW_AT_encoding = 0x3e,
DW_AT_external = 0x3f,
DW_AT_frame_base = 0x40,
DW_AT_friend = 0x41,
DW_AT_identifier_case = 0x42,
DW_AT_macro_info = 0x43,
DW_AT_namelist_items = 0x44,
DW_AT_priority = 0x45,
DW_AT_segment = 0x46,
DW_AT_specification = 0x47,
DW_AT_static_link = 0x48,
DW_AT_type = 0x49,
DW_AT_use_location = 0x4a,
DW_AT_variable_parameter = 0x4b,
DW_AT_virtuality = 0x4c,
DW_AT_vtable_elem_location = 0x4d,
// DWARF 3 values.
DW_AT_allocated = 0x4e,
DW_AT_associated = 0x4f,
DW_AT_data_location = 0x50,
DW_AT_stride = 0x51,
DW_AT_entry_pc = 0x52,
DW_AT_use_UTF8 = 0x53,
DW_AT_extension = 0x54,
DW_AT_ranges = 0x55,
DW_AT_trampoline = 0x56,
DW_AT_call_column = 0x57,
DW_AT_call_file = 0x58,
DW_AT_call_line = 0x59,
// SGI/MIPS extensions.
DW_AT_MIPS_fde = 0x2001,
DW_AT_MIPS_loop_begin = 0x2002,
DW_AT_MIPS_tail_loop_begin = 0x2003,
DW_AT_MIPS_epilog_begin = 0x2004,
DW_AT_MIPS_loop_unroll_factor = 0x2005,
DW_AT_MIPS_software_pipeline_depth = 0x2006,
DW_AT_MIPS_linkage_name = 0x2007,
DW_AT_MIPS_stride = 0x2008,
DW_AT_MIPS_abstract_name = 0x2009,
DW_AT_MIPS_clone_origin = 0x200a,
DW_AT_MIPS_has_inlines = 0x200b,
// HP extensions.
DW_AT_HP_block_index = 0x2000,
DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde.
DW_AT_HP_actuals_stmt_list = 0x2010,
DW_AT_HP_proc_per_section = 0x2011,
DW_AT_HP_raw_data_ptr = 0x2012,
DW_AT_HP_pass_by_reference = 0x2013,
DW_AT_HP_opt_level = 0x2014,
DW_AT_HP_prof_version_id = 0x2015,
DW_AT_HP_opt_flags = 0x2016,
DW_AT_HP_cold_region_low_pc = 0x2017,
DW_AT_HP_cold_region_high_pc = 0x2018,
DW_AT_HP_all_variables_modifiable = 0x2019,
DW_AT_HP_linkage_name = 0x201a,
DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g.
// GNU extensions.
DW_AT_sf_names = 0x2101,
DW_AT_src_info = 0x2102,
DW_AT_mac_info = 0x2103,
DW_AT_src_coords = 0x2104,
DW_AT_body_begin = 0x2105,
DW_AT_body_end = 0x2106,
DW_AT_GNU_vector = 0x2107,
// VMS extensions.
DW_AT_VMS_rtnbeg_pd_address = 0x2201,
// UPC extension.
DW_AT_upc_threads_scaled = 0x3210,
// PGI (STMicroelectronics) extensions.
DW_AT_PGI_lbase = 0x3a00,
DW_AT_PGI_soffset = 0x3a01,
DW_AT_PGI_lstride = 0x3a02
};
// Line number opcodes.
// These are the standard opcodes of the line-number program.
// DWARFLineInfo.processOneOpcode dispatches on them for any opcode byte that
// is below the header's opcode_base; bytes at or above opcode_base are
// treated as special opcodes instead. Opcode 0 (DW_LNS_extended_op)
// introduces an extended opcode (see DwarfLineNumberExtendedOps).
enum DwarfLineNumberOps {
DW_LNS_extended_op = 0,
DW_LNS_copy = 1,
DW_LNS_advance_pc = 2,
DW_LNS_advance_line = 3,
DW_LNS_set_file = 4,
DW_LNS_set_column = 5,
DW_LNS_negate_stmt = 6,
DW_LNS_set_basic_block = 7,
DW_LNS_const_add_pc = 8,
DW_LNS_fixed_advance_pc = 9,
// DWARF 3.
DW_LNS_set_prologue_end = 10,
DW_LNS_set_epilogue_begin = 11,
DW_LNS_set_isa = 12
};
// Line number extended opcodes.
// An extended opcode follows a DW_LNS_extended_op byte and a ULEB128 length.
// DWARFLineInfo.processOneOpcode has explicit cases for the first three
// values only; the HP extensions are listed for reference.
enum DwarfLineNumberExtendedOps {
DW_LNE_end_sequence = 1,
DW_LNE_set_address = 2,
DW_LNE_define_file = 3,
// HP extensions.
DW_LNE_HP_negate_is_UV_update = 0x11,
DW_LNE_HP_push_context = 0x12,
DW_LNE_HP_pop_context = 0x13,
DW_LNE_HP_set_file_line_column = 0x14,
DW_LNE_HP_set_routine_name = 0x15,
DW_LNE_HP_set_sequence = 0x16,
DW_LNE_HP_negate_post_semantics = 0x17,
DW_LNE_HP_negate_function_exit = 0x18,
DW_LNE_HP_negate_front_end_logical = 0x19,
DW_LNE_HP_define_proc = 0x20
};
// Type encoding names and codes
// Not referenced by the code visible in this file; presumably the values a
// DW_AT_encoding attribute can take (confirm against the DWARF specification
// before relying on it).
enum DwarfEncoding {
DW_ATE_address = 0x1,
DW_ATE_boolean = 0x2,
DW_ATE_complex_float = 0x3,
DW_ATE_float = 0x4,
DW_ATE_signed = 0x5,
DW_ATE_signed_char = 0x6,
DW_ATE_unsigned = 0x7,
DW_ATE_unsigned_char = 0x8,
// DWARF3/DWARF3f
DW_ATE_imaginary_float = 0x9,
DW_ATE_packed_decimal = 0xa,
DW_ATE_numeric_string = 0xb,
DW_ATE_edited = 0xc,
DW_ATE_signed_fixed = 0xd,
DW_ATE_unsigned_fixed = 0xe,
DW_ATE_decimal_float = 0xf,
DW_ATE_lo_user = 0x80,
DW_ATE_hi_user = 0xff
};
// Location virtual machine opcodes
// Not referenced by the code visible in this file: location expressions are
// returned by processAttribute as raw byte arrays and are not interpreted.
// NOTE: DW_OP_lo_user and DW_OP_GNU_push_tls_address share the value 0xe0, so
// the reverse lookup DwarfOpcode[0xe0] yields the later name,
// 'DW_OP_GNU_push_tls_address'.
enum DwarfOpcode {
DW_OP_addr = 0x03,
DW_OP_deref = 0x06,
DW_OP_const1u = 0x08,
DW_OP_const1s = 0x09,
DW_OP_const2u = 0x0a,
DW_OP_const2s = 0x0b,
DW_OP_const4u = 0x0c,
DW_OP_const4s = 0x0d,
DW_OP_const8u = 0x0e,
DW_OP_const8s = 0x0f,
DW_OP_constu = 0x10,
DW_OP_consts = 0x11,
DW_OP_dup = 0x12,
DW_OP_drop = 0x13,
DW_OP_over = 0x14,
DW_OP_pick = 0x15,
DW_OP_swap = 0x16,
DW_OP_rot = 0x17,
DW_OP_xderef = 0x18,
DW_OP_abs = 0x19,
DW_OP_and = 0x1a,
DW_OP_div = 0x1b,
DW_OP_minus = 0x1c,
DW_OP_mod = 0x1d,
DW_OP_mul = 0x1e,
DW_OP_neg = 0x1f,
DW_OP_not = 0x20,
DW_OP_or = 0x21,
DW_OP_plus = 0x22,
DW_OP_plus_uconst = 0x23,
DW_OP_shl = 0x24,
DW_OP_shr = 0x25,
DW_OP_shra = 0x26,
DW_OP_xor = 0x27,
DW_OP_bra = 0x28,
DW_OP_eq = 0x29,
DW_OP_ge = 0x2a,
DW_OP_gt = 0x2b,
DW_OP_le = 0x2c,
DW_OP_lt = 0x2d,
DW_OP_ne = 0x2e,
DW_OP_skip = 0x2f,
DW_OP_lit0 = 0x30,
DW_OP_lit1 = 0x31,
DW_OP_lit2 = 0x32,
DW_OP_lit3 = 0x33,
DW_OP_lit4 = 0x34,
DW_OP_lit5 = 0x35,
DW_OP_lit6 = 0x36,
DW_OP_lit7 = 0x37,
DW_OP_lit8 = 0x38,
DW_OP_lit9 = 0x39,
DW_OP_lit10 = 0x3a,
DW_OP_lit11 = 0x3b,
DW_OP_lit12 = 0x3c,
DW_OP_lit13 = 0x3d,
DW_OP_lit14 = 0x3e,
DW_OP_lit15 = 0x3f,
DW_OP_lit16 = 0x40,
DW_OP_lit17 = 0x41,
DW_OP_lit18 = 0x42,
DW_OP_lit19 = 0x43,
DW_OP_lit20 = 0x44,
DW_OP_lit21 = 0x45,
DW_OP_lit22 = 0x46,
DW_OP_lit23 = 0x47,
DW_OP_lit24 = 0x48,
DW_OP_lit25 = 0x49,
DW_OP_lit26 = 0x4a,
DW_OP_lit27 = 0x4b,
DW_OP_lit28 = 0x4c,
DW_OP_lit29 = 0x4d,
DW_OP_lit30 = 0x4e,
DW_OP_lit31 = 0x4f,
DW_OP_reg0 = 0x50,
DW_OP_reg1 = 0x51,
DW_OP_reg2 = 0x52,
DW_OP_reg3 = 0x53,
DW_OP_reg4 = 0x54,
DW_OP_reg5 = 0x55,
DW_OP_reg6 = 0x56,
DW_OP_reg7 = 0x57,
DW_OP_reg8 = 0x58,
DW_OP_reg9 = 0x59,
DW_OP_reg10 = 0x5a,
DW_OP_reg11 = 0x5b,
DW_OP_reg12 = 0x5c,
DW_OP_reg13 = 0x5d,
DW_OP_reg14 = 0x5e,
DW_OP_reg15 = 0x5f,
DW_OP_reg16 = 0x60,
DW_OP_reg17 = 0x61,
DW_OP_reg18 = 0x62,
DW_OP_reg19 = 0x63,
DW_OP_reg20 = 0x64,
DW_OP_reg21 = 0x65,
DW_OP_reg22 = 0x66,
DW_OP_reg23 = 0x67,
DW_OP_reg24 = 0x68,
DW_OP_reg25 = 0x69,
DW_OP_reg26 = 0x6a,
DW_OP_reg27 = 0x6b,
DW_OP_reg28 = 0x6c,
DW_OP_reg29 = 0x6d,
DW_OP_reg30 = 0x6e,
DW_OP_reg31 = 0x6f,
DW_OP_breg0 = 0x70,
DW_OP_breg1 = 0x71,
DW_OP_breg2 = 0x72,
DW_OP_breg3 = 0x73,
DW_OP_breg4 = 0x74,
DW_OP_breg5 = 0x75,
DW_OP_breg6 = 0x76,
DW_OP_breg7 = 0x77,
DW_OP_breg8 = 0x78,
DW_OP_breg9 = 0x79,
DW_OP_breg10 = 0x7a,
DW_OP_breg11 = 0x7b,
DW_OP_breg12 = 0x7c,
DW_OP_breg13 = 0x7d,
DW_OP_breg14 = 0x7e,
DW_OP_breg15 = 0x7f,
DW_OP_breg16 = 0x80,
DW_OP_breg17 = 0x81,
DW_OP_breg18 = 0x82,
DW_OP_breg19 = 0x83,
DW_OP_breg20 = 0x84,
DW_OP_breg21 = 0x85,
DW_OP_breg22 = 0x86,
DW_OP_breg23 = 0x87,
DW_OP_breg24 = 0x88,
DW_OP_breg25 = 0x89,
DW_OP_breg26 = 0x8a,
DW_OP_breg27 = 0x8b,
DW_OP_breg28 = 0x8c,
DW_OP_breg29 = 0x8d,
DW_OP_breg30 = 0x8e,
DW_OP_breg31 = 0x8f,
DW_OP_regX = 0x90,
DW_OP_fbreg = 0x91,
DW_OP_bregX = 0x92,
DW_OP_piece = 0x93,
DW_OP_deref_size = 0x94,
DW_OP_xderef_size = 0x95,
DW_OP_nop = 0x96,
// DWARF3/DWARF3f
DW_OP_push_object_address = 0x97,
DW_OP_call2 = 0x98,
DW_OP_call4 = 0x99,
DW_OP_call_ref = 0x9a,
DW_OP_form_tls_address = 0x9b,
DW_OP_call_frame_cfa = 0x9c,
DW_OP_bit_piece = 0x9d,
DW_OP_lo_user = 0xe0,
DW_OP_hi_user = 0xff,
// GNU extensions
DW_OP_GNU_push_tls_address = 0xe0
};
// Source languages. These are values for DW_AT_language.
// Not referenced by the code visible in this file; processDIE stores the
// DW_AT_language value as a plain number without mapping it to this enum.
enum DwarfLanguage {
DW_LANG_none = 0x0000,
DW_LANG_C89 = 0x0001,
DW_LANG_C = 0x0002,
DW_LANG_Ada83 = 0x0003,
DW_LANG_C_plus_plus = 0x0004,
DW_LANG_Cobol74 = 0x0005,
DW_LANG_Cobol85 = 0x0006,
DW_LANG_Fortran77 = 0x0007,
DW_LANG_Fortran90 = 0x0008,
DW_LANG_Pascal83 = 0x0009,
DW_LANG_Modula2 = 0x000a,
DW_LANG_Java = 0x000b,
DW_LANG_C99 = 0x000c,
DW_LANG_Ada95 = 0x000d,
DW_LANG_Fortran95 = 0x000e,
DW_LANG_PLI = 0x000f,
DW_LANG_ObjC = 0x0010,
DW_LANG_ObjC_plus_plus = 0x0011,
DW_LANG_UPC = 0x0012,
DW_LANG_D = 0x0013,
// Implementation-defined language code range.
DW_LANG_lo_user = 0x8000,
DW_LANG_hi_user = 0xffff,
// Extensions.
// MIPS assembly language. The GNU toolchain uses this for all
// assembly languages, since there's no generic DW_LANG_ value for that.
DW_LANG_Mips_Assembler = 0x8001,
DW_LANG_Upc = 0x8765 // Unified Parallel C
};
/**
 * Sequential cursor over a DataView.
 *
 * Every `read*` method decodes a value at `offset` and then moves `offset`
 * past it. Reads beyond the end of the view surface as the RangeError thrown
 * by the DataView accessors; `offset` is only advanced after a successful read.
 */
class ByteReader {
  /** Width in bytes used by `readAddress` (4 or 8). */
  addressSize: number = 4;
  /** Width in bytes used by `readOffset` (4 or 8); updated by `readInitialLength`. */
  offsetSize: number = 4;
  /** Current read position, relative to the start of `view`. */
  offset: number = 0;

  /**
   * @param view         bytes to read
   * @param littleEndian byte order used for multi-byte reads
   */
  constructor(readonly view: DataView, readonly littleEndian: boolean) {
  }

  /** True once the cursor has reached (or passed) the end of the view. */
  isEOF(): boolean {
    return this.view.byteLength <= this.offset;
  }

  /** Reads an unsigned 8-bit value. */
  readOneByte(): number {
    const at = this.offset;
    const byte = this.view.getUint8(at);
    this.offset = at + 1;
    return byte;
  }

  /** Reads an unsigned 16-bit value. */
  readTwoBytes(): number {
    const { view, littleEndian, offset: at } = this;
    const half = view.getUint16(at, littleEndian);
    this.offset = at + 2;
    return half;
  }

  /** Reads an unsigned 32-bit value. */
  readFourBytes(): number {
    const { view, littleEndian, offset: at } = this;
    const word = view.getUint32(at, littleEndian);
    this.offset = at + 4;
    return word;
  }

  /** Reads an unsigned 64-bit value as a bigint. */
  readEightBytes(): bigint {
    const { view, littleEndian, offset: at } = this;
    const quad = view.getBigUint64(at, littleEndian);
    this.offset = at + 8;
    return quad;
  }

  /** Reads an unsigned LEB128 number: 7 payload bits per byte, low group first. */
  readUnsignedLEB128(): bigint {
    const groupBits = BigInt(7);
    let value = BigInt(0);
    for (let shift = BigInt(0); ; shift += groupBits) {
      const byte = this.readOneByte();
      value |= BigInt(byte & 0x7f) << shift;
      // A clear top bit marks the last byte of the number.
      if ((byte & 0x80) === 0) {
        return value;
      }
    }
  }

  /** Reads a signed LEB128 number. */
  readSignedLEB128(): bigint {
    const groupBits = BigInt(7);
    let value = BigInt(0);
    let shift = BigInt(0);
    let byte: number;
    do {
      byte = this.readOneByte();
      value |= BigInt(byte & 0x7f) << shift;
      shift += groupBits;
    } while ((byte & 0x80) !== 0);
    // Bit 6 of the final byte is the sign; extend it through the upper bits.
    const negative = (byte & 0x40) !== 0;
    return negative ? value | -(BigInt(1) << shift) : value;
  }

  /**
   * Reads a section offset of `offsetSize` bytes.
   * @throws Error when `offsetSize` is neither 4 nor 8
   */
  readOffset(): number | bigint {
    switch (this.offsetSize) {
      case 4:
        return this.readFourBytes();
      case 8:
        return this.readEightBytes();
      default:
        throw new Error('Invalid offset size');
    }
  }

  /**
   * Reads an address of `addressSize` bytes.
   * @throws Error when `addressSize` is neither 4 nor 8
   */
  readAddress(): number | bigint {
    switch (this.addressSize) {
      case 4:
        return this.readFourBytes();
      case 8:
        return this.readEightBytes();
      default:
        throw new Error('Invalid address size');
    }
  }

  /**
   * Reads a DWARF initial-length field and sets `offsetSize` accordingly.
   * A first word of 0xffffffff selects 8-byte offsets and is followed by the
   * real 64-bit length; any other value is the length itself with 4-byte offsets.
   */
  readInitialLength(): bigint | number {
    const first = this.readFourBytes();
    const extended = first === 0xffffffff;
    this.offsetSize = extended ? 8 : 4;
    return extended ? this.readEightBytes() : first;
  }

  /** Reads a NUL-terminated string, one character per byte, consuming the terminator. */
  readString(): string {
    let text = '';
    for (let byte = this.readOneByte(); byte !== 0; byte = this.readOneByte()) {
      text += String.fromCharCode(byte);
    }
    return text;
  }

  /**
   * Creates a view of `length` bytes starting `offset` bytes into this
   * reader's view. The cursor is not moved.
   */
  slice(offset: number, length: number): DataView {
    const { buffer, byteOffset } = this.view;
    return new DataView(buffer, byteOffset + offset, length);
  }

  /** Reads `length` bytes into a new array. */
  readByteArray(length: number): Uint8Array {
    const bytes = new Uint8Array(length);
    let index = 0;
    while (index < length) {
      bytes[index] = this.readOneByte();
      index++;
    }
    return bytes;
  }
}
/**
 * Walks the DWARF v2 debug sections of an ELF image.
 *
 * The constructor parses every compilation unit in `.debug_info` and every
 * line-number program in `.debug_line`. Each parsed object is disposed right
 * after it has been processed, so the entries kept in `units` and
 * `lineInfos` no longer hold readers or abbreviation tables.
 *
 * Debug sections are optional: a missing `.debug_info` leaves `units` empty,
 * a missing `.debug_line` leaves `lineInfos` empty, and a missing string
 * section is treated as an empty string table.
 *
 * @throws Error when `.debug_info` is present but `.debug_abbrev` is not, or
 *         when a section cannot be decoded.
 */
export class DWARFParser {
  units: DWARFCompilationUnit[] = [];
  lineInfos: DWARFLineInfo[] = [];

  constructor(readonly elf: ELFParser) {
    // parse compilation units
    const info = elf.getSection('.debug_info');
    if (info) {
      const abbrev = elf.getSection('.debug_abbrev');
      if (!abbrev) {
        // DIEs cannot be decoded without their abbreviation declarations.
        throw new Error('Missing .debug_abbrev section');
      }
      const debugstrs = elf.getSection('.debug_str') || elf.getSection('__debug_str');
      // Units that only use inline strings do not need a string section.
      const stringData = debugstrs ? debugstrs.contents : new DataView(new ArrayBuffer(0));
      const abbrevData = abbrev.contents;
      const infoReader = new ByteReader(info.contents, true);
      while (!infoReader.isEOF()) {
        const compilationUnit = new DWARFCompilationUnit(infoReader, stringData);
        // must be either skip() or read(): both move infoReader to the next unit
        compilationUnit.read(abbrevData);
        this.units.push(compilationUnit);
        compilationUnit.dispose();
      }
    }
    // parse line-number programs
    const linedata = elf.getSection('.debug_line');
    if (linedata) {
      const lineReader = new ByteReader(linedata.contents, true);
      while (!lineReader.isEOF()) {
        const lineInfo = new DWARFLineInfo(lineReader);
        // must be either skip() or readLines(): both move lineReader to the next program
        lineInfo.readLines();
        this.lineInfos.push(lineInfo);
        lineInfo.dispose();
      }
    }
  }
}
/**
 * One compilation unit of `.debug_info` (DWARF version 2, 4-byte addresses).
 *
 * The constructor consumes the unit header from `infoReader`. Afterwards the
 * caller must invoke either `read()` or `skip()` so that `infoReader` ends up
 * at the start of the next unit.
 *
 * @throws Error when the unit's version is not 2 or its address size is not 4
 */
class DWARFCompilationUnit {
  /** Number of header bytes, including the initial length field. */
  headerLength: number;
  /** Number of bytes of DIE data following the header. */
  contentLength: number;
  /** Offset of the first DIE inside the reader's view. */
  contentOffset: number;
  /** Offset of this unit's abbreviation table inside `.debug_abbrev`. */
  abbrevOffset: number;
  /** Abbreviation declarations, filled in by `read()`. */
  abbrevs: Abbrev[] = [];

  /**
   * @param infoReader reader positioned at the start of the unit header
   * @param debugstrs  contents of the string section used by DW_FORM_strp
   */
  constructor(protected infoReader: ByteReader, protected debugstrs: DataView) {
    const baseOffset = infoReader.offset;
    const length = infoReader.readInitialLength();
    // The length counts the bytes that follow the length field itself, so the
    // unit ends here + length for both the 32-bit and the 64-bit form.
    const unitEnd = infoReader.offset + Number(length);
    const version = infoReader.readTwoBytes();
    this.abbrevOffset = Number(infoReader.readOffset());
    const address_size = infoReader.readOneByte();
    this.headerLength = infoReader.offset - baseOffset;
    if (version != 2) throw new Error('DWARF version ' + version + ' not supported');
    if (address_size !== 4) throw new Error('Address size ' + address_size + ' not supported');
    this.contentOffset = infoReader.offset;
    this.contentLength = unitEnd - this.contentOffset;
  }

  /** Drops the references to the readers and the abbreviation table. */
  dispose() {
    this.infoReader = null;
    this.debugstrs = null;
    this.abbrevs = null;
  }

  /** Positions `infoReader` just past this unit (safe to call more than once). */
  skip() {
    this.infoReader.offset = this.contentOffset + this.contentLength;
  }

  /**
   * Parses the unit's abbreviations and walks all of its DIEs, then moves
   * `infoReader` to the next unit.
   *
   * @param abbrev contents of `.debug_abbrev`
   */
  read(abbrev: DataView) {
    // parse the abbreviations
    const abbrevReader = new ByteReader(abbrev, true);
    abbrevReader.offset = this.abbrevOffset;
    this.abbrevs = parseAbbrevs(abbrevReader);
    // extract slice with DIEs
    const slice = this.infoReader.slice(this.contentOffset, this.contentLength);
    this.processDIEs(new ByteReader(slice, true));
    // skip to next cu section
    this.skip();
  }

  /**
   * Decodes every DIE in `reader`. The decoded objects are only used to track
   * nesting; they are not retained.
   *
   * @throws Error on an unbalanced end-of-children marker or an abbreviation
   *         code that is not declared for this unit
   */
  processDIEs(reader: ByteReader) {
    const die_stack: object[] = [];
    // TODO: capture tree structure
    while (!reader.isEOF()) {
      const abbrev_num = Number(reader.readUnsignedLEB128());
      if (abbrev_num == 0) {
        // A zero code closes the child list of the innermost open DIE.
        if (die_stack.length == 0) throw new Error('DIE stack underflow @ offset ' + reader.offset);
        die_stack.pop();
        continue;
      }
      const abbrev = this.findAbbrev(abbrev_num);
      if (!abbrev) throw new Error('Invalid abbreviation number ' + abbrev_num);
      const obj = this.processDIE(reader, abbrev);
      if (abbrev.has_children) {
        die_stack.push(obj);
      }
    }
  }

  /**
   * Looks up an abbreviation by its declared code. Codes are usually
   * 1, 2, 3, ... in table order, so the positional slot is tried first; a
   * search by code covers tables that are numbered differently.
   */
  private findAbbrev(code: number): Abbrev | undefined {
    const positional = this.abbrevs[code - 1];
    if (positional && positional.number === code) {
      return positional;
    }
    return this.abbrevs.find((candidate) => candidate.number === code);
  }

  /**
   * Decodes the attribute values of a single DIE.
   *
   * @returns an object keyed by attribute name (`DW_AT_...`); attribute codes
   *          without a name in DwarfAttribute are keyed as `DW_AT_0x<hex>`
   */
  processDIE(reader: ByteReader, abbrev: Abbrev) {
    const obj: Record<string, unknown> = {};
    // iterate through attributes
    for (const attr of abbrev.attributes) {
      const value = this.processAttribute(reader, attr.form);
      // Give unnamed (vendor) attributes distinct keys instead of "undefined".
      const key = DwarfAttribute[attr.attr] || 'DW_AT_0x' + attr.attr.toString(16);
      obj[key] = value;
    }
    return obj;
  }

  /**
   * Reads one attribute value encoded with the given form.
   *
   * @returns a number for fixed-size values up to 4 bytes, a bigint for
   *          8-byte and LEB128 values, a string for string forms and a byte
   *          array for block forms
   * @throws Error for forms this reader does not know
   */
  processAttribute(reader: ByteReader, form: DwarfForm): number | bigint | string | Uint8Array {
    switch (form) {
      case DwarfForm.DW_FORM_data1:
      case DwarfForm.DW_FORM_flag:
      case DwarfForm.DW_FORM_ref1:
        return reader.readOneByte();
      case DwarfForm.DW_FORM_data2:
      case DwarfForm.DW_FORM_ref2:
        return reader.readTwoBytes();
      case DwarfForm.DW_FORM_data4:
      case DwarfForm.DW_FORM_ref4:
      case DwarfForm.DW_FORM_addr:
      case DwarfForm.DW_FORM_ref_addr:
        // Addresses are 4 bytes wide; the constructor rejects any other size.
        return reader.readFourBytes();
      case DwarfForm.DW_FORM_data8:
      case DwarfForm.DW_FORM_ref8:
        return reader.readEightBytes();
      case DwarfForm.DW_FORM_string:
        return reader.readString();
      case DwarfForm.DW_FORM_udata:
      case DwarfForm.DW_FORM_ref_udata:
        return reader.readUnsignedLEB128();
      case DwarfForm.DW_FORM_sdata:
        return reader.readSignedLEB128();
      case DwarfForm.DW_FORM_strp: {
        // offset into the string section
        const offset = Number(reader.readOffset());
        return this.getStringFrom(this.debugstrs, offset);
      }
      // Blocks: a length in the stated encoding, followed by that many bytes.
      case DwarfForm.DW_FORM_block1:
        return reader.readByteArray(reader.readOneByte());
      case DwarfForm.DW_FORM_block2:
        return reader.readByteArray(reader.readTwoBytes());
      case DwarfForm.DW_FORM_block4:
        return reader.readByteArray(reader.readFourBytes());
      case DwarfForm.DW_FORM_block:
        return reader.readByteArray(Number(reader.readUnsignedLEB128()));
      case DwarfForm.DW_FORM_indirect:
        // The actual form is stored in front of the value as a ULEB128 number.
        return this.processAttribute(reader, Number(reader.readUnsignedLEB128()));
      default:
        throw new Error('Unsupported form ' + form);
    }
  }

  /**
   * Reads a NUL-terminated string from `strtab` starting at `offset`, one
   * character per byte. An offset outside the table, or a string without
   * terminator, makes the underlying `getUint8` throw a RangeError.
   */
  getStringFrom(strtab: DataView, offset: number): string {
    let result = '';
    while (true) {
      const byte = strtab.getUint8(offset);
      if (byte === 0) {
        break;
      }
      result += String.fromCharCode(byte);
      offset += 1;
    }
    return result;
  }
}
// https://chromium.googlesource.com/breakpad/breakpad/+/linux-dwarf/src/common/dwarf
// Description of a function: its name, mangled name, source file and line,
// and two program-counter values (lowpc/highpc).
// Not referenced anywhere else in the visible code.
// NOTE(review): lowpc/highpc presumably mirror DW_AT_low_pc / DW_AT_high_pc - confirm before use.
interface FunctionInfo {
name: string;
mangled_name: string;
file: string;
line: number;
lowpc: number;
highpc: number;
}
// Description of a source file: its name and a program-counter value (lowpc).
// Not referenced anywhere else in the visible code.
interface SourceFileInfo {
name: string;
lowpc: number;
}
// One abbreviation declaration, as produced by parseAbbrevs.
interface Abbrev {
// abbreviation code that DIEs use to refer to this declaration
number: number;
// tag of the DIEs described by this declaration
tag: DwarfTag;
// true when the declaration's "has children" byte is non-zero
has_children: boolean;
// attribute code / form code pairs, in the order their values appear in a DIE
attributes: { attr: number, form: number }[];
}
/**
 * Plain holder for four compilation-unit header values, all initialised to
 * zero. Nothing in the visible code creates or reads instances of it.
 */
class CompilationUnitHeader {
  length: number = 0;
  version: number = 0;
  abbrev_offset: number = 0;
  address_size: number = 0;
}
// One row emitted by the line-number program. DWARFLineInfo.readLines builds
// these from the current LineStateMachine registers and appends them to the
// `lines` array of the file the row belongs to.
interface LineInfo {
// name of the file table entry selected by the state machine's file_num
file: string;
line: number;
column: number;
address: number;
is_stmt: boolean;
basic_block: boolean;
// copied from the state machine; readLines never emits a row while this flag is set
end_sequence: boolean;
}
/**
 * Registers of the line-number state machine that DWARFLineInfo updates
 * while executing a line-number program.
 */
class LineStateMachine {
  file_num: number;
  address: number;
  line_num: number;
  column_num: number;
  is_stmt: boolean;
  basic_block: boolean;
  end_sequence: boolean;

  /** @param default_is_stmt initial value of the `is_stmt` register */
  constructor(default_is_stmt: boolean) {
    this.Reset(default_is_stmt);
  }

  /**
   * Puts every register back to its starting value: file 1, address 0,
   * line 1, column 0, both flags cleared, and `is_stmt` set to the argument.
   */
  Reset(default_is_stmt: boolean): void {
    Object.assign(this, {
      file_num: 1,
      address: 0,
      line_num: 1,
      column_num: 0,
      is_stmt: default_is_stmt,
      basic_block: false,
      end_sequence: false,
    });
  }
}
/**
 * Reads a DWARF2/3 abbreviation table.
 *
 * Each declaration consists of an abbreviation number, a tag, a byte
 * specifying whether the tag has children, and a list of attribute/form
 * pairs. The pair list is terminated by a zero attribute together with a
 * zero form. The table itself ends at an abbreviation number of zero, or at
 * the end of the reader's data.
 *
 * @param reader reader positioned at the first declaration of the table
 * @returns the declarations in the order they were read
 */
function parseAbbrevs(reader: ByteReader): Abbrev[] {
  const table: Abbrev[] = [];
  // All numbers in the table are ULEB128 values small enough for a JS number.
  const nextNumber = (): number => Number(reader.readUnsignedLEB128());

  while (!reader.isEOF()) {
    const number = nextNumber();
    if (number === 0) {
      break;
    }
    const tag = nextNumber();
    const has_children = reader.readOneByte() !== 0;

    const attributes: { attr: number, form: number }[] = [];
    for (;;) {
      const attr = nextNumber();
      const form = nextNumber();
      if (attr === 0 && form === 0) {
        break;
      }
      attributes.push({ attr, form });
    }

    table.push({ number, tag: tag as DwarfTag, has_children, attributes });
  }
  return table;
}
// One entry of a line-number program's file table. DWARFLineInfo.readHeader
// creates the entries from the program header; readLines appends rows to `lines`.
interface DWARFFile {
// index into the program's directory table
dir_index: number;
name: string;
mod_time: number;
file_length: number;
// rows attributed to this file, in the order the program emitted them
lines: LineInfo[];
}
/**
 * One line-number program of `.debug_line` (DWARF version 2).
 *
 * The constructor parses the program header, including the directory and
 * file tables. Afterwards the caller must invoke either `readLines()` or
 * `skip()` so that `headerReader` ends up at the start of the next program.
 * `readLines()` runs the program and appends every emitted row to the
 * `lines` array of the file it belongs to.
 *
 * @throws Error when the program's version is not 2
 */
class DWARFLineInfo {
  /** Directory table; index 0 is a `null` placeholder. */
  directories: string[];
  /** File table; index 0 is a `null` placeholder because file numbers start at 1. */
  files: DWARFFile[];
  /** Offset of the first opcode inside the reader's view. */
  contentOffset: number;
  /** Number of opcode bytes. */
  contentLength: number;
  opData: DataView;
  opReader: ByteReader;
  line_base: number;
  line_range: number;
  opcode_base: number;
  min_insn_length: number;
  default_is_stmt: boolean;
  /** Number of ULEB128 operands of each standard opcode, indexed by opcode. */
  std_opcode_lengths: number[];
  lsm: LineStateMachine;

  /** @param headerReader reader positioned at the start of the program header */
  constructor(protected headerReader: ByteReader) {
    this.readHeader();
  }

  /** Drops the references to readers and state machine; the parsed tables stay. */
  dispose() {
    this.headerReader = null;
    this.opData = null;
    this.opReader = null;
    this.lsm = null;
  }

  /** Parses the program header and records where the opcodes start and end. */
  readHeader() {
    const length = this.headerReader.readInitialLength();
    // total length is measured from here
    const baseOffset1 = this.headerReader.offset;
    const version = this.headerReader.readTwoBytes();
    if (version != 2) throw new Error('DWARF version ' + version + ' not supported');
    const prologue_length = this.headerReader.readOffset();
    // prologue length is measured from here
    const baseOffset2 = this.headerReader.offset;
    this.min_insn_length = this.headerReader.readOneByte();
    this.default_is_stmt = this.headerReader.readOneByte() !== 0;
    this.line_base = this.headerReader.readOneByte(); // signed
    if (this.line_base >= 0x80) {
      this.line_base -= 0x100;
    }
    this.line_range = this.headerReader.readOneByte();
    const opcode_base = this.opcode_base = this.headerReader.readOneByte();
    // Kept so that opcodes this reader does not know can still be skipped.
    const std_opcode_lengths: number[] = new Array(opcode_base + 1);
    for (let i = 1; i < opcode_base; i++) {
      std_opcode_lengths[i] = this.headerReader.readOneByte();
    }
    this.std_opcode_lengths = std_opcode_lengths;
    // It is legal for the directory entry table to be empty.
    this.directories = [null];
    while (true) {
      const name = this.headerReader.readString();
      if (name === '') {
        break;
      }
      this.directories.push(name);
    }
    // It is also legal for the file entry table to be empty.
    this.files = [null];
    while (true) {
      const name = this.headerReader.readString();
      if (name === '') {
        break;
      }
      const dir_index = Number(this.headerReader.readUnsignedLEB128());
      const mod_time = Number(this.headerReader.readUnsignedLEB128());
      const file_length = Number(this.headerReader.readUnsignedLEB128());
      this.files.push({ name, dir_index, mod_time, file_length, lines: [] });
    }
    this.contentOffset = baseOffset2 + Number(prologue_length);
    this.contentLength = Number(length) - (this.contentOffset - baseOffset1);
  }

  /** Positions `headerReader` just past this program. */
  skip() {
    this.headerReader.offset = this.contentOffset + this.contentLength;
  }

  /**
   * Runs the line-number program, attaches each emitted row to its file and
   * finally moves `headerReader` to the next program.
   */
  readLines() {
    this.opData = this.headerReader.slice(this.contentOffset, this.contentLength);
    this.opReader = new ByteReader(this.opData, true);
    this.lsm = new LineStateMachine(this.default_is_stmt);
    while (!this.opReader.isEOF()) {
      const add_line = this.processOneOpcode();
      if (this.lsm.end_sequence) {
        // End of a sequence: start over with fresh registers, emit nothing.
        this.lsm.Reset(this.default_is_stmt);
      } else if (add_line) {
        const line: LineInfo = {
          file: this.files[this.lsm.file_num].name,
          line: this.lsm.line_num,
          column: this.lsm.column_num,
          address: this.lsm.address,
          is_stmt: this.lsm.is_stmt,
          basic_block: this.lsm.basic_block,
          end_sequence: this.lsm.end_sequence,
        };
        this.files[this.lsm.file_num].lines.push(line);
      }
    }
    this.skip();
  }

  /**
   * Executes the next opcode of the program.
   *
   * @returns true when the opcode asks for a row to be emitted from the
   *          current registers, false otherwise
   */
  processOneOpcode(): boolean {
    let opcode = this.opReader.readOneByte();
    // If the opcode is greater than or equal to opcode_base, it is a special
    // opcode. Most line programs consist mainly of special opcodes.
    if (opcode >= this.opcode_base) {
      opcode -= this.opcode_base;
      const advance_address = Math.floor(opcode / this.line_range) * this.min_insn_length;
      const advance_line = (opcode % this.line_range) + this.line_base;
      this.checkPassPC();
      this.lsm.address += advance_address;
      this.lsm.line_num += advance_line;
      this.lsm.basic_block = true;
      return true;
    }
    // Otherwise, we have the regular opcodes
    switch (opcode) {
      case DwarfLineNumberOps.DW_LNS_copy: {
        this.lsm.basic_block = false;
        return true;
      }
      case DwarfLineNumberOps.DW_LNS_advance_pc: {
        const advance_address = this.opReader.readUnsignedLEB128();
        this.checkPassPC();
        this.lsm.address += this.min_insn_length * Number(advance_address);
        break;
      }
      case DwarfLineNumberOps.DW_LNS_advance_line: {
        this.lsm.line_num += Number(this.opReader.readSignedLEB128());
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_file: {
        this.lsm.file_num = Number(this.opReader.readUnsignedLEB128());
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_column: {
        this.lsm.column_num = Number(this.opReader.readUnsignedLEB128());
        break;
      }
      case DwarfLineNumberOps.DW_LNS_negate_stmt: {
        this.lsm.is_stmt = !this.lsm.is_stmt;
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_basic_block: {
        this.lsm.basic_block = true;
        break;
      }
      case DwarfLineNumberOps.DW_LNS_fixed_advance_pc: {
        const advance_address = this.opReader.readTwoBytes();
        this.checkPassPC();
        this.lsm.address += advance_address;
        break;
      }
      case DwarfLineNumberOps.DW_LNS_const_add_pc: {
        // Same address advance as special opcode 255. The division is an
        // integer division; without the floor the address becomes fractional.
        const advance_address =
          this.min_insn_length * Math.floor((255 - this.opcode_base) / this.line_range);
        this.checkPassPC();
        this.lsm.address += advance_address;
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_prologue_end: {
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_epilogue_begin: {
        break;
      }
      case DwarfLineNumberOps.DW_LNS_set_isa: {
        // One ULEB128 operand, not recorded.
        this.opReader.readUnsignedLEB128();
        break;
      }
      case DwarfLineNumberOps.DW_LNS_extended_op: {
        // The length covers the extended opcode byte and all of its operands.
        const extended_op_len = Number(this.opReader.readUnsignedLEB128());
        const op_start = this.opReader.offset;
        const extended_op = this.opReader.readOneByte();
        let add_line = false;
        switch (extended_op) {
          case DwarfLineNumberExtendedOps.DW_LNE_end_sequence:
            this.lsm.end_sequence = true;
            add_line = true;
            break;
          case DwarfLineNumberExtendedOps.DW_LNE_set_address:
            this.lsm.address = Number(this.opReader.readAddress());
            break;
          case DwarfLineNumberExtendedOps.DW_LNE_define_file: {
            // Appends an entry to the file table, laid out like a header entry.
            const name = this.opReader.readString();
            const dir_index = Number(this.opReader.readUnsignedLEB128());
            const mod_time = Number(this.opReader.readUnsignedLEB128());
            const file_length = Number(this.opReader.readUnsignedLEB128());
            this.files.push({ name, dir_index, mod_time, file_length, lines: [] });
            break;
          }
          default:
            // Unknown extended opcode (e.g. DW_LNE_set_discriminator): its
            // operands are skipped by the resynchronisation below.
            break;
        }
        // Continue right after the bytes the length field announced, no
        // matter how many of them were understood. A (malformed) zero length
        // still has to step over the opcode byte that was just read.
        this.opReader.offset = op_start + Math.max(extended_op_len, 1);
        return add_line;
      }
      default: {
        console.log('Unknown DWARF opcode ' + opcode);
        // The header states how many ULEB128 operands each standard opcode
        // takes; consume them so the following opcodes stay aligned.
        const operand_count = this.std_opcode_lengths[opcode] || 0;
        for (let i = 0; i < operand_count; i++) {
          this.opReader.readUnsignedLEB128();
        }
        break;
      }
    }
    return false;
  }

  /** Placeholder; the "has the state machine passed pc" check is not implemented. */
  checkPassPC() {
    /*
    // Check if the lsm passes "pc". If so, mark it as passed.
    if (lsm_passes_pc &&
    lsm->address <= pc && pc < lsm->address + advance_address) {
    *lsm_passes_pc = true;
    }
    */
  }
}