diff --git a/src/common/binutils.ts b/src/common/binutils.ts index 5cebb460..4d881dbd 100644 --- a/src/common/binutils.ts +++ b/src/common/binutils.ts @@ -31,7 +31,9 @@ function getASCII(view: DataView, offset: number): string { } // https://blog.k3170makan.com/2018/09/introduction-to-elf-format-elf-header.html -// https://dwarfstd.org/doc/DWARF5.pdf +// https://chromium.googlesource.com/breakpad/breakpad/+/linux-dwarf/src/common/dwarf/dwarf2reader.cc +// https://wiki.osdev.org/DWARF +// https://dwarfstd.org/doc/dwarf-2.0.0.pdf export class ELFParser { readonly dataView: DataView; @@ -98,7 +100,6 @@ export class ELFParser { const stringView = stringTableSection.contents; // Find the symbol table section and string table section const symbolTableSection = this.getSection('.symtab', ElfSectionType.SYMTAB); - console.log('symbolTableSection', symbolTableSection); if (symbolTableSection) { // Extract the symbol table const symbolTableOffset = symbolTableSection.offset; @@ -191,42 +192,1061 @@ class ElfSymbolTableEntry { // https://dwarfstd.org/doc/Debugging%20using%20DWARF-2012.pdf // https://dwarfstd.org/doc/DWARF5.pdf +// Tag names and codes. 
enum DwarfTag {
  DW_TAG_padding = 0x00,
  DW_TAG_array_type = 0x01,
  DW_TAG_class_type = 0x02,
  DW_TAG_entry_point = 0x03,
  DW_TAG_enumeration_type = 0x04,
  DW_TAG_formal_parameter = 0x05,
  DW_TAG_imported_declaration = 0x08,
  DW_TAG_label = 0x0a,
  DW_TAG_lexical_block = 0x0b,
  DW_TAG_member = 0x0d,
  DW_TAG_pointer_type = 0x0f,
  DW_TAG_reference_type = 0x10,
  DW_TAG_compile_unit = 0x11,
  DW_TAG_string_type = 0x12,
  DW_TAG_structure_type = 0x13,
  DW_TAG_subroutine_type = 0x15,
  DW_TAG_typedef = 0x16,
  DW_TAG_union_type = 0x17,
  DW_TAG_unspecified_parameters = 0x18,
  DW_TAG_variant = 0x19,
  DW_TAG_common_block = 0x1a,
  DW_TAG_common_inclusion = 0x1b,
  DW_TAG_inheritance = 0x1c,
  DW_TAG_inlined_subroutine = 0x1d,
  DW_TAG_module = 0x1e,
  DW_TAG_ptr_to_member_type = 0x1f,
  DW_TAG_set_type = 0x20,
  DW_TAG_subrange_type = 0x21,
  DW_TAG_with_stmt = 0x22,
  DW_TAG_access_declaration = 0x23,
  DW_TAG_base_type = 0x24,
  DW_TAG_catch_block = 0x25,
  DW_TAG_const_type = 0x26,
  DW_TAG_constant = 0x27,
  DW_TAG_enumerator = 0x28,
  DW_TAG_file_type = 0x29,
  DW_TAG_friend = 0x2a,
  DW_TAG_namelist = 0x2b,
  DW_TAG_namelist_item = 0x2c,
  DW_TAG_packed_type = 0x2d,
  DW_TAG_subprogram = 0x2e,
  DW_TAG_template_type_param = 0x2f,
  DW_TAG_template_value_param = 0x30,
  DW_TAG_thrown_type = 0x31,
  DW_TAG_try_block = 0x32,
  DW_TAG_variant_part = 0x33,
  DW_TAG_variable = 0x34,
  DW_TAG_volatile_type = 0x35,
  // DWARF 3.
  DW_TAG_dwarf_procedure = 0x36,
  DW_TAG_restrict_type = 0x37,
  DW_TAG_interface_type = 0x38,
  DW_TAG_namespace = 0x39,
  DW_TAG_imported_module = 0x3a,
  DW_TAG_unspecified_type = 0x3b,
  DW_TAG_partial_unit = 0x3c,
  DW_TAG_imported_unit = 0x3d,
  // SGI/MIPS Extensions.
  DW_TAG_MIPS_loop = 0x4081,
  // HP extensions. See:
  // ftp://ftp.hp.com/pub/lang/tools/WDB/wdb-4.0.tar.gz
  DW_TAG_HP_array_descriptor = 0x4090,
  // GNU extensions.
  DW_TAG_format_label = 0x4101, // For FORTRAN 77 and Fortran 90.
  DW_TAG_function_template = 0x4102, // For C++.
  DW_TAG_class_template = 0x4103, // For C++.
  DW_TAG_GNU_BINCL = 0x4104,
  DW_TAG_GNU_EINCL = 0x4105,
  // Extensions for UPC. See: http://upc.gwu.edu/~upc.
  DW_TAG_upc_shared_type = 0x8765,
  DW_TAG_upc_strict_type = 0x8766,
  DW_TAG_upc_relaxed_type = 0x8767,
  // PGI (STMicroelectronics) extensions. No documentation available.
  DW_TAG_PGI_kanji_type = 0xA000,
  DW_TAG_PGI_interface_block = 0xA020,
}

// Value of the "has children" byte of an abbreviation declaration.
enum DwarfHasChild {
  DW_children_no = 0,
  DW_children_yes = 1,
}

// Form names and codes.
enum DwarfForm {
  DW_FORM_addr = 0x01,
  DW_FORM_block2 = 0x03,
  DW_FORM_block4 = 0x04,
  DW_FORM_data2 = 0x05,
  DW_FORM_data4 = 0x06,
  DW_FORM_data8 = 0x07,
  DW_FORM_string = 0x08,
  DW_FORM_block = 0x09,
  DW_FORM_block1 = 0x0a,
  DW_FORM_data1 = 0x0b,
  DW_FORM_flag = 0x0c,
  DW_FORM_sdata = 0x0d,
  DW_FORM_strp = 0x0e,
  DW_FORM_udata = 0x0f,
  DW_FORM_ref_addr = 0x10,
  DW_FORM_ref1 = 0x11,
  DW_FORM_ref2 = 0x12,
  DW_FORM_ref4 = 0x13,
  DW_FORM_ref8 = 0x14,
  DW_FORM_ref_udata = 0x15,
  DW_FORM_indirect = 0x16,
}

// Attribute names and codes
enum DwarfAttribute {
  DW_AT_sibling = 0x01,
  DW_AT_location = 0x02,
  DW_AT_name = 0x03,
  DW_AT_ordering = 0x09,
  DW_AT_subscr_data = 0x0a,
  DW_AT_byte_size = 0x0b,
  DW_AT_bit_offset = 0x0c,
  DW_AT_bit_size = 0x0d,
  DW_AT_element_list = 0x0f,
  DW_AT_stmt_list = 0x10,
  DW_AT_low_pc = 0x11,
  DW_AT_high_pc = 0x12,
  DW_AT_language = 0x13,
  DW_AT_member = 0x14,
  DW_AT_discr = 0x15,
  DW_AT_discr_value = 0x16,
  DW_AT_visibility = 0x17,
  DW_AT_import = 0x18,
  DW_AT_string_length = 0x19,
  DW_AT_common_reference = 0x1a,
  DW_AT_comp_dir = 0x1b,
  DW_AT_const_value = 0x1c,
  DW_AT_containing_type = 0x1d,
  DW_AT_default_value = 0x1e,
  DW_AT_inline = 0x20,
  DW_AT_is_optional = 0x21,
  DW_AT_lower_bound = 0x22,
  DW_AT_producer = 0x25,
  DW_AT_prototyped = 0x27,
  DW_AT_return_addr = 0x2a,
  DW_AT_start_scope = 0x2c,
  DW_AT_stride_size = 0x2e,
  DW_AT_upper_bound = 0x2f,
  DW_AT_abstract_origin = 0x31,
  DW_AT_accessibility = 0x32,
  DW_AT_address_class = 0x33,
  DW_AT_artificial = 0x34,
  DW_AT_base_types = 0x35,
  DW_AT_calling_convention = 0x36,
  DW_AT_count = 0x37,
  DW_AT_data_member_location = 0x38,
  DW_AT_decl_column = 0x39,
  DW_AT_decl_file = 0x3a,
  DW_AT_decl_line = 0x3b,
  DW_AT_declaration = 0x3c,
  DW_AT_discr_list = 0x3d,
  DW_AT_encoding = 0x3e,
  DW_AT_external = 0x3f,
  DW_AT_frame_base = 0x40,
  DW_AT_friend = 0x41,
  DW_AT_identifier_case = 0x42,
  DW_AT_macro_info = 0x43,
  DW_AT_namelist_items = 0x44,
  DW_AT_priority = 0x45,
  DW_AT_segment = 0x46,
  DW_AT_specification = 0x47,
  DW_AT_static_link = 0x48,
  DW_AT_type = 0x49,
  DW_AT_use_location = 0x4a,
  DW_AT_variable_parameter = 0x4b,
  DW_AT_virtuality = 0x4c,
  DW_AT_vtable_elem_location = 0x4d,
  // DWARF 3 values.
  DW_AT_allocated = 0x4e,
  DW_AT_associated = 0x4f,
  DW_AT_data_location = 0x50,
  DW_AT_stride = 0x51,
  DW_AT_entry_pc = 0x52,
  DW_AT_use_UTF8 = 0x53,
  DW_AT_extension = 0x54,
  DW_AT_ranges = 0x55,
  DW_AT_trampoline = 0x56,
  DW_AT_call_column = 0x57,
  DW_AT_call_file = 0x58,
  DW_AT_call_line = 0x59,
  // SGI/MIPS extensions.
  DW_AT_MIPS_fde = 0x2001,
  DW_AT_MIPS_loop_begin = 0x2002,
  DW_AT_MIPS_tail_loop_begin = 0x2003,
  DW_AT_MIPS_epilog_begin = 0x2004,
  DW_AT_MIPS_loop_unroll_factor = 0x2005,
  DW_AT_MIPS_software_pipeline_depth = 0x2006,
  DW_AT_MIPS_linkage_name = 0x2007,
  DW_AT_MIPS_stride = 0x2008,
  DW_AT_MIPS_abstract_name = 0x2009,
  DW_AT_MIPS_clone_origin = 0x200a,
  DW_AT_MIPS_has_inlines = 0x200b,
  // HP extensions.
  DW_AT_HP_block_index = 0x2000,
  DW_AT_HP_unmodifiable = 0x2001, // Same as DW_AT_MIPS_fde.
  DW_AT_HP_actuals_stmt_list = 0x2010,
  DW_AT_HP_proc_per_section = 0x2011,
  DW_AT_HP_raw_data_ptr = 0x2012,
  DW_AT_HP_pass_by_reference = 0x2013,
  DW_AT_HP_opt_level = 0x2014,
  DW_AT_HP_prof_version_id = 0x2015,
  DW_AT_HP_opt_flags = 0x2016,
  DW_AT_HP_cold_region_low_pc = 0x2017,
  DW_AT_HP_cold_region_high_pc = 0x2018,
  DW_AT_HP_all_variables_modifiable = 0x2019,
  DW_AT_HP_linkage_name = 0x201a,
  DW_AT_HP_prof_flags = 0x201b, // In comp unit of procs_info for -g.
  // GNU extensions.
  DW_AT_sf_names = 0x2101,
  DW_AT_src_info = 0x2102,
  DW_AT_mac_info = 0x2103,
  DW_AT_src_coords = 0x2104,
  DW_AT_body_begin = 0x2105,
  DW_AT_body_end = 0x2106,
  DW_AT_GNU_vector = 0x2107,
  // VMS extensions.
  DW_AT_VMS_rtnbeg_pd_address = 0x2201,
  // UPC extension.
  DW_AT_upc_threads_scaled = 0x3210,
  // PGI (STMicroelectronics) extensions.
  DW_AT_PGI_lbase = 0x3a00,
  DW_AT_PGI_soffset = 0x3a01,
  DW_AT_PGI_lstride = 0x3a02,
}

// Line number opcodes.
enum DwarfLineNumberOps {
  DW_LNS_extended_op = 0,
  DW_LNS_copy = 1,
  DW_LNS_advance_pc = 2,
  DW_LNS_advance_line = 3,
  DW_LNS_set_file = 4,
  DW_LNS_set_column = 5,
  DW_LNS_negate_stmt = 6,
  DW_LNS_set_basic_block = 7,
  DW_LNS_const_add_pc = 8,
  DW_LNS_fixed_advance_pc = 9,
  // DWARF 3.
  DW_LNS_set_prologue_end = 10,
  DW_LNS_set_epilogue_begin = 11,
  DW_LNS_set_isa = 12,
}

// Line number extended opcodes.
enum DwarfLineNumberExtendedOps {
  DW_LNE_end_sequence = 1,
  DW_LNE_set_address = 2,
  DW_LNE_define_file = 3,
  // HP extensions.
  DW_LNE_HP_negate_is_UV_update = 0x11,
  DW_LNE_HP_push_context = 0x12,
  DW_LNE_HP_pop_context = 0x13,
  DW_LNE_HP_set_file_line_column = 0x14,
  DW_LNE_HP_set_routine_name = 0x15,
  DW_LNE_HP_set_sequence = 0x16,
  DW_LNE_HP_negate_post_semantics = 0x17,
  DW_LNE_HP_negate_function_exit = 0x18,
  DW_LNE_HP_negate_front_end_logical = 0x19,
  DW_LNE_HP_define_proc = 0x20,
}

// Type encoding names and codes
enum DwarfEncoding {
  DW_ATE_address = 0x1,
  DW_ATE_boolean = 0x2,
  DW_ATE_complex_float = 0x3,
  DW_ATE_float = 0x4,
  DW_ATE_signed = 0x5,
  DW_ATE_signed_char = 0x6,
  DW_ATE_unsigned = 0x7,
  DW_ATE_unsigned_char = 0x8,
  // DWARF3/DWARF3f
  DW_ATE_imaginary_float = 0x9,
  DW_ATE_packed_decimal = 0xa,
  DW_ATE_numeric_string = 0xb,
  DW_ATE_edited = 0xc,
  DW_ATE_signed_fixed = 0xd,
  DW_ATE_unsigned_fixed = 0xe,
  DW_ATE_decimal_float = 0xf,
  DW_ATE_lo_user = 0x80,
  DW_ATE_hi_user = 0xff,
}

// Location virtual machine opcodes
enum DwarfOpcode {
  DW_OP_addr = 0x03,
  DW_OP_deref = 0x06,
  DW_OP_const1u = 0x08,
  DW_OP_const1s = 0x09,
  DW_OP_const2u = 0x0a,
  DW_OP_const2s = 0x0b,
  DW_OP_const4u = 0x0c,
  DW_OP_const4s = 0x0d,
  DW_OP_const8u = 0x0e,
  DW_OP_const8s = 0x0f,
  DW_OP_constu = 0x10,
  DW_OP_consts = 0x11,
  DW_OP_dup = 0x12,
  DW_OP_drop = 0x13,
  DW_OP_over = 0x14,
  DW_OP_pick = 0x15,
  DW_OP_swap = 0x16,
  DW_OP_rot = 0x17,
  DW_OP_xderef = 0x18,
  DW_OP_abs = 0x19,
  DW_OP_and = 0x1a,
  DW_OP_div = 0x1b,
  DW_OP_minus = 0x1c,
  DW_OP_mod = 0x1d,
  DW_OP_mul = 0x1e,
  DW_OP_neg = 0x1f,
  DW_OP_not = 0x20,
  DW_OP_or = 0x21,
  DW_OP_plus = 0x22,
  DW_OP_plus_uconst = 0x23,
  DW_OP_shl = 0x24,
  DW_OP_shr = 0x25,
  DW_OP_shra = 0x26,
  DW_OP_xor = 0x27,
  DW_OP_bra = 0x28,
  DW_OP_eq = 0x29,
  DW_OP_ge = 0x2a,
  DW_OP_gt = 0x2b,
  DW_OP_le = 0x2c,
  DW_OP_lt = 0x2d,
  DW_OP_ne = 0x2e,
  DW_OP_skip = 0x2f,
  DW_OP_lit0 = 0x30,
  DW_OP_lit1 = 0x31,
  DW_OP_lit2 = 0x32,
  DW_OP_lit3 = 0x33,
  DW_OP_lit4 = 0x34,
  DW_OP_lit5 = 0x35,
  DW_OP_lit6 = 0x36,
  DW_OP_lit7 = 0x37,
  DW_OP_lit8 = 0x38,
  DW_OP_lit9 = 0x39,
  DW_OP_lit10 = 0x3a,
  DW_OP_lit11 = 0x3b,
  DW_OP_lit12 = 0x3c,
  DW_OP_lit13 = 0x3d,
  DW_OP_lit14 = 0x3e,
  DW_OP_lit15 = 0x3f,
  DW_OP_lit16 = 0x40,
  DW_OP_lit17 = 0x41,
  DW_OP_lit18 = 0x42,
  DW_OP_lit19 = 0x43,
  DW_OP_lit20 = 0x44,
  DW_OP_lit21 = 0x45,
  DW_OP_lit22 = 0x46,
  DW_OP_lit23 = 0x47,
  DW_OP_lit24 = 0x48,
  DW_OP_lit25 = 0x49,
  DW_OP_lit26 = 0x4a,
  DW_OP_lit27 = 0x4b,
  DW_OP_lit28 = 0x4c,
  DW_OP_lit29 = 0x4d,
  DW_OP_lit30 = 0x4e,
  DW_OP_lit31 = 0x4f,
  DW_OP_reg0 = 0x50,
  DW_OP_reg1 = 0x51,
  DW_OP_reg2 = 0x52,
  DW_OP_reg3 = 0x53,
  DW_OP_reg4 = 0x54,
  DW_OP_reg5 = 0x55,
  DW_OP_reg6 = 0x56,
  DW_OP_reg7 = 0x57,
  DW_OP_reg8 = 0x58,
  DW_OP_reg9 = 0x59,
  DW_OP_reg10 = 0x5a,
  DW_OP_reg11 = 0x5b,
  DW_OP_reg12 = 0x5c,
  DW_OP_reg13 = 0x5d,
  DW_OP_reg14 = 0x5e,
  DW_OP_reg15 = 0x5f,
  DW_OP_reg16 = 0x60,
  DW_OP_reg17 = 0x61,
  DW_OP_reg18 = 0x62,
  DW_OP_reg19 = 0x63,
  DW_OP_reg20 = 0x64,
  DW_OP_reg21 = 0x65,
  DW_OP_reg22 = 0x66,
  DW_OP_reg23 = 0x67,
  DW_OP_reg24 = 0x68,
  DW_OP_reg25 = 0x69,
  DW_OP_reg26 = 0x6a,
  DW_OP_reg27 = 0x6b,
  DW_OP_reg28 = 0x6c,
  DW_OP_reg29 = 0x6d,
  DW_OP_reg30 = 0x6e,
  DW_OP_reg31 = 0x6f,
  DW_OP_breg0 = 0x70,
  DW_OP_breg1 = 0x71,
  DW_OP_breg2 = 0x72,
  DW_OP_breg3 = 0x73,
  DW_OP_breg4 = 0x74,
  DW_OP_breg5 = 0x75,
  DW_OP_breg6 = 0x76,
  DW_OP_breg7 = 0x77,
  DW_OP_breg8 = 0x78,
  DW_OP_breg9 = 0x79,
  DW_OP_breg10 = 0x7a,
  DW_OP_breg11 = 0x7b,
  DW_OP_breg12 = 0x7c,
  DW_OP_breg13 = 0x7d,
  DW_OP_breg14 = 0x7e,
  DW_OP_breg15 = 0x7f,
  DW_OP_breg16 = 0x80,
  DW_OP_breg17 = 0x81,
  DW_OP_breg18 = 0x82,
  DW_OP_breg19 = 0x83,
  DW_OP_breg20 = 0x84,
  DW_OP_breg21 = 0x85,
  DW_OP_breg22 = 0x86,
  DW_OP_breg23 = 0x87,
  DW_OP_breg24 = 0x88,
  DW_OP_breg25 = 0x89,
  DW_OP_breg26 = 0x8a,
  DW_OP_breg27 = 0x8b,
  DW_OP_breg28 = 0x8c,
  DW_OP_breg29 = 0x8d,
  DW_OP_breg30 = 0x8e,
  DW_OP_breg31 = 0x8f,
  DW_OP_regX = 0x90,
  DW_OP_fbreg = 0x91,
  DW_OP_bregX = 0x92,
  DW_OP_piece = 0x93,
  DW_OP_deref_size = 0x94,
  DW_OP_xderef_size = 0x95,
  DW_OP_nop = 0x96,
  // DWARF3/DWARF3f
  DW_OP_push_object_address = 0x97,
  DW_OP_call2 = 0x98,
  DW_OP_call4 = 0x99,
  DW_OP_call_ref = 0x9a,
  DW_OP_form_tls_address = 0x9b,
  DW_OP_call_frame_cfa = 0x9c,
  DW_OP_bit_piece = 0x9d,
  DW_OP_lo_user = 0xe0,
  DW_OP_hi_user = 0xff,
  // GNU extensions
  DW_OP_GNU_push_tls_address = 0xe0,
}

// Source languages. These are values for DW_AT_language.
enum DwarfLanguage {
  DW_LANG_none = 0x0000,
  DW_LANG_C89 = 0x0001,
  DW_LANG_C = 0x0002,
  DW_LANG_Ada83 = 0x0003,
  DW_LANG_C_plus_plus = 0x0004,
  DW_LANG_Cobol74 = 0x0005,
  DW_LANG_Cobol85 = 0x0006,
  DW_LANG_Fortran77 = 0x0007,
  DW_LANG_Fortran90 = 0x0008,
  DW_LANG_Pascal83 = 0x0009,
  DW_LANG_Modula2 = 0x000a,
  DW_LANG_Java = 0x000b,
  DW_LANG_C99 = 0x000c,
  DW_LANG_Ada95 = 0x000d,
  DW_LANG_Fortran95 = 0x000e,
  DW_LANG_PLI = 0x000f,
  DW_LANG_ObjC = 0x0010,
  DW_LANG_ObjC_plus_plus = 0x0011,
  DW_LANG_UPC = 0x0012,
  DW_LANG_D = 0x0013,
  // Implementation-defined language code range.
  DW_LANG_lo_user = 0x8000,
  DW_LANG_hi_user = 0xffff,
  // Extensions.
  // MIPS assembly language. The GNU toolchain uses this for all
  // assembly languages, since there's no generic DW_LANG_ value for that.
  DW_LANG_Mips_Assembler = 0x8001,
  DW_LANG_Upc = 0x8765, // Unified Parallel C
}

/**
 * Sequential reader over a DataView that decodes the primitive encodings
 * used by the DWARF sections: fixed-width integers, LEB128 values,
 * NUL-terminated strings and the "initial length" field.
 *
 * Every read method advances `offset` past the bytes it consumed. Reads are
 * not bounds-checked here; reading past the end of the view surfaces the
 * RangeError raised by the DataView accessors.
 */
class ByteReader {
  /** Width in bytes used by readAddress(); only 4 and 8 are accepted. */
  addressSize: number = 4;
  /** Width in bytes used by readOffset(); updated by readInitialLength(). */
  offsetSize: number = 4;
  /** Current read position, relative to the start of `view`. */
  offset: number = 0;

  constructor(readonly view: DataView, readonly littleEndian: boolean) {
  }

  /** True once the read position has reached (or passed) the end of the view. */
  isEOF(): boolean {
    return this.offset >= this.view.byteLength;
  }

  /** Reads an unsigned 8-bit value. */
  readOneByte(): number {
    const value = this.view.getUint8(this.offset);
    this.offset += 1;
    return value;
  }

  /** Reads an unsigned 16-bit value in the reader's byte order. */
  readTwoBytes(): number {
    const value = this.view.getUint16(this.offset, this.littleEndian);
    this.offset += 2;
    return value;
  }

  /** Reads an unsigned 32-bit value in the reader's byte order. */
  readFourBytes(): number {
    const value = this.view.getUint32(this.offset, this.littleEndian);
    this.offset += 4;
    return value;
  }

  /** Reads an unsigned 64-bit value in the reader's byte order. */
  readEightBytes(): bigint {
    const value = this.view.getBigUint64(this.offset, this.littleEndian);
    this.offset += 8;
    return value;
  }

  /** Reads an unsigned LEB128 value: 7 payload bits per byte, low group first. */
  readUnsignedLEB128(): bigint {
    let result = BigInt(0);
    let shift = BigInt(0);
    while (true) {
      const byte = this.readOneByte();
      result |= BigInt(byte & 0x7f) << shift;
      // A clear high bit marks the final byte of the value.
      if ((byte & 0x80) === 0) {
        break;
      }
      shift += BigInt(7);
    }
    return result;
  }

  /** Reads a signed LEB128 value (two's complement, sign taken from bit 6 of the last byte). */
  readSignedLEB128(): bigint {
    let result = BigInt(0);
    let shift = BigInt(0);
    let byte = 0;
    while (true) {
      byte = this.readOneByte();
      result |= BigInt(byte & 0x7f) << shift;
      shift += BigInt(7);
      if ((byte & 0x80) === 0) {
        break;
      }
    }
    if ((byte & 0x40) !== 0) {
      // Sign extend if the highest payload bit of the last byte is set.
      result |= -(BigInt(1) << shift);
    }
    return result;
  }

  /**
   * Reads a section offset of `offsetSize` bytes.
   * @throws Error if `offsetSize` is neither 4 nor 8.
   */
  readOffset(): number | bigint {
    if (this.offsetSize === 4) {
      return this.readFourBytes();
    } else if (this.offsetSize === 8) {
      return this.readEightBytes();
    } else {
      throw new Error('Invalid offset size');
    }
  }

  /**
   * Reads a target address of `addressSize` bytes.
   * @throws Error if `addressSize` is neither 4 nor 8.
   */
  readAddress(): number | bigint {
    if (this.addressSize === 4) {
      return this.readFourBytes();
    } else if (this.addressSize === 8) {
      return this.readEightBytes();
    } else {
      throw new Error('Invalid address size');
    }
  }

  /**
   * Reads a DWARF "initial length" field and sets `offsetSize` as a side
   * effect: 8 when the 0xffffffff escape is present, 4 otherwise.
   */
  readInitialLength(): bigint | number {
    const initial_length = this.readFourBytes();
    // In DWARF2/3, if the initial length is all 1 bits, then the offset
    // size is 8 and we need to read the next 8 bytes for the real length.
    if (initial_length === 0xffffffff) {
      this.offsetSize = 8;
      return this.readEightBytes();
    } else {
      this.offsetSize = 4;
      return initial_length;
    }
  }

  /** Reads bytes up to (and consuming) a NUL terminator, one char code per byte. */
  readString(): string {
    let result = '';
    while (true) {
      const byte = this.readOneByte();
      if (byte === 0) {
        break;
      }
      result += String.fromCharCode(byte);
    }
    return result;
  }

  /**
   * Returns a DataView over `length` bytes starting at `offset` (relative to
   * this reader's view). Shares the underlying buffer and does not move the
   * read position.
   */
  slice(offset: number, length: number): DataView {
    return new DataView(this.view.buffer, this.view.byteOffset + offset, length);
  }

  /** Copies the next `length` bytes into a new Uint8Array. */
  readByteArray(length: number): Uint8Array {
    const result = new Uint8Array(length);
    for (let i = 0; i < length; i++) {
      result[i] = this.readOneByte();
    }
    return result;
  }
}

/**
 * Walks the DWARF v2 debug sections of an ELF file: every compilation unit
 * in `.debug_info` and every line-number program in `.debug_line`.
 *
 * Each parsed object is disposed right after being pushed, so the entries of
 * `units` and `lineInfos` no longer reference the section readers.
 */
export class DWARFParser {
  units: DWARFCompilationUnit[] = [];
  lineInfos: DWARFLineInfo[] = [];

  /**
   * @param elf parsed ELF file providing the sections by name
   * @throws Error if `.debug_info` is present without `.debug_abbrev`, or if
   *   a unit uses an unsupported DWARF version or address size
   */
  constructor(readonly elf: ELFParser) {
    // fetch DWARF v2 sections
    //this.aranges = elf.getSection('.debug_aranges');
    const abbrev = elf.getSection('.debug_abbrev');
    const info = elf.getSection('.debug_info');
    const debugstrs = elf.getSection('.debug_str') || elf.getSection('__debug_str');
    // A binary built without debug info simply yields no units instead of
    // failing on a missing section.
    if (info) {
      if (!abbrev) throw new Error('Missing .debug_abbrev section');
      // .debug_str is only needed by DW_FORM_strp, so it may be absent.
      const strtab = debugstrs ? debugstrs.contents : new DataView(new ArrayBuffer(0));
      // NOTE(review): little-endian is hard-coded here — confirm against the ELF header.
      const infoReader = new ByteReader(info.contents, true);
      while (!infoReader.isEOF()) {
        const compilationUnit = new DWARFCompilationUnit(infoReader, strtab);
        // must be either skip() or read()
        compilationUnit.read(abbrev.contents);
        this.units.push(compilationUnit);
        compilationUnit.dispose();
      }
    }
    const linedata = elf.getSection('.debug_line');
    if (linedata) {
      const lineReader = new ByteReader(linedata.contents, true);
      while (!lineReader.isEOF()) {
        const lineInfo = new DWARFLineInfo(lineReader);
        // must be either skip() or read()
        lineInfo.readLines();
        this.lineInfos.push(lineInfo);
        lineInfo.dispose();
      }
    }
  }
}

/** Decoded value of one DIE attribute. */
type DIEAttributeValue = number | bigint | string | Uint8Array;

/** Attributes of one DIE, keyed by attribute name (e.g. "DW_AT_name"). */
type DIEAttributes = Record<string, DIEAttributeValue>;

/**
 * One compilation unit of `.debug_info`.
 *
 * The constructor consumes the unit header from the shared section reader;
 * the caller must then call exactly one of read() or skip() so the reader is
 * positioned at the next unit.
 */
class DWARFCompilationUnit {
  /** Bytes occupied by the unit header, including the initial length field. */
  headerLength: number;
  /** Bytes of DIE data following the header. */
  contentLength: number;
  /** Offset of the first DIE, relative to the start of the section reader. */
  contentOffset: number;
  /** Offset of this unit's abbreviation table inside `.debug_abbrev`. */
  abbrevOffset: number;
  abbrevs: Abbrev[] = [];
  /** Offset size (4 or 8) announced by this unit's initial length field. */
  private offsetSize: number;

  /**
   * @param infoReader reader over `.debug_info`, positioned at a unit header
   * @param debugstrs contents of the string section used by DW_FORM_strp
   * @throws Error if the unit is not DWARF version 2 or its address size is not 4
   */
  constructor(protected infoReader: ByteReader, protected debugstrs: DataView) {
    const baseOffset = infoReader.offset;
    const length = infoReader.readInitialLength();
    // The unit length counts the bytes that follow the initial length field,
    // whatever the size of that field (4 bytes, or 12 in the 64-bit format).
    const unitStart = infoReader.offset;
    this.offsetSize = infoReader.offsetSize;
    const version = infoReader.readTwoBytes();
    this.abbrevOffset = Number(infoReader.readOffset());
    const address_size = infoReader.readOneByte();
    this.headerLength = infoReader.offset - baseOffset;
    if (version != 2) throw new Error('DWARF version ' + version + ' not supported');
    if (address_size !== 4) throw new Error('Address size ' + address_size + ' not supported');
    this.contentOffset = infoReader.offset;
    this.contentLength = unitStart + Number(length) - this.contentOffset;
  }

  /** Drops the references to the section data held by this unit. */
  dispose() {
    this.infoReader = null;
    this.debugstrs = null;
    this.abbrevs = null;
  }

  /** Positions the section reader just past this unit. */
  skip() {
    this.infoReader.offset = this.contentOffset + this.contentLength;
  }

  /**
   * Parses this unit's abbreviation table and walks all of its DIEs, then
   * positions the section reader at the next unit.
   * @param abbrev contents of `.debug_abbrev`
   */
  read(abbrev: DataView) {
    // parse the abbreviations
    const abbrevReader = new ByteReader(abbrev, this.infoReader.littleEndian);
    abbrevReader.offset = this.abbrevOffset;
    this.abbrevs = parseAbbrevs(abbrevReader);
    // extract slice with DIEs
    const slice = this.infoReader.slice(this.contentOffset, this.contentLength);
    const dieReader = new ByteReader(slice, this.infoReader.littleEndian);
    // DW_FORM_strp is read with readOffset(), so the DIE reader must use the
    // offset size of this unit rather than the ByteReader default.
    dieReader.offsetSize = this.offsetSize;
    this.processDIEs(dieReader);
    // skip to next cu section
    this.skip();
  }

  /**
   * Decodes every DIE in `reader`, tracking parent/child nesting.
   * @throws Error on an unknown abbreviation code or an unbalanced
   *   end-of-children marker
   */
  processDIEs(reader: ByteReader) {
    // DIEs refer to abbreviations by their declared code, which is not
    // required to equal the position in the table, so index by code.
    const abbrevByCode = new Map<number, Abbrev>();
    for (const entry of this.abbrevs) {
      if (!abbrevByCode.has(entry.number)) abbrevByCode.set(entry.number, entry);
    }
    const die_stack: DIEAttributes[] = [];
    // TODO: capture tree structure
    while (!reader.isEOF()) {
      const abbrev_num = Number(reader.readUnsignedLEB128());
      //console.log('DIE', (reader.offset + this.contentOffset).toString(16), abbrev_num);
      if (abbrev_num == 0) {
        // A zero code closes the child list of the innermost open DIE.
        if (die_stack.length == 0) throw new Error('DIE stack underflow @ offset ' + reader.offset);
        die_stack.pop();
        continue;
      }
      const abbrev = abbrevByCode.get(abbrev_num);
      if (!abbrev) throw new Error('Invalid abbreviation number ' + abbrev_num);
      const obj = this.processDIE(reader, abbrev);
      if (abbrev.has_children) {
        die_stack.push(obj);
      }
    }
  }

  /**
   * Decodes the attribute values of one DIE in the order given by its
   * abbreviation.
   * @returns the values keyed by attribute name; codes missing from
   *   DwarfAttribute are keyed as "DW_AT_0x<hex code>"
   */
  processDIE(reader: ByteReader, abbrev: Abbrev): DIEAttributes {
    //console.log('processDIE', abbrev);
    const obj: DIEAttributes = {};
    // iterate through attributes
    for (const attr of abbrev.attributes) {
      const value = this.processAttribute(reader, attr.form);
      // Give unknown (e.g. vendor) attributes distinct keys instead of
      // letting them all collapse onto the key "undefined".
      const name = DwarfAttribute[attr.attr] || 'DW_AT_0x' + attr.attr.toString(16);
      obj[name] = value;
    }
    //console.log(obj);
    return obj;
  }

  /**
   * Decodes a single attribute value according to its form.
   * @throws Error for forms that are not handled
   */
  processAttribute(reader: ByteReader, form: DwarfForm): DIEAttributeValue {
    switch (form) {
      case DwarfForm.DW_FORM_data1:
      case DwarfForm.DW_FORM_flag:
      case DwarfForm.DW_FORM_ref1:
        return reader.readOneByte();
      case DwarfForm.DW_FORM_data2:
      case DwarfForm.DW_FORM_ref2:
        return reader.readTwoBytes();
      case DwarfForm.DW_FORM_data4:
      case DwarfForm.DW_FORM_ref4:
      case DwarfForm.DW_FORM_addr:
      case DwarfForm.DW_FORM_ref_addr:
        // Addresses are 4 bytes: the constructor rejects any other address size.
        return reader.readFourBytes();
      case DwarfForm.DW_FORM_data8:
      case DwarfForm.DW_FORM_ref8:
        return reader.readEightBytes();
      case DwarfForm.DW_FORM_string:
        return reader.readString();
      case DwarfForm.DW_FORM_udata:
      case DwarfForm.DW_FORM_ref_udata:
        return reader.readUnsignedLEB128();
      case DwarfForm.DW_FORM_sdata:
        return reader.readSignedLEB128();
      case DwarfForm.DW_FORM_strp: {
        // read from strtab
        const offset = Number(reader.readOffset());
        return this.getStringFrom(this.debugstrs, offset);
      }
      case DwarfForm.DW_FORM_block1:
        return reader.readByteArray(reader.readOneByte());
      case DwarfForm.DW_FORM_block2:
        return reader.readByteArray(reader.readTwoBytes());
      case DwarfForm.DW_FORM_block4:
        return reader.readByteArray(reader.readFourBytes());
      case DwarfForm.DW_FORM_block:
        return reader.readByteArray(Number(reader.readUnsignedLEB128()));
      case DwarfForm.DW_FORM_indirect: {
        // The actual form is stored inline, ahead of the value.
        const actualForm = Number(reader.readUnsignedLEB128());
        return this.processAttribute(reader, actualForm as DwarfForm);
      }
      default:
        throw new Error('Unsupported form ' + form);
    }
  }

  /** Reads the NUL-terminated string starting at `offset` in `strtab`. */
  getStringFrom(strtab: DataView, offset: number): string {
    let result = '';
    while (true) {
      const byte = strtab.getUint8(offset);
      if (byte === 0) {
        break;
      }
      result += String.fromCharCode(byte);
      offset += 1;
    }
    return result;
  }
}

// https://chromium.googlesource.com/breakpad/breakpad/+/linux-dwarf/src/common/dwarf

interface FunctionInfo {
  name: string;
  mangled_name: string;
  file: string;
  line: number;
  lowpc: number;
  highpc: number;
}

interface SourceFileInfo {
  name: string;
  lowpc: number;
}

/** One declaration from an abbreviation table. */
interface Abbrev {
  /** Abbreviation code that DIEs use to refer to this declaration. */
  number: number;
  tag: DwarfTag;
  has_children: boolean;
  attributes: { attr: number, form: number }[];
}

class CompilationUnitHeader {
  length = 0;
  version = 0;
  abbrev_offset = 0;
  address_size = 0;
}

/** One row of the line-number table. */
interface LineInfo {
  file: string;
  line: number;
  column: number;
  address: number;
  is_stmt: boolean;
  basic_block: boolean;
  end_sequence: boolean;
}

/** Registers of the line-number state machine. */
class LineStateMachine {
  file_num = 1;
  address = 0;
  line_num = 1;
  column_num = 0;
  is_stmt = false;
  basic_block = false;
  end_sequence = false;

  constructor(default_is_stmt: boolean) {
    this.Reset(default_is_stmt);
  }

  /** Restores every register to its initial value for a new sequence. */
  Reset(default_is_stmt: boolean): void {
    this.file_num = 1;
    this.address = 0;
    this.line_num = 1;
    this.column_num = 0;
    this.is_stmt = default_is_stmt;
    this.basic_block = false;
    this.end_sequence = false;
  }
}

// Read a DWARF2/3 abbreviation section.
// Each abbrev consists of a abbreviation number, a tag, a byte
// specifying whether the tag has children, and a list of
// attribute/form pairs.
// The list of forms is terminated by a 0 for the attribute, and a
// zero for the form. The entire abbreviation section is terminated
// by a zero for the code.
/**
 * Parses the abbreviation declarations starting at the reader's current
 * offset.
 * @param reader reader over `.debug_abbrev`, positioned at the start of a table
 * @returns the declarations in file order; parsing stops at the terminating
 *   zero code or at the end of the data
 */
function parseAbbrevs(reader: ByteReader): Abbrev[] {
  const abbrevs: Abbrev[] = [];
  while (!reader.isEOF()) {
    const number = Number(reader.readUnsignedLEB128());
    // A zero code terminates the table.
    if (number == 0) break;
    const tag = Number(reader.readUnsignedLEB128());
    const has_children = reader.readOneByte() !== 0;
    const attributes: { attr: number, form: number }[] = [];
    while (true) {
      const attr = Number(reader.readUnsignedLEB128());
      const form = Number(reader.readUnsignedLEB128());
      // The attribute list ends with a (0, 0) pair.
      if (attr === 0 && form === 0) {
        break;
      }
      attributes.push({ attr, form });
    }
    const abbrev: Abbrev = {
      number,
      tag: tag as DwarfTag,
      has_children,
      attributes,
    };
    abbrevs.push(abbrev);
  }
  return abbrevs;
}

/** One entry of a line program's file table, with the rows attributed to it. */
interface DWARFFile {
  dir_index: number;
  name: string;
  mod_time: number;
  file_length: number;
  lines: LineInfo[];
}

/**
 * One line-number program of `.debug_line` (DWARF v2).
 *
 * The constructor parses the program header; the caller must then call
 * exactly one of readLines() or skip() so the shared section reader is
 * positioned at the next program.
 */
class DWARFLineInfo {
  /** Include directories; index 0 is a placeholder so indices are 1-based. */
  directories: string[];
  /** File table; index 0 is a placeholder so indices are 1-based. */
  files: DWARFFile[];
  /** Offset of the first opcode, relative to the section reader. */
  contentOffset: number;
  /** Number of opcode bytes in this program. */
  contentLength: number;
  opData: DataView;
  opReader: ByteReader;
  line_base: number;
  line_range: number;
  opcode_base: number;
  min_insn_length: number;
  default_is_stmt: boolean;
  lsm: LineStateMachine;
  /** Operand count of each standard opcode, indexed by opcode (from the header). */
  std_opcode_lengths: number[] = [];

  constructor(protected headerReader: ByteReader) {
    this.readHeader();
  }

  /** Drops the references to the section data and parser state. */
  dispose() {
    this.headerReader = null;
    this.opData = null;
    this.opReader = null;
    this.lsm = null;
  }

  /**
   * Parses the program header: sizes, opcode parameters, directory table and
   * file table.
   * @throws Error if the program is not DWARF version 2
   */
  readHeader() {
    const length = this.headerReader.readInitialLength();
    const baseOffset1 = this.headerReader.offset;
    const version = this.headerReader.readTwoBytes();
    if (version != 2) throw new Error('DWARF version ' + version + ' not supported');
    const prologue_length = this.headerReader.readOffset();
    const baseOffset2 = this.headerReader.offset;
    this.min_insn_length = this.headerReader.readOneByte();
    this.default_is_stmt = this.headerReader.readOneByte() !== 0;
    this.line_base = this.headerReader.readOneByte(); // signed
    if (this.line_base >= 0x80) {
      this.line_base -= 0x100;
    }
    this.line_range = this.headerReader.readOneByte();
    const opcode_base = this.opcode_base = this.headerReader.readOneByte();
    // Keep the operand counts so opcodes this parser does not know can be
    // skipped without losing sync with the opcode stream.
    const std_opcode_lengths: number[] = [0];
    for (let i = 1; i < opcode_base; i++) {
      std_opcode_lengths[i] = this.headerReader.readOneByte();
    }
    this.std_opcode_lengths = std_opcode_lengths;
    // It is legal for the directory entry table to be empty.
    this.directories = [null];
    while (true) {
      const name = this.headerReader.readString();
      if (name === '') {
        break;
      }
      this.directories.push(name);
    }
    // It is also legal for the file entry table to be empty.
    this.files = [null];
    while (true) {
      const name = this.headerReader.readString();
      if (name === '') {
        break;
      }
      const dir_index = Number(this.headerReader.readUnsignedLEB128());
      const mod_time = Number(this.headerReader.readUnsignedLEB128());
      const file_length = Number(this.headerReader.readUnsignedLEB128());
      this.files.push({ name, dir_index, mod_time, file_length, lines: [] });
    }
    // The opcodes start prologue_length bytes after the prologue_length
    // field, and the total length counts from just after the initial length.
    this.contentOffset = baseOffset2 + Number(prologue_length);
    this.contentLength = Number(length) - (this.contentOffset - baseOffset1);
  }

  /** Positions the section reader just past this program. */
  skip() {
    this.headerReader.offset = this.contentOffset + this.contentLength;
  }

  /**
   * Runs the line-number program and appends each emitted row to the `lines`
   * of the file it refers to, then positions the section reader at the next
   * program.
   * @throws Error if a row refers to a file number missing from the file table
   */
  readLines() {
    this.opData = this.headerReader.slice(this.contentOffset, this.contentLength);
    this.opReader = new ByteReader(this.opData, true);
    this.lsm = new LineStateMachine(this.default_is_stmt);
    while (!this.opReader.isEOF()) {
      const add_line = this.processOneOpcode();
      if (this.lsm.end_sequence) {
        this.lsm.Reset(this.default_is_stmt);
      } else if (add_line) {
        const file = this.files[this.lsm.file_num];
        if (!file) throw new Error('Invalid file number ' + this.lsm.file_num + ' in line program');
        const line: LineInfo = {
          file: file.name,
          line: this.lsm.line_num,
          column: this.lsm.column_num,
          address: this.lsm.address,
          is_stmt: this.lsm.is_stmt,
          basic_block: this.lsm.basic_block,
          end_sequence: this.lsm.end_sequence,
        };
        file.lines.push(line);
        //console.log(line);
      }
    }
    this.skip();
  }

  /**
   * Executes one opcode of the line-number program against `lsm`.
   * @returns true if the opcode emits a row (special opcode, DW_LNS_copy or
   *   DW_LNE_end_sequence), false otherwise
   */
  processOneOpcode(): boolean {
    let opcode = this.opReader.readOneByte();
    // If the opcode is greater than or equal to opcode_base, it is a special
    // opcode. Most line programs consist mainly of special opcodes.
    if (opcode >= this.opcode_base) {
      opcode -= this.opcode_base;
      const advance_address = Math.floor(opcode / this.line_range) * this.min_insn_length;
      const advance_line = (opcode % this.line_range) + this.line_base;
      //console.log('advance', advance_address, advance_line, this.lsm);
      this.checkPassPC();
      this.lsm.address += advance_address;
      this.lsm.line_num += advance_line;
      this.lsm.basic_block = true;
      return true;
    }
    // Otherwise, we have the regular opcodes
    //console.log('opcode', opcode, this.lsm);
    switch (opcode) {
      case DwarfLineNumberOps.DW_LNS_copy: {
        this.lsm.basic_block = false;
        return true;
      }
      case DwarfLineNumberOps.DW_LNS_advance_pc: {
        const advance_address = this.opReader.readUnsignedLEB128();
        this.checkPassPC();
        this.lsm.address += this.min_insn_length * Number(advance_address);
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_advance_line: {
        this.lsm.line_num += Number(this.opReader.readSignedLEB128());
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_set_file: {
        this.lsm.file_num = Number(this.opReader.readUnsignedLEB128());
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_set_column: {
        this.lsm.column_num = Number(this.opReader.readUnsignedLEB128());
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_negate_stmt: {
        this.lsm.is_stmt = !this.lsm.is_stmt;
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_set_basic_block: {
        this.lsm.basic_block = true;
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_fixed_advance_pc: {
        const advance_address = this.opReader.readTwoBytes();
        this.checkPassPC();
        this.lsm.address += advance_address;
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_const_add_pc: {
        // Same address advance as special opcode 255; the division is an
        // integer division, so it must be floored.
        const advance_address =
          this.min_insn_length * Math.floor((255 - this.opcode_base) / this.line_range);
        this.checkPassPC();
        this.lsm.address += advance_address;
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_set_prologue_end:
      case DwarfLineNumberOps.DW_LNS_set_epilogue_begin: {
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_set_isa: {
        // The ISA value is not tracked, but its operand must be consumed.
        this.opReader.readUnsignedLEB128();
        return false;
      }
      case DwarfLineNumberOps.DW_LNS_extended_op: {
        return this.processExtendedOpcode();
      }
      default: {
        console.log('Unknown DWARF opcode ' + opcode);
        // Skip the operands (all LEB128) announced by the header for this
        // opcode so the following opcodes are still decoded correctly.
        const operand_count = this.std_opcode_lengths[opcode] || 0;
        for (let i = 0; i < operand_count; i++) {
          this.opReader.readUnsignedLEB128();
        }
        return false;
      }
    }
  }

  /**
   * Executes one extended opcode; the DW_LNS_extended_op byte has already
   * been consumed.
   * @returns true only for DW_LNE_end_sequence
   */
  private processExtendedOpcode(): boolean {
    // The length covers the extended opcode byte plus its operands.
    const extended_op_len = Number(this.opReader.readUnsignedLEB128());
    if (extended_op_len === 0) {
      // Malformed: there is not even an opcode byte to read.
      return false;
    }
    const end_offset = this.opReader.offset + extended_op_len;
    const extended_op = this.opReader.readOneByte();
    //console.log('extended', extended_op, extended_op_len);
    switch (extended_op) {
      case DwarfLineNumberExtendedOps.DW_LNE_end_sequence:
        this.lsm.end_sequence = true;
        this.opReader.offset = end_offset;
        return true;
      case DwarfLineNumberExtendedOps.DW_LNE_set_address:
        // The operand width is whatever remains of the instruction.
        this.lsm.address = this.readSizedAddress(extended_op_len - 1);
        break;
      case DwarfLineNumberExtendedOps.DW_LNE_define_file: {
        const name = this.opReader.readString();
        const dir_index = Number(this.opReader.readUnsignedLEB128());
        const mod_time = Number(this.opReader.readUnsignedLEB128());
        const file_length = Number(this.opReader.readUnsignedLEB128());
        this.files.push({ name, dir_index, mod_time, file_length, lines: [] });
        break;
      }
      //case DwarfLineNumberExtendedOps.DW_LNE_set_discriminator:
      default:
        //console.log('Unknown DWARF extended opcode ' + extended_op);
        break;
    }
    // Always resume right after the instruction, whatever was consumed above.
    this.opReader.offset = end_offset;
    return false;
  }

  /**
   * Reads an address operand of `size` bytes from the opcode stream.
   * @throws Error if `size` is not 1, 2, 4 or 8
   */
  private readSizedAddress(size: number): number {
    switch (size) {
      case 1: return this.opReader.readOneByte();
      case 2: return this.opReader.readTwoBytes();
      case 4: return this.opReader.readFourBytes();
      case 8: return Number(this.opReader.readEightBytes());
      default: throw new Error('Invalid address size');
    }
  }

  checkPassPC() {
    /*
    // Check if the lsm passes "pc". If so, mark it as passed.
    if (lsm_passes_pc &&
    lsm->address <= pc && pc < lsm->address + advance_address) {
    *lsm_passes_pc = true;
    }
    */
  }
}

// Remainder of this patch chunk: start of the hunk for src/test/testelfparser.ts.
// It is truncated in this view and is kept verbatim below, unchanged.
// diff --git a/src/test/testelfparser.ts b/src/test/testelfparser.ts index e2517533..f278c3d3 100644 --- a/src/test/testelfparser.ts +++ b/src/test/testelfparser.ts @@ -38,16 +38,20 @@ describe('test ELFParser', () => { console.log('symbol', index, symbol.info, symbol.other, symbol.name, symbol.value.toString(16)); }); */ - assert.strictEqual(22, elfParser.sectionHeaders.length); - assert.strictEqual(31, elfParser.getSymbols().length); + assert.strictEqual(21, elfParser.sectionHeaders.length); + assert.strictEqual(29, elfParser.getSymbols().length); assert.ok(elfParser.sectionHeaders.find((section) => section.name === '.text') != null); assert.ok(elfParser.getSymbols().find((symbol) => symbol.name === 'main') != null); }); it('should parse DWARF info', () => { const dwarf = new DWARFParser(elfParser); + assert.strictEqual(2, dwarf.units.length); + const cu =
dwarf.units[0]; + // TODO: check info content + const li = dwarf.lineInfos[0]; + assert.strictEqual('crt0.c', li.files[1].name); /* - const info = dwarf.getCompilationUnits()[0]; assert.ok(info != null); assert.ok(info!.lineNumberProgram != null); assert.ok(info!.lineNumberProgram!.length > 0); diff --git a/src/worker/fs/arm32-fs.zip b/src/worker/fs/arm32-fs.zip index f8ee993d..55705300 100644 Binary files a/src/worker/fs/arm32-fs.zip and b/src/worker/fs/arm32-fs.zip differ diff --git a/src/worker/lib/arm32/libc.a b/src/worker/lib/arm32/libc.a index ba6fe7b1..16f66e8d 100644 Binary files a/src/worker/lib/arm32/libc.a and b/src/worker/lib/arm32/libc.a differ diff --git a/src/worker/tools/arm.ts b/src/worker/tools/arm.ts index 64cd3b0c..a14b859f 100644 --- a/src/worker/tools/arm.ts +++ b/src/worker/tools/arm.ts @@ -20,11 +20,11 @@ * SOFTWARE. */ -import { ELFParser } from "../../common/binutils"; +import { DWARFParser, ELFParser } from "../../common/binutils"; import { hex } from "../../common/util"; import { WASIFilesystem } from "../../common/wasi/wasishim"; import { CodeListingMap, SourceLine, WorkerError, WorkerResult } from "../../common/workertypes"; -import { BuildStep, BuildStepResult, gatherFiles, staleFiles, populateFiles, putWorkFile, anyTargetChanged, getPrefix, getWorkFileAsString, populateExtraFiles } from "../builder"; +import { BuildStep, BuildStepResult, gatherFiles, staleFiles, populateFiles, putWorkFile, anyTargetChanged, getPrefix, getWorkFileAsString, populateExtraFiles, processEmbedDirective } from "../builder"; import { makeErrorMatcher, re_crlf } from "../listingutils"; import { loadWASIFilesystemZip } from "../wasiutils"; import { loadNative, moduleInstFn, execMain, emglobal, EmscriptenModule } from "../wasmutils"; @@ -293,7 +293,7 @@ export async function compileARMTCC(step: BuildStep): Promise { //'-std=c11', '-funsigned-char', //'-Wwrite-strings', - '-gdwarf', + '-gdwarf-2', '-o', objpath]; if (params.define) { params.define.forEach((x) => 
args.push('-D' + x)); @@ -313,7 +313,15 @@ export async function compileARMTCC(step: BuildStep): Promise { }); populateExtraFiles(step, FS, params.extra_compile_files); - populateFiles(step, FS); + populateFiles(step, FS, { + mainFilePath: step.path, + processFn: (path, code) => { + if (typeof code === 'string') { + code = processEmbedDirective(code); + } + return code; + } + }); execMain(step, armtcc, args); if (errors.length) return { errors: errors }; @@ -347,7 +355,7 @@ export async function linkARMTCC(step: BuildStep): Promise { var args = ['-L.', '-nostdlib', '-nostdinc', '-Wl,--oformat=elf32-arm', //'-Wl,-section-alignment=0x100000', - '-gdwarf', + '-gdwarf-2', '-o', objpath]; if (params.define) { params.define.forEach((x) => args.push('-D' + x)); @@ -410,10 +418,31 @@ export async function linkARMTCC(step: BuildStep): Promise { }); } }); + const listings: CodeListingMap = {}; + const dwarf = new DWARFParser(elfparser); + dwarf.lineInfos.forEach((lineInfo) => { + lineInfo.files.forEach((file) => { + if (!file || !file.lines) return; + file.lines.forEach((line) => { + const filename = line.file; + const offset = line.address; + const path = getPrefix(filename) + '.lst'; + const linenum = line.line; + let lst = listings[path]; + if (lst == null) { lst = listings[path] = { lines: [] }; } + lst.lines.push({ + path, + line: linenum, + offset + }); + }); + }); + }); + //console.log(listings); return { output: rom, //.slice(0x34), - //listings: listings, + listings: listings, errors: errors, symbolmap: symbolmap, segments: segments diff --git a/test/exes/arm32.elf b/test/exes/arm32.elf index 567fd402..ecf893c8 100755 Binary files a/test/exes/arm32.elf and b/test/exes/arm32.elf differ