Shofel2_T124_python/venv/lib/python3.10/site-packages/elftools/dwarf/die.py

359 lines
15 KiB
Python
Raw Normal View History

2024-05-25 16:45:07 +00:00
#-------------------------------------------------------------------------------
# elftools: dwarf/die.py
#
# DWARF Debugging Information Entry
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
from collections import namedtuple, OrderedDict
import os
from ..common.exceptions import DWARFError
from ..common.utils import bytes2str, struct_parse, preserve_stream_pos
from .enums import DW_FORM_raw2name
from .dwarf_util import _resolve_via_offset_table, _get_base_offset
# AttributeValue - a record describing one attribute value of a DIE.
#
# Fields, in positional order:
#
#   name:
#       The name (DW_AT_*) of the attribute.
#
#   form:
#       The DW_FORM_* name of the attribute.
#
#   value:
#       The value parsed from the section and translated according to the
#       form (e.g. for a DW_FORM_strp it's the actual string taken from the
#       string table).
#
#   raw_value:
#       The raw value as parsed from the section - used for debugging and
#       presentation (e.g. for a DW_FORM_strp it's the raw string offset into
#       the table).
#
#   offset:
#       Offset of the attribute's value in the stream (an absolute offset,
#       i.e. relative to the beginning of the whole stream).
#
#   indirection_length:
#       If the attribute was encoded as DW_FORM_indirect, `form` holds the
#       resolved form and this field holds the length of the indirection
#       chain. 0 means no indirection.
#
AttributeValue = namedtuple(
    'AttributeValue',
    ['name', 'form', 'value', 'raw_value', 'offset', 'indirection_length'])
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag (None for a null entry)

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    # Forms whose raw value is an index that has to be resolved through a
    # DW_AT_xxx_base attribute of the top DIE of the CU. Kept in one place so
    # that _translate_attr_value and _translate_indirect_attributes cannot
    # drift apart.
    _ADDRX_FORMS = (
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2',
        'DW_FORM_addrx3', 'DW_FORM_addrx4')
    _STRX_FORMS = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2',
        'DW_FORM_strx3', 'DW_FORM_strx4')
    _INDEXED_FORMS = _STRX_FORMS + _ADDRX_FORMS + (
        'DW_FORM_loclistx', 'DW_FORM_rnglistx')

    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo  # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no attribute called `name`,
            NotImplementedError for reference forms that are not supported
            here, and DWARFError if the attribute is not of a reference form.
        """
        attr = self.attributes[name]
        form = attr.form
        if form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                    'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata'):
            # CU-relative reference
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        elif form == 'DW_FORM_ref_addr':
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % attr.form)
        elif form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt'):
            if self.dwarfinfo.supplementary_dwarfinfo:
                return self.dwarfinfo.supplementary_dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
            # FIXME: how to distinguish supplementary files from dwo ?
            raise NotImplementedError('%s to dwo' % attr.form)
        else:
            # attr is a (named) tuple, so it has to be wrapped in a 1-tuple:
            # a bare tuple on the right of % is taken as the argument list.
            raise DWARFError('%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice. Note that its value is
            usually a string taken from the .debug_string section and the
            returned value will be a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE. Yields nothing if this DIE has
            no parent.
        """
        parent = self.get_parent()
        if not parent:
            # Plain return ends the generator. Raising StopIteration here
            # would be turned into a RuntimeError (PEP 479).
            return
        for sibling in parent.iter_children():
            if sibling is not self:
                yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #
    def set_parent(self, die):
        """ Record `die` as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree. The top DIE is the ancestor
            of all DIEs in the unit. Each parent is guaranteed to be at
            an offset less than their children. In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if this DIE's offset cannot be located in the
            DIE tree of the CU.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified. To avoid execution for each sibling record all
        # the children of any parent iterated. Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        """ Multi-line description: a header line followed by one line per
            attribute.
        """
        lines = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        for attrname, attrval in self.attributes.items():
            lines.append(' |%-18s: %s\n' % (attrname, attrval))
        return ''.join(lines)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            indirection_length = 0
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            # Another special case: the attribute value is a form code followed
            # by the real value in that form
            elif form == 'DW_FORM_indirect':
                (form, raw_value, indirection_length) = self._resolve_indirect()
                value = self._translate_attr_value(form, raw_value)
            else:
                raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset,
                indirection_length=indirection_length)

        self.size = self.stream.tell() - self.offset

    def _resolve_indirect(self):
        """ Resolve a DW_FORM_indirect attribute: read the real form code and
            then the value in that form.

            Supports arbitrary indirection nesting (the standard doesn't
            prohibit that). Expects the stream to be at the real form.

            Returns (form, raw_value, length), where length is the length of
            the indirection chain. Raises DWARFError on an unknown form code.
        """
        structs = self.cu.structs
        length = 1
        real_form_code = struct_parse(structs.Dwarf_uleb128(''), self.stream)  # Numeric form code
        while True:
            try:
                real_form = DW_FORM_raw2name[real_form_code]  # Form name or exception if bogus code
            except KeyError:
                raise DWARFError('Found DW_FORM_indirect with unknown real form 0x%x' % real_form_code)

            raw_value = struct_parse(structs.Dwarf_dw_form[real_form], self.stream)

            if real_form != 'DW_FORM_indirect':  # Happy path: one level of indirection
                return (real_form, raw_value, length)
            else:  # Indirection cascade
                length += 1
                real_form_code = raw_value
                # And continue parsing
                # No explicit infinite loop guard because the stream will end eventually

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form
        """
        # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
        # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
        # This breaks if there is an indirect encoding in the top DIE itself before the
        # corresponding _base, and it was seen in the wild.
        # There is a hook in get_top_DIE() to resolve those lazily.
        translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset
        value = None
        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_linetable(raw_value)
        elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
            if self.dwarfinfo.supplementary_dwarfinfo:
                return self.dwarfinfo.supplementary_dwarfinfo.get_string_from_table(raw_value)
            else:
                # No supplementary file available: keep the raw offset
                value = raw_value
        elif form == 'DW_FORM_flag':
            value = raw_value != 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form in self._ADDRX_FORMS and translate_indirect:
            value = self.cu.dwarfinfo.get_addr(self.cu, raw_value)
        elif form in self._STRX_FORMS and translate_indirect:
            stream = self.dwarfinfo.debug_str_offsets_sec.stream
            base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base')
            offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
            with preserve_stream_pos(stream):
                str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size)
            value = self.dwarfinfo.get_string_from_table(str_offset)
        elif form == 'DW_FORM_loclistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base')
        elif form == 'DW_FORM_rnglistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base')
        else:
            value = raw_value
        return value

    def _translate_indirect_attributes(self):
        """ This is a hook to translate the DW_FORM_...x values in the top DIE
            once the top DIE is parsed to the end. They can't be translated
            while the top DIE is being parsed, because they implicitly make a
            reference to the DW_AT_xxx_base attribute in the same DIE that may
            not have been parsed yet.
        """
        for key in self.attributes:
            attr = self.attributes[key]
            if attr.form in self._INDEXED_FORMS:
                # Can't change value in place, got to replace the whole attribute record
                self.attributes[key] = AttributeValue(
                    name=attr.name,
                    form=attr.form,
                    value=self._translate_attr_value(attr.form, attr.raw_value),
                    raw_value=attr.raw_value,
                    offset=attr.offset,
                    indirection_length=attr.indirection_length)