#-------------------------------------------------------------------------------
# elftools: dwarf/die.py
#
# DWARF Debugging Information Entry
#
# Eli Bendersky (eliben@gmail.com)
# This code is in the public domain
#-------------------------------------------------------------------------------
|
|
from collections import namedtuple, OrderedDict
|
|
import os
|
|
|
|
from ..common.exceptions import DWARFError
|
|
from ..common.utils import bytes2str, struct_parse, preserve_stream_pos
|
|
from .enums import DW_FORM_raw2name
|
|
from .dwarf_util import _resolve_via_offset_table, _get_base_offset
|
|
|
|
|
|
# AttributeValue - immutable record describing one attribute of a DIE.
#
# Fields, in positional order:
#
#   name:
#       The DW_AT_* name of the attribute.
#
#   form:
#       The DW_FORM_* name of the attribute.
#
#   value:
#       The value parsed from the section and translated according to the
#       form (e.g. for a DW_FORM_strp it is the actual string taken from the
#       string table).
#
#   raw_value:
#       The value exactly as parsed from the section, kept for debugging and
#       presentation (e.g. for a DW_FORM_strp it is the raw string offset
#       into the table).
#
#   offset:
#       Absolute offset of the attribute's value in the stream, i.e. relative
#       to the beginning of the whole stream.
#
#   indirection_length:
#       When the attribute was encoded as DW_FORM_indirect, `form` holds the
#       resolved form and this field holds the length of the indirection
#       chain. 0 means no indirection.
#
AttributeValue = namedtuple(
    'AttributeValue',
    (
        'name',
        'form',
        'value',
        'raw_value',
        'offset',
        'indirection_length',
    ))
|
|
|
|
|
|
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag (None for a null entry)

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    # Reference forms whose raw value is an offset relative to the start of
    # the containing CU.
    _CU_RELATIVE_REF_FORMS = (
        'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
        'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata')

    # Reference forms that point into a supplementary object file.
    _SUPPLEMENTARY_REF_FORMS = (
        'DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt')

    # Index-based forms. They are translated using a DW_AT_xxx_base attribute
    # of the top DIE, so they share the deferred translation logic below.
    _ADDRX_FORMS = (
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2',
        'DW_FORM_addrx3', 'DW_FORM_addrx4')
    _STRX_FORMS = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2',
        'DW_FORM_strx3', 'DW_FORM_strx4')
    _LISTX_FORMS = ('DW_FORM_loclistx', 'DW_FORM_rnglistx')

    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo  # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no attribute called `name`,
            NotImplementedError for reference forms that are recognized but
            not supported (DW_FORM_ref_sig8, and supplementary references
            when no supplementary DWARF info is loaded), and DWARFError if
            the attribute's form is not a reference form at all.
        """
        attr = self.attributes[name]
        form = attr.form
        if form in self._CU_RELATIVE_REF_FORMS:
            # The raw value is relative to the start of the containing CU.
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        elif form == 'DW_FORM_ref_addr':
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % form)
        elif form in self._SUPPLEMENTARY_REF_FORMS:
            if self.dwarfinfo.supplementary_dwarfinfo:
                return self.dwarfinfo.supplementary_dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
            # FIXME: how to distinguish supplementary files from dwo ?
            raise NotImplementedError('%s to dwo' % form)
        else:
            # attr is a namedtuple, so it must be wrapped in a 1-tuple:
            # otherwise % unpacks its fields as separate format arguments
            # and raises TypeError instead of the intended DWARFError.
            raise DWARFError('%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice; a missing attribute
            contributes an empty string. Each attribute value is passed
            through bytes2str and the two parts are combined with
            os.path.join.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE.

            The siblings are the children of this DIE's parent, excluding
            this DIE itself. A DIE without a parent yields nothing.
        """
        parent = self.get_parent()
        if not parent:
            # Ending the generator normally is the correct way to produce an
            # empty iteration; explicitly raising StopIteration inside a
            # generator is turned into RuntimeError by PEP 479.
            return
        for sibling in parent.iter_children():
            if sibling is not self:
                yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        """ Record `die` as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree.  The top DIE is the ancestor
            of all DIEs in the unit.  Each parent is guaranteed to be at
            an offset less than their children.  In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if the walk cannot get any closer to this DIE's
            offset.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified.  To avoid execution for each sibling record all
        # the children of any parent iterated.  Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        """ Multi-line description: a header line with tag, size and
            has_children, followed by one line per attribute.
        """
        parts = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        for attrname, attrval in self.attributes.items():
            parts.append(' |%-18s: %s\n' % (attrname, attrval))
        return ''.join(parts)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU.

            Fills in abbrev_code and size, and for non-null entries also tag,
            has_children and attributes.
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            indirection_length = 0
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            # Another special case: the attribute value is a form code followed
            # by the real value in that form
            elif form == 'DW_FORM_indirect':
                (form, raw_value, indirection_length) = self._resolve_indirect()
                value = self._translate_attr_value(form, raw_value)
            else:
                raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset,
                indirection_length=indirection_length)

        self.size = self.stream.tell() - self.offset

    def _resolve_indirect(self):
        """ Resolve a DW_FORM_indirect attribute.

            Supports arbitrary indirection nesting (the standard doesn't
            prohibit that). Expects the stream to be at the real form.

            Returns (form, raw_value, length), where length is the number of
            indirection levels that were followed.

            Raises DWARFError if a form code in the chain is unknown.
        """
        structs = self.cu.structs
        length = 1
        # Numeric form code
        real_form_code = struct_parse(structs.Dwarf_uleb128(''), self.stream)
        # No explicit infinite loop guard because the stream will end eventually
        while True:
            try:
                # Form name or exception if bogus code
                real_form = DW_FORM_raw2name[real_form_code]
            except KeyError:
                raise DWARFError('Found DW_FORM_indirect with unknown real form 0x%x' % real_form_code)

            raw_value = struct_parse(structs.Dwarf_dw_form[real_form], self.stream)

            if real_form != 'DW_FORM_indirect':
                # Happy path: one level of indirection
                return (real_form, raw_value, length)

            # Indirection cascade: the value just read is itself a form code,
            # so continue parsing with it.
            length += 1
            real_form_code = raw_value

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form.

            Forms without a dedicated translation are returned as the raw
            value unchanged.
        """
        # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
        # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
        # This breaks if there is an indirect encoding in the top DIE itself before the
        # corresponding _base, and it was seen in the wild.
        # There is a hook in get_top_DIE() to resolve those lazily.
        translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset
        value = None
        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_linetable(raw_value)
        elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
            if self.dwarfinfo.supplementary_dwarfinfo:
                return self.dwarfinfo.supplementary_dwarfinfo.get_string_from_table(raw_value)
            else:
                # Without supplementary DWARF info the offset is all we have.
                value = raw_value
        elif form == 'DW_FORM_flag':
            value = not raw_value == 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form in self._ADDRX_FORMS and translate_indirect:
            value = self.cu.dwarfinfo.get_addr(self.cu, raw_value)
        elif form in self._STRX_FORMS and translate_indirect:
            # raw_value is an index into the string offsets table; the entry
            # found there is in turn an offset into the string table.
            stream = self.dwarfinfo.debug_str_offsets_sec.stream
            base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base')
            offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
            with preserve_stream_pos(stream):
                str_offset = struct_parse(self.cu.structs.Dwarf_offset(''), stream, base_offset + raw_value*offset_size)
            value = self.dwarfinfo.get_string_from_table(str_offset)
        elif form == 'DW_FORM_loclistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_loclists_sec.stream, self.cu, raw_value, 'DW_AT_loclists_base')
        elif form == 'DW_FORM_rnglistx' and translate_indirect:
            value = _resolve_via_offset_table(self.dwarfinfo.debug_rnglists_sec.stream, self.cu, raw_value, 'DW_AT_rnglists_base')
        else:
            value = raw_value
        return value

    def _translate_indirect_attributes(self):
        """ This is a hook to translate the DW_FORM_...x values in the top DIE
            once the top DIE is parsed to the end. They can't be translated
            while the top DIE is being parsed, because they implicitly make a
            reference to the DW_AT_xxx_base attribute in the same DIE that may
            not have been parsed yet.
        """
        deferred_forms = self._STRX_FORMS + self._ADDRX_FORMS + self._LISTX_FORMS
        # Iterate over a snapshot, since entries are reassigned in the loop.
        for key, attr in list(self.attributes.items()):
            if attr.form in deferred_forms:
                # Can't change value in place, got to replace the whole
                # attribute record
                self.attributes[key] = attr._replace(
                    value=self._translate_attr_value(attr.form, attr.raw_value))
|