Commit 3dee6910 authored by Eric Pouech, committed by Alexandre Julliard

dbghelp: Allow symt_block to be defined over non contiguous chunks.

This improves correctness of functions like SymFromAddr() when searching local variables. Signed-off-by: Eric Pouech <eric.pouech@gmail.com>
parent eb820b37
......@@ -116,6 +116,25 @@ extern unsigned dbghelp_options DECLSPEC_HIDDEN;
extern BOOL dbghelp_opt_native DECLSPEC_HIDDEN;
extern SYSTEM_INFO sysinfo DECLSPEC_HIDDEN;
/* FIXME: this could be optimized later on by using relative offsets and smaller integral sizes */
/* A half-open address interval [low, high); the interval is empty when low == high. */
struct addr_range
{
DWORD64 low; /* absolute address of first byte of the range */
DWORD64 high; /* absolute address of first byte after the range */
};
/* tests whether ar2 is inside ar1 */
static inline BOOL addr_range_inside(const struct addr_range* ar1, const struct addr_range* ar2)
{
return ar1->low <= ar2->low && ar2->high <= ar1->high;
}
/* tests whether ar1 and ar2 are disjoint */
static inline BOOL addr_range_disjoint(const struct addr_range* ar1, const struct addr_range* ar2)
{
return ar1->high <= ar2->low || ar2->high <= ar1->low;
}
enum location_kind {loc_error, /* reg is the error code */
loc_unavailable, /* location is not available */
loc_absolute, /* offset is the location */
......@@ -160,10 +179,10 @@ static inline BOOL symt_check_tag(const struct symt* s, enum SymTagEnum tag)
/* A lexical block (scope) inside a function, possibly spread over several
 * disjoint chunks of memory (see struct addr_range).
 * NOTE(review): this diff listing interleaves the pre-commit fields
 * (address/size) with their post-commit replacement (num_ranges/ranges[]);
 * after the commit only the flexible-array form remains -- confirm against
 * the applied tree. */
struct symt_block
{
struct symt symt; /* base symbol header; tag is SymTagBlock */
ULONG_PTR address; /* (pre-commit) start address of the block */
ULONG_PTR size; /* (pre-commit) size in bytes of the block */
struct symt* container; /* block, or func */
struct vector vchildren; /* sub-blocks & local variables */
unsigned num_ranges; /* number of entries in ranges[]; always >= 1 (asserted at creation) */
struct addr_range ranges[]; /* flexible array of the disjoint address chunks covered by the block */
};
struct symt_module /* in fact any of .exe, .dll... */
......@@ -216,36 +235,49 @@ struct symt_data
} u;
};
/* We must take into account that most debug formats (dwarf and pdb) report for
* code (esp. inlined functions) inside functions the following way:
* - block
* + is represented by a contiguous area of memory,
* or at least has lo/hi addresses to encompass its contents
* + but most importantly, block A's lo/hi range is always embedded within
* its parent (block or function)
* - inline site:
* + is most of the times represented by a set of ranges (instead of a
* contiguous block)
* + native dbghelp only exports the start address, not its size
* + the set of ranges isn't always embedded in enclosing block (if any)
* + the set of ranges is always embedded in top function
* - (top) function
* + is described as a contiguous block of memory
/* Understanding functions internal storage:
* - functions, inline sites and blocks can be described as spreading across
* several chunks of memory (hence describing potentially a non contiguous
* memory space).
* - this is described internally as an array of address ranges
* (struct addr_range)
*
* - there's a hierarchical (aka lexical) relationship:
* + function's parent is a compiland or the module itself
* + inline site's parent is either a function or another inline site
* + block's parent is either a function, an inline site or another block.
*
* On top of the items above (taken as assumptions), we also assume that:
* - a range in inline site A, is disjoint from all the other ranges in
* inline site A
* - a range in inline site A, is either disjoint or embedded into any of
* the ranges of inline sites parent of A
* - internal storage relies on the following assumptions:
* + in an array of address ranges, one address range doesn't overlap over
* one of its siblings
* + each address range of a block is inside a single range of its lexical
* parent (and outside of the others since they don't overlap)
* + each address range of an inline site is inside a single range of its
* lexical parent
* + a function (as of today) is only represented by a single address range
* (A).
*
* Therefore, we also store all inline sites inside a function:
* - available as a linked list to simplify the walk among them
* - this linked list shall preserve the weak order of the lexical-parent
* relationship (eg for any inline site A, which has inline site B
* as lexical parent, then A is present before B in the linked list)
* - hence (from the assumptions above), when looking up which inline site
* contains a given address, the first range containing that address found
* while walking the list of inline sites is the right one.
* - all inline sites of a function are stored in a linked list:
* + this linked list shall preserve the weak order of the lexical-parent
* relationship (eg for any inline site A, which has inline site B
* as lexical parent, A must appear before B in the linked list)
*
* - lookup:
* + when looking up which inline site contains a given address, the first
* range containing that address found while walking the list of inline
* sites is the right one.
* + when lookup up which inner-most block contains an address, descend the
* blocks tree with branching on the block (if any) which contains the given
* address in one of its ranges
*
* Notes:
* (A): shall evolve, but storage in native dbghelp is awkward: from PGO testing, the
* top function is stored with its first range of address; all the others
* are stored as blocks, children of compiland, but which lexical parent
* is the top function. This breaks the natural assumption that
* children <> lexical parent is symmetrical.
* (B): see dwarf.c for some gory discrepancies between native & builtin
* DbgHelp.
*/
struct symt_function
......@@ -261,25 +293,6 @@ struct symt_function
struct symt_inlinesite* next_inlinesite;/* linked list of inline sites in this function */
};
/* FIXME: this could be optimized later on by using relative offsets and smaller integral sizes */
/* NOTE(review): duplicate of the definition earlier in this listing -- the commit
 * moved these declarations; this is the pre-move copy shown by the diff. */
struct addr_range
{
DWORD64 low; /* absolute address of first byte of the range */
DWORD64 high; /* absolute address of first byte after the range */
};
/* tests whether ar2 is fully contained within ar1 (boundaries may coincide) */
static inline BOOL addr_range_inside(const struct addr_range* ar1, const struct addr_range* ar2)
{
return ar1->low <= ar2->low && ar2->high <= ar1->high;
}
/* tests whether ar1 and ar2 are disjoint (half-open intervals that do not intersect) */
static inline BOOL addr_range_disjoint(const struct addr_range* ar1, const struct addr_range* ar2)
{
return ar1->high <= ar2->low || ar2->high <= ar1->low;
}
/* a symt_inlinesite* can be casted to a symt_function* to access all function bits */
struct symt_inlinesite
{
......@@ -857,10 +870,10 @@ extern struct symt_data*
struct symt_function* func, struct symt_block* block,
struct symt* type, const char* name, VARIANT* v) DECLSPEC_HIDDEN;
extern struct symt_block*
symt_open_func_block(struct module* module,
symt_open_func_block(struct module* module,
struct symt_function* func,
struct symt_block* block,
unsigned pc, unsigned len) DECLSPEC_HIDDEN;
struct symt_block* block,
unsigned num_ranges) DECLSPEC_HIDDEN;
extern struct symt_block*
symt_close_func_block(struct module* module,
const struct symt_function* func,
......
......@@ -1206,80 +1206,6 @@ static const char* dwarf2_get_cpp_name(dwarf2_debug_info_t* di, const char* name
return last;
}
/******************************************************************
 *		dwarf2_read_range
 *
 * Reads the overall address range of a debug_info entry:
 * - either from its DW_AT_ranges attribute, in which case only the enclosing
 *   [low, high) hull of all sub-ranges is returned (not the individual chunks),
 * - or from the DW_AT_low_pc & DW_AT_high_pc attributes.
 * In all cases, the range is relative to the beginning of the compilation unit.
 * Returns FALSE when no usable range information is present (or the computed
 * range is empty).
 */
static BOOL dwarf2_read_range(dwarf2_parse_context_t* ctx, const dwarf2_debug_info_t* di,
                              ULONG_PTR* plow, ULONG_PTR* phigh)
{
    struct attribute range;

    if (dwarf2_find_attribute(di, DW_AT_ranges, &range))
    {
        dwarf2_traverse_context_t traverse;
        ULONG_PTR                 low, high;
        const ULONG_PTR           UMAX = ~(ULONG_PTR)0u;

        traverse.data = ctx->module_ctx->sections[section_ranges].address + range.u.uvalue;
        traverse.end_data = ctx->module_ctx->sections[section_ranges].address +
            ctx->module_ctx->sections[section_ranges].size;

        *plow = UMAX;
        *phigh = 0;
        /* NOTE(review): '<' excludes an entry ending exactly at end_data;
         * '<=' may be intended -- confirm against the .debug_ranges layout */
        while (traverse.data + 2 * ctx->head.word_size < traverse.end_data)
        {
            low = dwarf2_parse_addr_head(&traverse, &ctx->head);
            high = dwarf2_parse_addr_head(&traverse, &ctx->head);
            if (low == 0 && high == 0) break; /* end-of-list marker */
            if (low == (ctx->head.word_size == 8 ? (~(DWORD64)0u) : (DWORD64)(~0u)))
                FIXME("unsupported yet (base address selection)\n");
            /* range values are relative to start of compilation unit */
            low += ctx->compiland->address - ctx->module_ctx->load_offset;
            high += ctx->compiland->address - ctx->module_ctx->load_offset;
            if (low < *plow) *plow = low;
            if (high > *phigh) *phigh = high;
        }
        if (*plow == UMAX || *phigh == 0) {WARN("no entry found\n"); return FALSE;}
        /* report the computed hull, not the loop locals: after the break above,
         * 'low'/'high' hold the 0/0 end-of-list marker (or stale values) */
        if (*plow == *phigh) {WARN("entry found, but low=high %Ix %Ix\n", *plow, *phigh); return FALSE;}

        return TRUE;
    }
    else
    {
        struct attribute low_pc;
        struct attribute high_pc;

        if (!dwarf2_find_attribute(di, DW_AT_low_pc, &low_pc) ||
            !dwarf2_find_attribute(di, DW_AT_high_pc, &high_pc))
            return FALSE;
        *plow = low_pc.u.uvalue;
        *phigh = high_pc.u.uvalue;
        if (ctx->head.version >= 4)
            switch (high_pc.form)
            {
            case DW_FORM_addr:
                break;
            case DW_FORM_data1:
            case DW_FORM_data2:
            case DW_FORM_data4:
            case DW_FORM_data8:
            case DW_FORM_sdata:
            case DW_FORM_udata:
                /* From dwarf4 on, when FORM's class is constant, high_pc is an offset from low_pc */
                *phigh += *plow;
                break;
            default:
                FIXME("Unsupported class for high_pc\n");
                break;
            }
        return TRUE;
    }
}
static unsigned dwarf2_get_num_ranges(const dwarf2_debug_info_t* di)
{
struct attribute range;
......@@ -2260,22 +2186,51 @@ static void dwarf2_parse_inlined_subroutine(dwarf2_subprogram_t* subpgm,
static void dwarf2_parse_subprogram_block(dwarf2_subprogram_t* subpgm,
dwarf2_debug_info_t* di)
{
ULONG_PTR low_pc, high_pc;
unsigned int num_ranges;
struct vector* children;
dwarf2_debug_info_t*child;
unsigned int i;
TRACE("%s\n", dwarf2_debug_di(di));
if (!dwarf2_read_range(subpgm->ctx, di, &low_pc, &high_pc))
num_ranges = dwarf2_get_num_ranges(di);
if (!num_ranges)
{
WARN("no range\n");
WARN("no ranges\n");
return;
}
subpgm->current_block = symt_open_func_block(subpgm->ctx->module_ctx->module, subpgm->current_func, subpgm->current_block,
subpgm->ctx->module_ctx->load_offset + low_pc - subpgm->current_func->address,
high_pc - low_pc);
/* Dwarf tends to keep the structure of the C/C++ program, and emits DW_TAG_lexical_block
* for every block in source program.
* With inlining and other optimizations, code for a block no longer lies in a contiguous
* chunk of memory. It's hence described with (potentially) multiple chunks of memory.
* Then each variable of each block is attached to its block. (A)
*
* PDB on the other hand no longer emits block information, and merge variable information
* at function level (actually function and each inline site).
* For example, if several variables of same name exist in different blocks of a function,
* only one entry for that name will exist. The information stored in (A) will point
* to the correct instance as defined by C/C++ scoping rules.
*
* (A) in all cases, there is information telling for each address of the function if a
* variable is accessible, and if so, how to get its value.
*
* DbgHelp only exposes a contiguous chunk of memory for a block.
*
* => Store internally all the ranges of addresses in a block, but only expose the size
* of the first chunk of memory.
* Otherwise, it would break the rule: blocks' chunks don't overlap.
* Note: This could mislead some programs using the blocks' address and size information.
* That's very unlikely to happen (most will use the APIs from DbgHelp, which will
* hide this information to the caller).
*/
subpgm->current_block = symt_open_func_block(subpgm->ctx->module_ctx->module, subpgm->current_func,
subpgm->current_block, num_ranges);
if (!dwarf2_fill_ranges(di, subpgm->current_block->ranges, num_ranges))
{
FIXME("Unexpected situation\n");
subpgm->current_block->num_ranges = 0;
}
children = dwarf2_get_di_children(di);
if (children) for (i = 0; i < vector_length(children); i++)
......
......@@ -2459,14 +2459,14 @@ static BOOL codeview_snarf(const struct msc_debug_info* msc_dbg,
break;
case S_BLOCK32_ST:
block = symt_open_func_block(msc_dbg->module, curr_func, block,
codeview_get_address(msc_dbg, sym->block_v1.segment, sym->block_v1.offset) - curr_func->address,
sym->block_v1.length);
block = symt_open_func_block(msc_dbg->module, curr_func, block, 1);
block->ranges[0].low = codeview_get_address(msc_dbg, sym->block_v1.segment, sym->block_v1.offset);
block->ranges[0].high = block->ranges[0].low + sym->block_v1.length;
break;
case S_BLOCK32:
block = symt_open_func_block(msc_dbg->module, curr_func, block,
codeview_get_address(msc_dbg, sym->block_v3.segment, sym->block_v3.offset) - curr_func->address,
sym->block_v3.length);
block = symt_open_func_block(msc_dbg->module, curr_func, block, 1);
block->ranges[0].low = codeview_get_address(msc_dbg, sym->block_v3.segment, sym->block_v3.offset);
block->ranges[0].high = block->ranges[0].low + sym->block_v3.length;
break;
case S_END:
......
......@@ -1374,15 +1374,16 @@ BOOL stabs_parse(struct module* module, ULONG_PTR load_offset,
case N_LBRAC:
if (curr_func)
{
block = symt_open_func_block(module, curr_func, block,
n_value, 0);
block = symt_open_func_block(module, curr_func, block, 1);
block->ranges[0].low = curr_func->address + n_value;
block->ranges[0].high = 0; /* will be set by N_RBRAC */
pending_flush(&pending_block, module, curr_func, block);
}
break;
case N_RBRAC:
if (curr_func)
{
block->size = curr_func->address + n_value - block->address;
block->ranges[0].high = curr_func->address + n_value;
block = symt_close_func_block(module, curr_func, block);
}
break;
......
......@@ -523,21 +523,21 @@ struct symt_data* symt_add_func_constant(struct module* module,
return locsym;
}
struct symt_block* symt_open_func_block(struct module* module,
struct symt_block* symt_open_func_block(struct module* module,
struct symt_function* func,
struct symt_block* parent_block,
unsigned pc, unsigned len)
struct symt_block* parent_block,
unsigned num_ranges)
{
struct symt_block* block;
struct symt** p;
assert(symt_check_tag(&func->symt, SymTagFunction) || symt_check_tag(&func->symt, SymTagInlineSite));
assert(num_ranges > 0);
assert(!parent_block || parent_block->symt.tag == SymTagBlock);
block = pool_alloc(&module->pool, sizeof(*block));
block = pool_alloc(&module->pool, sizeof(*block) + num_ranges * sizeof(block->ranges[0]));
block->symt.tag = SymTagBlock;
block->address = func->address + pc;
block->size = len;
block->num_ranges = num_ranges;
block->container = parent_block ? &parent_block->symt : &func->symt;
vector_init(&block->vchildren, sizeof(struct symt*), 4);
if (parent_block)
......@@ -1131,10 +1131,15 @@ static BOOL symt_enum_locals_helper(struct module_pair* pair,
case SymTagBlock:
{
struct symt_block* block = (struct symt_block*)lsym;
if (pc < block->address || block->address + block->size <= pc)
continue;
if (!symt_enum_locals_helper(pair, match, se, func, &block->vchildren))
return FALSE;
unsigned j;
for (j = 0; j < block->num_ranges; j++)
{
if (pc >= block->ranges[j].low && pc < block->ranges[j].high)
{
if (!symt_enum_locals_helper(pair, match, se, func, &block->vchildren))
return FALSE;
}
}
}
break;
case SymTagData:
......
......@@ -150,7 +150,7 @@ BOOL symt_get_address(const struct symt* type, ULONG64* addr)
}
break;
case SymTagBlock:
*addr = ((const struct symt_block*)type)->address;
*addr = ((const struct symt_block*)type)->ranges[0].low;
break;
case SymTagFunction:
*addr = ((const struct symt_function*)type)->address;
......@@ -819,7 +819,15 @@ BOOL symt_get_info(struct module* module, const struct symt* type,
X(DWORD64) = ((const struct symt_function*)type)->size;
break;
case SymTagBlock:
X(DWORD64) = ((const struct symt_block*)type)->size;
/* When there are several ranges available, we can only return one contiguous chunk of memory.
* We return only the first range. It will lead to not report the other ranges
* being part of the block, but it will not return gaps (between the ranges) as being
* part of the block.
* So favor a correct (yet incomplete) value than an incorrect one.
*/
if (((const struct symt_block*)type)->num_ranges > 1)
WARN("Only returning first range from multiple ranges\n");
X(DWORD64) = ((const struct symt_block*)type)->ranges[0].high - ((const struct symt_block*)type)->ranges[0].low;
break;
case SymTagPointerType:
X(DWORD64) = ((const struct symt_pointer*)type)->size;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment