mirror of https://github.com/gcc-mirror/gcc.git
openmp: Change to using a hashtab to lookup offload target addresses for indirect function calls
A splay-tree was previously used to lookup equivalent target addresses for a given host address on offload targets. However, as splay-trees can modify their structure on lookup, they are not suitable for concurrent access from separate teams/threads without some form of locking. This patch changes the lookup data structure to a hashtab instead, which does not have these issues. The call to build_indirect_map to initialize the data structure is now called from just the first thread of the first team to avoid redundant calls to this function. 2024-03-22 Kwok Cheung Yeung <kcyeung@baylibre.com> libgomp/ * config/accel/target-indirect.c: Include string.h and hashtab.h. Remove include of splay-tree.h. Update comments. (splay_tree_prefix, splay_tree_c): Delete. (struct indirect_map_t): New. (hash_entry_type, htab_alloc, htab_free, htab_hash, htab_eq): New. (GOMP_INDIRECT_ADD_MAP): Remove volatile qualifier. (USE_SPLAY_TREE_LOOKUP): Rename to... (USE_HASHTAB_LOOKUP): ..this. (indirect_map, indirect_array): Delete. (indirect_htab): New. (build_indirect_map): Remove locking. Build indirect map using hashtab. (GOMP_target_map_indirect_ptr): Use indirect_htab to lookup target address. (GOMP_target_map_indirect_ptr): Remove volatile qualifier. * config/gcn/team.c (gomp_gcn_enter_kernel): Call build_indirect_map from first thread of first team only. * config/nvptx/team.c (gomp_nvptx_main): Likewise. * testsuite/libgomp.c-c++-common/declare-target-indirect-2.c (main): Add missing break statements. * testsuite/libgomp.fortran/declare-target-indirect-2.f90: Remove xfail.pull/46/merge
parent
65107faad7
commit
637e76b90e
|
@ -25,60 +25,73 @@
|
|||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include "libgomp.h"
|
||||
|
||||
#define splay_tree_prefix indirect
|
||||
#define splay_tree_c
|
||||
#include "splay-tree.h"
|
||||
struct indirect_map_t
|
||||
{
|
||||
void *host_addr;
|
||||
void *target_addr;
|
||||
};
|
||||
|
||||
volatile void **GOMP_INDIRECT_ADDR_MAP = NULL;
|
||||
typedef struct indirect_map_t *hash_entry_type;
|
||||
|
||||
/* Use a splay tree to lookup the target address instead of using a
|
||||
linear search. */
|
||||
#define USE_SPLAY_TREE_LOOKUP
|
||||
static inline void * htab_alloc (size_t size) { return gomp_malloc (size); }
|
||||
static inline void htab_free (void *ptr) { free (ptr); }
|
||||
|
||||
#ifdef USE_SPLAY_TREE_LOOKUP
|
||||
#include "hashtab.h"
|
||||
|
||||
static struct indirect_splay_tree_s indirect_map;
|
||||
static indirect_splay_tree_node indirect_array = NULL;
|
||||
static inline hashval_t
|
||||
htab_hash (hash_entry_type element)
|
||||
{
|
||||
return hash_pointer (element->host_addr);
|
||||
}
|
||||
|
||||
/* Build the splay tree used for host->target address lookups. */
|
||||
static inline bool
|
||||
htab_eq (hash_entry_type x, hash_entry_type y)
|
||||
{
|
||||
return x->host_addr == y->host_addr;
|
||||
}
|
||||
|
||||
void **GOMP_INDIRECT_ADDR_MAP = NULL;
|
||||
|
||||
/* Use a hashtab to lookup the target address instead of using a linear
|
||||
search. */
|
||||
#define USE_HASHTAB_LOOKUP
|
||||
|
||||
#ifdef USE_HASHTAB_LOOKUP
|
||||
|
||||
static htab_t indirect_htab = NULL;
|
||||
|
||||
/* Build the hashtab used for host->target address lookups. */
|
||||
|
||||
void
|
||||
build_indirect_map (void)
|
||||
{
|
||||
size_t num_ind_funcs = 0;
|
||||
volatile void **map_entry;
|
||||
static int lock = 0; /* == gomp_mutex_t lock; gomp_mutex_init (&lock); */
|
||||
void **map_entry;
|
||||
|
||||
if (!GOMP_INDIRECT_ADDR_MAP)
|
||||
return;
|
||||
|
||||
gomp_mutex_lock (&lock);
|
||||
|
||||
if (!indirect_array)
|
||||
if (!indirect_htab)
|
||||
{
|
||||
/* Count the number of entries in the NULL-terminated address map. */
|
||||
for (map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
|
||||
map_entry += 2, num_ind_funcs++);
|
||||
|
||||
/* Build splay tree for address lookup. */
|
||||
indirect_array = gomp_malloc (num_ind_funcs * sizeof (*indirect_array));
|
||||
indirect_splay_tree_node array = indirect_array;
|
||||
/* Build hashtab for address lookup. */
|
||||
indirect_htab = htab_create (num_ind_funcs);
|
||||
map_entry = GOMP_INDIRECT_ADDR_MAP;
|
||||
|
||||
for (int i = 0; i < num_ind_funcs; i++, array++)
|
||||
for (int i = 0; i < num_ind_funcs; i++, map_entry += 2)
|
||||
{
|
||||
indirect_splay_tree_key k = &array->key;
|
||||
k->host_addr = (uint64_t) *map_entry++;
|
||||
k->target_addr = (uint64_t) *map_entry++;
|
||||
array->left = NULL;
|
||||
array->right = NULL;
|
||||
indirect_splay_tree_insert (&indirect_map, array);
|
||||
struct indirect_map_t element = { *map_entry, NULL };
|
||||
hash_entry_type *slot = htab_find_slot (&indirect_htab, &element,
|
||||
INSERT);
|
||||
*slot = (hash_entry_type) map_entry;
|
||||
}
|
||||
}
|
||||
|
||||
gomp_mutex_unlock (&lock);
|
||||
}
|
||||
|
||||
void *
|
||||
|
@ -88,15 +101,11 @@ GOMP_target_map_indirect_ptr (void *ptr)
|
|||
if (!ptr)
|
||||
return ptr;
|
||||
|
||||
assert (indirect_array);
|
||||
assert (indirect_htab);
|
||||
|
||||
struct indirect_splay_tree_key_s k;
|
||||
indirect_splay_tree_key node = NULL;
|
||||
|
||||
k.host_addr = (uint64_t) ptr;
|
||||
node = indirect_splay_tree_lookup (&indirect_map, &k);
|
||||
|
||||
return node ? (void *) node->target_addr : ptr;
|
||||
struct indirect_map_t element = { ptr, NULL };
|
||||
hash_entry_type entry = htab_find (indirect_htab, &element);
|
||||
return entry ? entry->target_addr : ptr;
|
||||
}
|
||||
|
||||
#else
|
||||
|
@ -115,7 +124,7 @@ GOMP_target_map_indirect_ptr (void *ptr)
|
|||
|
||||
assert (GOMP_INDIRECT_ADDR_MAP);
|
||||
|
||||
for (volatile void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
|
||||
for (void **map_entry = GOMP_INDIRECT_ADDR_MAP; *map_entry;
|
||||
map_entry += 2)
|
||||
if (*map_entry == ptr)
|
||||
return (void *) *(map_entry + 1);
|
||||
|
|
|
@ -52,14 +52,15 @@ gomp_gcn_enter_kernel (void)
|
|||
{
|
||||
int threadid = __builtin_gcn_dim_pos (1);
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
build_indirect_map ();
|
||||
|
||||
if (threadid == 0)
|
||||
{
|
||||
int numthreads = __builtin_gcn_dim_size (1);
|
||||
int teamid = __builtin_gcn_dim_pos(0);
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
if (teamid == 0)
|
||||
build_indirect_map ();
|
||||
|
||||
/* Set up the global state.
|
||||
Every team will do this, but that should be harmless. */
|
||||
gomp_global_icv.nthreads_var = 16;
|
||||
|
|
|
@ -60,9 +60,6 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
|
|||
asm ("mov.u32 %0, %%tid.y;" : "=r" (tid));
|
||||
asm ("mov.u32 %0, %%ntid.y;" : "=r" (ntids));
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
build_indirect_map ();
|
||||
|
||||
if (tid == 0)
|
||||
{
|
||||
gomp_global_icv.nthreads_var = ntids;
|
||||
|
@ -74,6 +71,12 @@ gomp_nvptx_main (void (*fn) (void *), void *fn_data)
|
|||
nvptx_thrs = alloca (ntids * sizeof (*nvptx_thrs));
|
||||
memset (nvptx_thrs, 0, ntids * sizeof (*nvptx_thrs));
|
||||
|
||||
/* Initialize indirect function support. */
|
||||
unsigned int block_id;
|
||||
asm ("mov.u32 %0, %%ctaid.x;" : "=r" (block_id));
|
||||
if (block_id == 0)
|
||||
build_indirect_map ();
|
||||
|
||||
/* Find the low-latency heap details .... */
|
||||
uint32_t *shared_pool;
|
||||
uint32_t shared_pool_size = 0;
|
||||
|
|
|
@ -17,17 +17,17 @@ int main (void)
|
|||
{
|
||||
switch (i % 3)
|
||||
{
|
||||
case 0: fn_ptr[i] = &foo;
|
||||
case 1: fn_ptr[i] = &bar;
|
||||
case 2: fn_ptr[i] = &baz;
|
||||
case 0: fn_ptr[i] = &foo; break;
|
||||
case 1: fn_ptr[i] = &bar; break;
|
||||
case 2: fn_ptr[i] = &baz; break;
|
||||
}
|
||||
expected += (*fn_ptr[i]) ();
|
||||
}
|
||||
|
||||
#pragma omp target teams distribute parallel for reduction(+: x) \
|
||||
map (to: fn_ptr) map (tofrom: x)
|
||||
for (int i = 0; i < N; i++)
|
||||
x += (*fn_ptr[i]) ();
|
||||
#pragma omp target teams distribute parallel for \
|
||||
reduction (+: x) map (to: fn_ptr) map (tofrom: x)
|
||||
for (int i = 0; i < N; i++)
|
||||
x += (*fn_ptr[i]) ();
|
||||
|
||||
return x - expected;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
! { dg-do run }
|
||||
! { dg-xfail-run-if "Requires libgomp bug fix pending review" { offload_device } }
|
||||
|
||||
module m
|
||||
contains
|
||||
|
|
Loading…
Reference in New Issue