/*****************************************************************************\
 *                        ANALYSIS PERFORMANCE TOOLS                         *
 *                                   Extrae                                  *
 *              Instrumentation package for parallel applications            *
 *****************************************************************************
 *     ___     This library is free software; you can redistribute it and/or *
 *    /  __         modify it under the terms of the GNU LGPL as published   *
 *   /  /  _____    by the Free Software Foundation; either version 2.1      *
 *  /  /  /     \   of the License, or (at your option) any later version.   *
 * (  (  ( B S C )                                                           *
 *  \  \  \_____/   This library is distributed in hope that it will be      *
 *   \  \__         useful but WITHOUT ANY WARRANTY; without even the        *
 *    \___          implied warranty of MERCHANTABILITY or FITNESS FOR A     *
 *                  PARTICULAR PURPOSE. See the GNU LGPL for more details.   *
 *                                                                           *
 * You should have received a copy of the GNU Lesser General Public License  *
 * along with this library; if not, write to the Free Software Foundation,   *
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA          *
 * The GNU LEsser General Public License is contained in the file COPYING.   *
 *                                 ---------                                 *
 *   Barcelona Supercomputing Center - Centro Nacional de Supercomputacion   *
\*****************************************************************************/

#include "common.h"

#ifdef HAVE_DLFCN_H
# define __USE_GNU
# include <dlfcn.h>
# undef __USE_GNU
#endif
#ifdef HAVE_STDARG_H
# include <stdarg.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
# include <stdio.h>
#endif
#ifdef HAVE_PTHREAD_H
# include <pthread.h>
#endif

#include "intel-kmpc-11.h"
#include "wrapper.h"
#include "omp-common.h"
#include "omp-probe.h"
#include "omp-events.h"
#include "intel-kmpc-11-intermediate/intel-kmpc-11-intermediate.h"
#include "intel-kmpc-11-intermediate/intel-kmpc-11-taskloop-helpers.h"

#include "xalloc.h"

/*
 * Pointer to the outlined function of the current parallel region. According
 * to the original note it is stored by the parent thread and queried by the
 * child threads inside the parallel region; in this part of the file it is
 * read by the __kmpc_dispatch_init_* wrappers to emit the user-function entry
 * event.
 *
 * FIXME: a single global cannot represent nested parallel regions. To support
 * nesting, this should be stored in a tree structure indexed by all ancestor
 * thread ids.
 */
static void *par_func = NULL;

//#define DEBUG

/*
 * RECHECK_INIT(real_fn_ptr)
 *
 * In case the constructor initialization didn't trigger or the symbols
 * couldn't be found, retry hooking: when the given pointer to the real
 * runtime function is NULL, a warning naming the calling function and the
 * pointer is printed to stderr and _extrae_intel_kmpc_init(TASKID) is invoked.
 *
 * The macro expands to a braced compound statement (not do { } while (0)),
 * so "RECHECK_INIT(x);" leaves an empty statement after the block. Do not use
 * it as the unbraced body of an "if" that is followed by an "else".
 */
#define RECHECK_INIT(real_fn_ptr)                                      \
{                                                                      \
	if (real_fn_ptr == NULL)                                             \
	{                                                                    \
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL                 \
		                 "%s: WARNING! %s is a NULL pointer. "             \
		                 "Did the initialization of this module trigger? " \
		                 "Retrying initialization...\n",                   \
		                 THREAD_LEVEL_VAR, __func__, #real_fn_ptr);        \
		_extrae_intel_kmpc_init(TASKID);                                   \
  }                                                                    \
}

#if defined(PIC)

/* Defined later in the file; presumably resolves the *_real pointers below (TODO confirm). */
static int intel_kmpc_get_hook_points (int rank);

/*
 * Pointers to the real runtime entry points that the wrappers in this file
 * forward to. All of them start as NULL; each wrapper runs RECHECK_INIT on
 * its pointer and aborts if it is still NULL afterwards.
 */

/* omp_set_num_threads entry point (ompc_ naming) */
static void (*ompc_set_num_threads_real)(int) = NULL;

/* Barrier */
static void (*__kmpc_barrier_real)(void*,int) = NULL;

/* Critical sections */
static void (*__kmpc_critical_real)(void*,int,void*) = NULL;
static void (*__kmpc_critical_with_hint_real)(void*,int,void*,uint32_t) = NULL;
static void (*__kmpc_end_critical_real)(void*,int,void*) = NULL;

/* User locks */
static void (*__kmpc_set_lock_real)(void *, int, void **) = NULL;
static void (*__kmpc_unset_lock_real)(void *, int, void **) = NULL;

/* Dynamically scheduled loops: init / next / fini for each iteration-variable width and signedness */
static void (*__kmpc_dispatch_init_4_real)(void*,int,int,int,int,int,int) = NULL;
static void (*__kmpc_dispatch_init_4u_real)(void*,int,int,unsigned int,unsigned int,int,int) = NULL;
static void (*__kmpc_dispatch_init_8_real)(void*,int,int,long long,long long,long long,long long) = NULL;
static void (*__kmpc_dispatch_init_8u_real)(void*,int,int,unsigned long long,unsigned long long,long long,long long) = NULL;
static int (*__kmpc_dispatch_next_4_real)(void*,int,int*,int*,int*,int*) = NULL;
static int (*__kmpc_dispatch_next_4u_real)(void*,int,int*,unsigned int*,unsigned int*,int*) = NULL;
static int (*__kmpc_dispatch_next_8_real)(void*,int,int*,long long *,long long *, long long *) = NULL;
static int (*__kmpc_dispatch_next_8u_real)(void*,int,int*,unsigned long long *,unsigned long long *, long long *) = NULL;
static void (*__kmpc_dispatch_fini_4_real)(void*,int) = NULL;
static void (*__kmpc_dispatch_fini_4u_real)(void*,int) = NULL;
static void (*__kmpc_dispatch_fini_8_real)(void*,int) = NULL; 
static void (*__kmpc_dispatch_fini_8u_real)(void*,int) = NULL;

/*
 * Parallel region fork. Deliberately not static, so it has external linkage;
 * presumably it is used from the intel-kmpc-11-intermediate sources (TODO confirm).
 */
void (*__kmpc_fork_call_real)(void*,int,void*,...) = NULL;

/* Single construct */
static int (*__kmpc_single_real)(void*,int) = NULL;
static void (*__kmpc_end_single_real)(void*,int) = NULL;

/* Explicit tasks */
static void* (*__kmpc_omp_task_alloc_real)(void*,int,int,size_t,size_t,void*) = NULL;
static void (*__kmpc_omp_task_begin_if0_real)(void*,int,void*) = NULL;
static void (*__kmpc_omp_task_complete_if0_real)(void*,int,void*) = NULL;
static int (*__kmpc_omp_taskwait_real)(void*,int) = NULL;

/* Taskloops (two runtime entry points with different arity) */
static void (*__kmpc_taskloop_real)(void*,int,void*,int,void*,void*,long,int,int,long,void*) = NULL;
static void (*__kmpc_taskloop_5_real)(void*,int,void*,int,void*,void*,long,int,int,long,int,void*) = NULL;

/* Taskgroups */
static void (*__kmpc_taskgroup_real)(void *, int) = NULL;
static void (*__kmpc_end_taskgroup_real)(void *, int) = NULL;

/* num_threads clause */
static void (*__kmpc_push_num_threads_real)(void *, int, int) = NULL;

/* Taskyield */
static void (*__kmpc_omp_taskyield_real)(void *, uint32_t, int) = NULL;

/******************************************************************************\
 *                                                                            *
 *                                  HELPERS                                   *
 *                                                                            *
 ******************************************************************************/

/*
 * The following helper structures are used to wrap the runtime's tasks with
 * wrappers to emit instrumentation. We store a list of tuples (pairs) of
 * real_task <-> wrap_task, the runtime is told to execute the wrap_task
 * and from the wrap_task we recover the real_task.
 */

/* One (wrapper, real) task association stored in the helper list. */
struct helper__kmpc_task_t
{
	/* Key used for lookups; NULL marks a slot that has never been used */
	void *wrap_task;
	/* Original task entry point to run in place of the wrapper */
	void *real_task;
	/* Internal identifier taken from the list's task_ctr at registration time */
	int task_id;
};

/* Fixed-capacity circular list of task helpers. */
struct helper_list__kmpc_task_t
{
	/* Array of max_helpers entries */
	struct helper__kmpc_task_t *list;
	/* Next task identifier to hand out; starts at 1 and grows by one per registration */
	volatile long long task_ctr;
	/* Index of the slot the next registration will write; wraps modulo max_helpers */
	int last_task;
	/* Number of entries in list */
	int max_helpers;
};

/*
 * hl__kmpc_task contains the list of all active data helpers. It is created
 * lazily by preallocate_kmpc_helpers(); its capacity is DEFAULT_OPENMP_HELPERS
 * unless the environment variable named by ENV_VAR_EXTRAE_OPENMP_HELPERS
 * holds a positive number, in which case that value is used instead.
 * hl__kmpc_task_mtx serializes the creation, registration and lookup code.
 */
static pthread_mutex_t hl__kmpc_task_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct helper_list__kmpc_task_t *hl__kmpc_task = NULL;

/*
 * Taskloop support relies partially on task support, but needs extra
 * structures. See comments of __kmpc_taskloop function.
 */
struct helper_list__kmpc_taskloop_t
{
  /* Real task pointer for each helper id; NULL when the id has no task assigned */
  void *real_task_map_by_helper[MAX_TASKLOOP_HELPERS];
	/* Initialized to 0; presumably the next helper id to assign (the assignment code is not in this part of the file) */
	int next_id;
};

/*
 * hl__kmpc_taskloop contains a map of helper_id => real_task. It is created
 * lazily by preallocate_kmpc_helpers() under hl__kmpc_taskloop_mtx.
 */
static pthread_mutex_t hl__kmpc_taskloop_mtx = PTHREAD_MUTEX_INITIALIZER;
static struct helper_list__kmpc_taskloop_t *hl__kmpc_taskloop = NULL;


/**
 * preallocate_kmpc_helpers
 *
 * Allocates the helper structures for task and taskloop substitutions.
 */
static void preallocate_kmpc_helpers()
{
	int i = 0, num_helpers = 0;
	char *env_helpers = NULL;

	pthread_mutex_lock(&hl__kmpc_task_mtx);

	if (hl__kmpc_task == NULL)
	{
    hl__kmpc_task = (struct helper_list__kmpc_task_t *)xmalloc(sizeof(struct helper_list__kmpc_task_t));

		/*                                                                          
     * If the environment variable ENV_VAR_EXTRAE_OPENMP_HELPERS is defined, this
     * will be the size of the list. Otherwise, DEFAULT_OPENMP_HELPERS is used.
     */
		env_helpers = getenv(ENV_VAR_EXTRAE_OPENMP_HELPERS);                        
		if (env_helpers != NULL)                                                    
		{                                                                           
			num_helpers = atoi(env_helpers);                                          
		}                                                                           
		if (num_helpers <= 0)                                                       
		{                                                                           
			num_helpers = DEFAULT_OPENMP_HELPERS;                                     
		}                                                  

#if defined(DEBUG)
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "preallocate_kmpc_helpers: Allocating %d data helpers\n ", THREAD_LEVEL_VAR, num_helpers);
#endif 

		hl__kmpc_task->task_ctr = 1;
		hl__kmpc_task->last_task = 0;
		hl__kmpc_task->max_helpers = num_helpers;
    		hl__kmpc_task->list = (struct helper__kmpc_task_t *)xmalloc(sizeof(struct helper__kmpc_task_t) * num_helpers);
		for (i=0; i<num_helpers; i++)
		{
			hl__kmpc_task->list[i].wrap_task = NULL;
			hl__kmpc_task->list[i].real_task = NULL;
			hl__kmpc_task->list[i].task_id = 0;
		}
	}

	pthread_mutex_unlock(&hl__kmpc_task_mtx);

	pthread_mutex_lock(&hl__kmpc_taskloop_mtx);

	if (hl__kmpc_taskloop == NULL)
	{
    hl__kmpc_taskloop = (struct helper_list__kmpc_taskloop_t *)xmalloc(sizeof(struct helper_list__kmpc_taskloop_t));

		hl__kmpc_taskloop->next_id = 0;
		for (i=0; i<MAX_TASKLOOP_HELPERS; i++)
		{
			hl__kmpc_taskloop->real_task_map_by_helper[i] = NULL;
		}
	}

	pthread_mutex_unlock(&hl__kmpc_taskloop_mtx);
}

/**
 * helper__kmpc_task_register
 *
 * Associates a real and a wrapped task in the list of active data helpers.
 *
 * @param wrap_task The wrapper task that substitutes the real one
 * @param real_task The real task that got substituted 
 */
static void helper__kmpc_task_register(void *wrap_task, void *real_task)
{

	pthread_mutex_lock(&hl__kmpc_task_mtx);
    /* Add the pair of (wrapper, real) tasks to the assigned slot */

	int i = hl__kmpc_task->last_task;

	hl__kmpc_task->list[i].wrap_task = wrap_task;
	hl__kmpc_task->list[i].real_task = real_task;

	hl__kmpc_task->list[i].task_id = hl__kmpc_task->task_ctr++;

	hl__kmpc_task->last_task = (hl__kmpc_task->last_task + 1) % hl__kmpc_task->max_helpers;

	Probe_OpenMP_TaskID(hl__kmpc_task->list[i].task_id, XTR_TASK_INSTANTIATION);

#if defined(DEBUG)
		fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_task_register: Registering helper wrap_task=%p real_task=%p slot=%d\n ", THREAD_LEVEL_VAR, wrap_task, real_task, i);
#endif 
	pthread_mutex_unlock(&hl__kmpc_task_mtx);
}

/**
 * helper__kmpc_task_retrieve
 *
 * Retrieves the real task and its internal id for the given wrap_task
 * from the list of data helpers.
 *
 * @param The wrapper task that substitutes the real one
 *
 * @return A struct containing the real task that got substituted and its internal id
 */
static struct helper__kmpc_task_t * helper__kmpc_task_retrieve(void *wrap_task)
{
	struct helper__kmpc_task_t *found_task_info = NULL;
	void *real_task = NULL;

	pthread_mutex_lock(&hl__kmpc_task_mtx);

	int i = hl__kmpc_task->last_task - 1;
	int start_i = i;

	while (i != hl__kmpc_task->last_task && hl__kmpc_task->list[i].wrap_task != NULL)
	{
		if (i < 0) i = hl__kmpc_task->max_helpers - 1;

		if (hl__kmpc_task->list[i].wrap_task == wrap_task)
		{
			found_task_info = &hl__kmpc_task->list[i];
			real_task = found_task_info->real_task;
			break;
		}

		i--;

		//If i has looped around the list the task couldn't be found, give error
		if (i == start_i) {
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_task_retrieve: ERROR! Did not find task for wrap_task=%p\n ", THREAD_LEVEL_VAR, wrap_task);
			exit (-1);			
		}
	}

	pthread_mutex_unlock(&hl__kmpc_task_mtx);

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_task_retrieve: Retrieving helper for wrap_task=%p => real_task=%p\n ", THREAD_LEVEL_VAR, wrap_task, real_task);
#endif 

	if (real_task == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_task_retrieve: ERROR! Could not find data helper for wrap_task=%p (%d max helpers)\n ", THREAD_LEVEL_VAR, wrap_task, hl__kmpc_task->max_helpers);
	}

	return found_task_info;
}

/**
 * helper__kmpc_task_substitute
 *
 * Callback function that the runtime invokes when a task is going to be
 * executed, where we perform the emission of events and the task substitution
 * from the wrapper to the real one.
 *
 * The helper entry is looked up by wrap_task. Its real task pointer and task
 * id are copied into locals right away, because the entry lives in a shared
 * ring buffer that may be reused by later registrations. If the lookup fails
 * (NULL entry or NULL real task) an error is printed and the process exits.
 *
 * @param arg First parameter that the runtime passes to the task; forwarded
 * unchanged to the real task.
 * @param wrap_task Second parameter that the runtime passes to the task: the
 * task entry pointer of the wrapped task, used to retrieve the real one and
 * forwarded unchanged to it.
 */
static void helper__kmpc_task_substitute (int arg, void *wrap_task)
{
	struct helper__kmpc_task_t *task_info = NULL;
	void (*real_task)(int,void*) = NULL;
	int task_id = 0;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	/* The timestamp placeholder must be an unsigned long long to match %llu */
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL " %llu helper__kmpc_task_substitute enter: args=(%d %p)\n ",
	        THREAD_LEVEL_VAR, /*TIME*/0ULL, arg, wrap_task);
#endif

	task_info = helper__kmpc_task_retrieve (wrap_task);
	if (task_info != NULL)
	{
		/* The lookup returns NULL when nothing matches: only dereference a valid entry */
		real_task = (void (*)(int,void*)) task_info->real_task;
		task_id = task_info->task_id;
	}

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL " %llu helper__kmpc_task_substitute: Found substitution for wrap_task=%p is real_task=%p\n ",
	        THREAD_LEVEL_VAR, /*TIME*/0ULL, wrap_task, (void *) real_task);
#endif

	if (real_task != NULL)
	{
		Probe_OpenMP_TaskUF_Entry (real_task);
		Probe_OpenMP_TaskID(task_id, XTR_TASK_EXECUTION);
		Backend_Leave_Instrumentation ();
		real_task (arg, wrap_task); /* Original code execution */
		Backend_Enter_Instrumentation ();
		Extrae_OpenMP_Notify_NewExecutedTask();
		Probe_OpenMP_TaskUF_Exit ();
	}
	else
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_task_substitute: ERROR! Did not find task substitution for wrap_task=%p\n ", THREAD_LEVEL_VAR, wrap_task);
		exit (-1);
	}
	Backend_Leave_Instrumentation ();
}

/**
 * helper__kmpc_taskloop_substitute
 *
 * Callback reached when the runtime executes a task that belongs to a
 * taskloop. The runtime does not call this function directly: it calls one
 * of the taskloop_helper_fn_[0-1023] trampolines, which append their own
 * helper_id as the last argument. That identifier selects the real task in
 * the helper map, which lets tasks from different taskloops, possibly running
 * at the same time on different threads, find their own real task pointer.
 *
 * If the map has no task for helper_id, an error is printed and the process
 * exits.
 *
 * @param arg Argument passed by the runtime; forwarded to the real task.
 * @param wrap_task Argument passed by the runtime. Corresponds to a kmp_task_t
 * structure; forwarded to the real task.
 * @param helper_id The helper identifier used to retrieve the corresponding
 * real task pointer.
 */
void helper__kmpc_taskloop_substitute (int arg, void *wrap_task, int helper_id)
{
	void (*task_fn)(int,void*) = NULL;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_taskloop_substitute enter: args=(%d %p %d)\n ", THREAD_LEVEL_VAR, arg, wrap_task, helper_id);
#endif

	task_fn = (void(*)(int,void*)) hl__kmpc_taskloop->real_task_map_by_helper[helper_id];

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_taskloop_substitute enter: Substitution for wrap_task=%p is real_task=%p (helper_id=%d)\n ", THREAD_LEVEL_VAR, wrap_task, task_fn, helper_id);
#endif

	/* Without a real task there is nothing to run: give up */
	if (task_fn == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "helper__kmpc_taskloop_substitute: ERROR! Did not find task substitution for wrap_task=%p (helper_id=%d)\n ", THREAD_LEVEL_VAR, wrap_task, helper_id);
		exit (-1);
	}

	Probe_OpenMP_TaskUF_Entry (task_fn);
	Backend_Leave_Instrumentation ();
	task_fn (arg, wrap_task); /* Original code execution */
	Backend_Enter_Instrumentation ();
	Extrae_OpenMP_Notify_NewExecutedTask();
	Probe_OpenMP_TaskUF_Exit ();

	Backend_Leave_Instrumentation ();
}


/******************************************************************************\
 *                                                                            *
 *                                WRAPPERS                                    *
 *                                                                            *
\******************************************************************************/

/*
 * Wrapper for ompc_set_num_threads. Always forwards the call to the real
 * function; when tracing applies it also updates the number-of-threads
 * bookkeeping and emits the entry/exit events around the call. Exits the
 * process if the real function could not be hooked.
 */
void
ompc_set_num_threads(int num_threads)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME": ompc_set_num_threads enter: @=%p num_threads=(%d)\n",
	    ompc_set_num_threads_real, num_threads);
#endif

	RECHECK_INIT(ompc_set_num_threads_real);

	/* Decide once whether this call is traced */
	traced = TRACE(ompc_set_num_threads_real) ? 1 : 0;

	if (!traced && ompc_set_num_threads_real == NULL)
	{
		fprintf(stderr, PACKAGE_NAME": ompc_set_num_threads: ERROR! This function is not hooked! Exiting!!\n");
		exit(-1);
	}

	if (traced)
	{
		/*
		 * Change number of threads only if in a library not mixing runtimes.
		 */
		OMP_CLAUSE_NUM_THREADS_CHANGE(num_threads);

		Probe_OpenMP_SetNumThreads_Entry(num_threads);
	}

	ompc_set_num_threads_real(num_threads);

	if (traced)
	{
		Probe_OpenMP_SetNumThreads_Exit();
	}

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME": ompc_set_num_threads exit\n");
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_barrier. Forwards to the real runtime function and,
 * when tracing applies, brackets the call with the barrier entry/exit events.
 * Exits the process if the real function could not be hooked.
 */
void __kmpc_barrier (void *loc, int global_tid)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_barrier enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_barrier_real, loc, global_tid);
#endif

	RECHECK_INIT(__kmpc_barrier_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_barrier_real) ? 1 : 0;

	if (!traced && __kmpc_barrier_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_barrier: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Barrier_Entry ();
	}

	__kmpc_barrier_real (loc, global_tid);

	if (traced)
	{
		Probe_OpenMP_Barrier_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_barrier exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_critical. Forwards to the real runtime function and,
 * when tracing applies, emits the named-lock entry event before the call and
 * the named-lock exit event (tagged with crit) after it. Exits the process
 * if the real function could not be hooked.
 */
void __kmpc_critical (void *loc, int global_tid, void *crit)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_critical_real, loc, global_tid, crit);
#endif

	RECHECK_INIT(__kmpc_critical_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_critical_real) ? 1 : 0;

	if (!traced && __kmpc_critical_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Entry ();
	}

	__kmpc_critical_real (loc, global_tid, crit);

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Exit (crit);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_critical_with_hint. Same instrumentation as
 * __kmpc_critical (named-lock entry/exit events around the real call when
 * tracing applies); the extra hint argument is forwarded untouched. Exits
 * the process if the real function could not be hooked.
 */
void __kmpc_critical_with_hint (void *loc, int global_tid, void *crit, uint32_t hint)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical_with_hint enter: @=%p args=(%p %d %p %u)\n ", THREAD_LEVEL_VAR, __kmpc_critical_with_hint_real, loc, global_tid, crit, hint);
#endif

	RECHECK_INIT(__kmpc_critical_with_hint_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_critical_with_hint_real) ? 1 : 0;

	if (!traced && __kmpc_critical_with_hint_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical_with_hint: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Entry ();
	}

	__kmpc_critical_with_hint_real (loc, global_tid, crit, hint);

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Exit (crit);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_critical_with_hint exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_end_critical. Forwards to the real runtime function
 * and, when tracing applies, emits the named-unlock entry event (tagged with
 * crit) before the call and the named-unlock exit event after it. Exits the
 * process if the real function could not be hooked.
 */
void __kmpc_end_critical (void *loc, int global_tid, void *crit)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_critical enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_end_critical_real, loc, global_tid, crit);
#endif

	RECHECK_INIT(__kmpc_end_critical_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_end_critical_real) ? 1 : 0;

	if (!traced && __kmpc_end_critical_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_critical: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Named_Unlock_Entry (crit);
	}

	__kmpc_end_critical_real (loc, global_tid, crit);

	if (traced)
	{
		Probe_OpenMP_Named_Unlock_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_critical exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_set_lock. Forwards to the real runtime function and,
 * when tracing applies, emits the named-lock entry event before the call and
 * the named-lock exit event (tagged with user_lock) after it. Exits the
 * process if the real function could not be hooked.
 */
void __kmpc_set_lock(void *loc, int gtid, void **user_lock)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_set_lock enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_set_lock_real, loc, gtid, user_lock);
#endif

	RECHECK_INIT(__kmpc_set_lock_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_set_lock_real) ? 1 : 0;

	if (!traced && __kmpc_set_lock_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_set_lock: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Entry();
	}

	__kmpc_set_lock_real(loc, gtid, user_lock);

	if (traced)
	{
		Probe_OpenMP_Named_Lock_Exit(user_lock);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_set_lock exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_unset_lock. Forwards to the real runtime function and,
 * when tracing applies, emits the named-unlock entry event (tagged with
 * user_lock) before the call and the named-unlock exit event after it. Exits
 * the process if the real function could not be hooked.
 */
void __kmpc_unset_lock(void *loc, int gtid, void **user_lock)
{
	int traced = 0;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_unset_lock enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_unset_lock_real, loc, gtid, user_lock);
#endif

	RECHECK_INIT(__kmpc_unset_lock_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_unset_lock_real) ? 1 : 0;

	if (!traced && __kmpc_unset_lock_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_unset_lock: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Named_Unlock_Entry(user_lock);
	}

	__kmpc_unset_lock_real(loc, gtid, user_lock);

	if (traced)
	{
		Probe_OpenMP_Named_Unlock_Exit();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_unset_lock exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_dispatch_init_4 (dynamic loop setup, int bounds).
 * Forwards to the real runtime function. When tracing applies, the DO entry
 * event is emitted before the call and the user-function entry event, tagged
 * with the outlined function of the current parallel region, after it. Exits
 * the process if the real function could not be hooked.
 */
void __kmpc_dispatch_init_4 (void *loc, int gtid, int schedule, int lb, int ub, int st, int chunk)
{
	int traced = 0;
	void *par_uf = NULL;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4 enter: @=%p args=(%p %d %d %d %d %d %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_init_4_real, loc, gtid, schedule, lb, ub, st, chunk);
#endif

	RECHECK_INIT(__kmpc_dispatch_init_4_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_init_4_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_init_4_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		/*
		 * Pick up the outlined function. Several threads run this inside the
		 * parallel region; each worker reads the value that the parent thread
		 * stored when the region started.
		 */
		par_uf = par_func;
#if defined(DEBUG)
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4: par_uf=%p\n ", THREAD_LEVEL_VAR, par_uf);
#endif

		Probe_OpenMP_DO_Entry ();
	}

	__kmpc_dispatch_init_4_real (loc, gtid, schedule, lb, ub, st, chunk);

	if (traced)
	{
		Probe_OpenMP_UF_Entry (par_uf);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4 exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_dispatch_init_4u (dynamic loop setup, unsigned int
 * bounds). Forwards to the real runtime function. When tracing applies, the
 * DO entry event is emitted before the call and the user-function entry
 * event, tagged with the outlined function of the current parallel region,
 * after it. Exits the process if the real function could not be hooked.
 */
void __kmpc_dispatch_init_4u(void *loc, int gtid, int schedule, unsigned int lb, unsigned int ub, int st, int chunk)
{
	int traced = 0;
	void *par_uf = NULL;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4u enter: @=%p args=(%p %d %d %u %u %d %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_init_4u_real, loc, gtid, schedule, lb, ub, st, chunk);
#endif

	RECHECK_INIT(__kmpc_dispatch_init_4u_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_init_4u_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_init_4u_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		/*
		 * Pick up the outlined function. Several threads run this inside the
		 * parallel region; each worker reads the value that the parent thread
		 * stored when the region started.
		 */
		par_uf = par_func;
#if defined(DEBUG)
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4u: par_uf=%p\n ", THREAD_LEVEL_VAR, par_uf);
#endif

		Probe_OpenMP_DO_Entry ();
	}

	__kmpc_dispatch_init_4u_real (loc, gtid, schedule, lb, ub, st, chunk);

	if (traced)
	{
		Probe_OpenMP_UF_Entry (par_uf);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_4u exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_dispatch_init_8 (dynamic loop setup, long long bounds).
 * Forwards to the real runtime function. When tracing applies, the DO entry
 * event is emitted before the call and the user-function entry event, tagged
 * with the outlined function of the current parallel region, after it. Exits
 * the process if the real function could not be hooked.
 */
void __kmpc_dispatch_init_8 (void *loc, int gtid, int schedule, long long lb, long long ub, long long st, long long chunk)
{
	int traced = 0;
	void *par_uf = NULL;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8 enter: @=%p args=(%p %d %d %lld %lld %lld %lld)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_init_8_real, loc, gtid, schedule, lb, ub, st, chunk);
#endif

	RECHECK_INIT(__kmpc_dispatch_init_8_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_init_8_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_init_8_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		/*
		 * Pick up the outlined function. Several threads run this inside the
		 * parallel region; each worker reads the value that the parent thread
		 * stored when the region started.
		 */
		par_uf = par_func;
#if defined(DEBUG)
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8: par_uf=%p\n ", THREAD_LEVEL_VAR, par_uf);
#endif

		Probe_OpenMP_DO_Entry ();
	}

	__kmpc_dispatch_init_8_real (loc, gtid, schedule, lb, ub, st, chunk);

	if (traced)
	{
		Probe_OpenMP_UF_Entry (par_uf);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8 exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_dispatch_init_8u (dynamic loop setup, unsigned long
 * long bounds). Forwards to the real runtime function. When tracing applies,
 * the DO entry event is emitted before the call and the user-function entry
 * event, tagged with the outlined function of the current parallel region,
 * after it. Exits the process if the real function could not be hooked.
 */
void __kmpc_dispatch_init_8u(void *loc, int gtid, int schedule, unsigned long long lb, unsigned long long ub, long long st, long long chunk)
{
	int traced = 0;
	void *par_uf = NULL;

	Backend_Enter_Instrumentation();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8u enter: @=%p args=(%p %d %d %llu %llu %lld %lld)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_init_8u_real, loc, gtid, schedule, lb, ub, st, chunk);
#endif

	RECHECK_INIT(__kmpc_dispatch_init_8u_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_init_8u_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_init_8u_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		/*
		 * Pick up the outlined function. Several threads run this inside the
		 * parallel region; each worker reads the value that the parent thread
		 * stored when the region started.
		 */
		par_uf = par_func;
#if defined(DEBUG)
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8u: par_uf=%p\n ", THREAD_LEVEL_VAR, par_uf);
#endif

		Probe_OpenMP_DO_Entry ();
	}

	__kmpc_dispatch_init_8u_real (loc, gtid, schedule, lb, ub, st, chunk);

	if (traced)
	{
		Probe_OpenMP_UF_Entry (par_uf);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_init_8u exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/*
 * Wrapper for __kmpc_dispatch_next_4 (fetch next chunk, int bounds).
 * Forwards to the real runtime function and returns its result. When tracing
 * applies, the work entry/exit events bracket the call, and the
 * instrumentation state is left before the call and re-entered after it.
 * A result of 0 is taken as the end of the loop: the user-function and DO
 * exit events are emitted at that point. Exits the process if the real
 * function could not be hooked.
 */
int __kmpc_dispatch_next_4 (void *loc, int gtid, int *p_last, int *p_lb, int *p_ub, int *p_st)
{
	int res;
	int traced = 0;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4 enter: @=%p args=(%p %d %p %p %p %p)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_next_4_real, loc, gtid, p_last, p_lb, p_ub, p_st);
#endif

	RECHECK_INIT(__kmpc_dispatch_next_4_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_next_4_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_next_4_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Work_Entry();
		Backend_Leave_Instrumentation (); /* We're about to execute user code */
	}

	res = __kmpc_dispatch_next_4_real (loc, gtid, p_last, p_lb, p_ub, p_st);

	if (traced)
	{
		Backend_Enter_Instrumentation (); /* We're about to execute OpenMP code */
		Probe_OpenMP_Work_Exit();

		/* No more chunks: close the loop here, since __kmpc_dispatch_fini_4 seems not to be called */
		if (res == 0)
		{
			Probe_OpenMP_UF_Exit ();
			Probe_OpenMP_DO_Exit ();
		}
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4 exit: res=%d\n ", THREAD_LEVEL_VAR, res);
#endif
	Backend_Leave_Instrumentation ();
	return res;
}

/*
 * Wrapper for __kmpc_dispatch_next_4u (fetch next chunk, unsigned int
 * bounds). Forwards to the real runtime function and returns its result.
 * When tracing applies, the work entry/exit events bracket the call, and the
 * instrumentation state is left before the call and re-entered after it.
 * A result of 0 is taken as the end of the loop: the user-function and DO
 * exit events are emitted at that point. Exits the process if the real
 * function could not be hooked.
 */
int __kmpc_dispatch_next_4u(void *loc, int gtid, int *p_last, unsigned int *p_lb, unsigned int *p_ub, int *p_st)
{
	int res;
	int traced = 0;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4u enter: @=%p args=(%p %d %p %p %p %p)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_next_4u_real, loc, gtid, p_last, p_lb, p_ub, p_st);
#endif

	RECHECK_INIT(__kmpc_dispatch_next_4u_real);

	/* Decide once whether this call is traced */
	traced = TRACE(__kmpc_dispatch_next_4u_real) ? 1 : 0;

	if (!traced && __kmpc_dispatch_next_4u_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	if (traced)
	{
		Probe_OpenMP_Work_Entry();
		Backend_Leave_Instrumentation (); /* We're about to execute user code */
	}

	res = __kmpc_dispatch_next_4u_real (loc, gtid, p_last, p_lb, p_ub, p_st);

	if (traced)
	{
		Backend_Enter_Instrumentation (); /* We're about to execute OpenMP code */
		Probe_OpenMP_Work_Exit();

		/* No more chunks: close the loop here, since __kmpc_dispatch_fini_4u seems not to be called */
		if (res == 0)
		{
			Probe_OpenMP_UF_Exit ();
			Probe_OpenMP_DO_Exit ();
		}
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_4u exit: res=%d\n ", THREAD_LEVEL_VAR, res);
#endif
	Backend_Leave_Instrumentation (); /* We're about to execute user code */
	return res;
}

/**
 * __kmpc_dispatch_next_8
 *
 * Wrapper around the real __kmpc_dispatch_next_8 of the OpenMP runtime
 * (long long bounds). When tracing is active, the runtime call is surrounded
 * by the "work" probes, and a zero result additionally emits the UF and DO
 * exit probes.
 *
 * NOTE(review): unlike __kmpc_dispatch_next_4/_4u, the runtime call here is
 * made without leaving the instrumentation first -- confirm this is intended.
 *
 * @return The value returned by the real __kmpc_dispatch_next_8.
 */
int __kmpc_dispatch_next_8 (void *loc, int gtid, int *p_last, long long *p_lb, long long *p_ub, long long *p_st)
{
	int status;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8 enter: @=%p args=(%p %d %p %p %p %p)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_next_8_real, loc, gtid, p_last, p_lb, p_ub, p_st);
#endif

	RECHECK_INIT(__kmpc_dispatch_next_8_real);

	if (TRACE(__kmpc_dispatch_next_8_real))
	{
		Probe_OpenMP_Work_Entry();
		status = __kmpc_dispatch_next_8_real (loc, gtid, p_last, p_lb, p_ub, p_st);
		Probe_OpenMP_Work_Exit();

		/* __kmpc_dispatch_fini_8 does not seem to be called, so the loop is
		 * closed here when the runtime returns 0 */
		if (status == 0)
		{
			Probe_OpenMP_UF_Exit ();
			Probe_OpenMP_DO_Exit ();
		}
	}
	else if (__kmpc_dispatch_next_8_real == NULL)
	{
		/* Nothing to forward to */
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}
	else
	{
		/* Not tracing: plain pass-through */
		status = __kmpc_dispatch_next_8_real (loc, gtid, p_last, p_lb, p_ub, p_st);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8 exit: res=%d\n ", THREAD_LEVEL_VAR, status);
#endif

	Backend_Leave_Instrumentation ();
	return status;
}

/**
 * __kmpc_dispatch_next_8u
 *
 * Wrapper around the real __kmpc_dispatch_next_8u of the OpenMP runtime
 * (unsigned long long bounds). When tracing is active, the runtime call is
 * surrounded by the "work" probes, and a zero result additionally emits the
 * UF and DO exit probes.
 *
 * NOTE(review): unlike __kmpc_dispatch_next_4/_4u, the runtime call here is
 * made without leaving the instrumentation first -- confirm this is intended.
 *
 * @return The value returned by the real __kmpc_dispatch_next_8u.
 */
int __kmpc_dispatch_next_8u(void *loc, int gtid, int *p_last, unsigned long long *p_lb, unsigned long long *p_ub, long long *p_st)
{
	int status;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8u enter: @=%p args=(%p %d %p %p %p %p)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_next_8u_real, loc, gtid, p_last, p_lb, p_ub, p_st);
#endif

	RECHECK_INIT(__kmpc_dispatch_next_8u_real);

	if (TRACE(__kmpc_dispatch_next_8u_real))
	{
		Probe_OpenMP_Work_Entry();
		status = __kmpc_dispatch_next_8u_real (loc, gtid, p_last, p_lb, p_ub, p_st);
		Probe_OpenMP_Work_Exit();

		/* __kmpc_dispatch_fini_8 does not seem to be called, so the loop is
		 * closed here when the runtime returns 0 */
		if (status == 0)
		{
			Probe_OpenMP_UF_Exit ();
			Probe_OpenMP_DO_Exit ();
		}
	}
	else if (__kmpc_dispatch_next_8u_real == NULL)
	{
		/* Nothing to forward to */
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}
	else
	{
		/* Not tracing: plain pass-through */
		status = __kmpc_dispatch_next_8u_real (loc, gtid, p_last, p_lb, p_ub, p_st);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_next_8u exit: res=%d\n ", THREAD_LEVEL_VAR, status);
#endif

	Backend_Leave_Instrumentation ();
	return status;
}

/**
 * __kmpc_dispatch_fini_4
 *
 * Wrapper around the real __kmpc_dispatch_fini_4 of the OpenMP runtime.
 * When tracing is active, the DO exit probe is emitted before the runtime
 * call and the UF exit probe after it.
 */
void __kmpc_dispatch_fini_4 (void *loc, int gtid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4 enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_fini_4_real, loc, gtid);
#endif

	RECHECK_INIT(__kmpc_dispatch_fini_4_real);

	if (!(TRACE(__kmpc_dispatch_fini_4_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_dispatch_fini_4_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_dispatch_fini_4_real (loc, gtid);
	}
	else
	{
		Probe_OpenMP_DO_Exit ();
		__kmpc_dispatch_fini_4_real (loc, gtid);
		Probe_OpenMP_UF_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4 exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_dispatch_fini_4u
 *
 * Wrapper around the real __kmpc_dispatch_fini_4u of the OpenMP runtime.
 * When tracing is active, the DO exit probe is emitted before the runtime
 * call and the UF exit probe after it.
 */
void __kmpc_dispatch_fini_4u(void *loc, int gtid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4u enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_fini_4u_real, loc, gtid);
#endif

	RECHECK_INIT(__kmpc_dispatch_fini_4u_real);

	if (!(TRACE(__kmpc_dispatch_fini_4u_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_dispatch_fini_4u_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_dispatch_fini_4u_real (loc, gtid);
	}
	else
	{
		Probe_OpenMP_DO_Exit ();
		__kmpc_dispatch_fini_4u_real (loc, gtid);
		Probe_OpenMP_UF_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_4u exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_dispatch_fini_8
 *
 * Wrapper around the real __kmpc_dispatch_fini_8 of the OpenMP runtime.
 * When tracing is active, the DO exit probe is emitted before the runtime
 * call and the UF exit probe after it.
 */
void __kmpc_dispatch_fini_8 (void *loc, int gtid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8 enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_fini_8_real, loc, gtid);
#endif

	RECHECK_INIT(__kmpc_dispatch_fini_8_real);

	if (!(TRACE(__kmpc_dispatch_fini_8_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_dispatch_fini_8_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_dispatch_fini_8_real (loc, gtid);
	}
	else
	{
		Probe_OpenMP_DO_Exit ();
		__kmpc_dispatch_fini_8_real (loc, gtid);
		Probe_OpenMP_UF_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8 exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_dispatch_fini_8u
 *
 * Wrapper around the real __kmpc_dispatch_fini_8u of the OpenMP runtime.
 * When tracing is active, the DO exit probe is emitted before the runtime
 * call and the UF exit probe after it.
 */
void __kmpc_dispatch_fini_8u(void *loc, int gtid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8u enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_dispatch_fini_8u_real, loc, gtid);
#endif

	RECHECK_INIT(__kmpc_dispatch_fini_8u_real);

	if (!(TRACE(__kmpc_dispatch_fini_8u_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_dispatch_fini_8u_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8u: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_dispatch_fini_8u_real (loc, gtid);
	}
	else
	{
		Probe_OpenMP_DO_Exit ();
		__kmpc_dispatch_fini_8u_real (loc, gtid);
		Probe_OpenMP_UF_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_dispatch_fini_8u exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

void __kmpc_fork_call (void *loc, int argc, void *microtask, ...)
{
	Backend_Enter_Instrumentation();
	void  *args[INTEL_OMP_FUNC_ENTRIES];
	char   kmpc_parallel_wrap_name[1024];
	char   kmpc_parallel_sched_name[1024];
	void (*kmpc_parallel_sched_ptr)(void*,int,void*,void*,void **) = NULL;
	void  *wrap_ptr = NULL;
	void  *task_ptr = microtask;
	va_list ap;
	int     i = 0;

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_fork_call_real, loc, argc, microtask);
#endif

	RECHECK_INIT(__kmpc_fork_call_real);

	if (__kmpc_fork_call_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
 		exit (-1);
	}

	/* Grab parameters */
	xmemset(args, 0, sizeof(args));

	va_start (ap, microtask);
	for (i=0; i<argc; i++)
	{
		args[i] = va_arg(ap, void *);
	}
	va_end (ap);

	/* 
	 * Store the outlined function.
	 * This corresponds to the start of a parallel region, 
	 * which is executed by the master thread only.
	 */
	par_func = task_ptr;

	/* Retrieve handler to the scheduling routine that will call __kmpc_fork_call_real with the correct number of arguments */
	snprintf(kmpc_parallel_sched_name, sizeof(kmpc_parallel_sched_name), "__kmpc_parallel_sched_%d_args", argc);
	kmpc_parallel_sched_ptr = (void(*)(void*,int,void*,void*,void **)) dlsym(RTLD_DEFAULT, kmpc_parallel_sched_name);
	if (kmpc_parallel_sched_ptr == NULL)
	{
		fprintf (stderr, PACKAGE_NAME": Error! Can't retrieve handler to stub '%s' (%d arguments)! Quitting!\n"
		                 PACKAGE_NAME":        Recompile Extrae to support this number of arguments!\n"
				 PACKAGE_NAME":        Use src/tracer/wrappers/OMP/genstubs-kmpc-11.sh to do so.\n"
				 PACKAGE_NAME":        Check Extrae FAQ Webpage (https://tools.bsc.es/sites/default/files/documentation/html/extrae/FAQ.html#execution-faq) for more details",
		                 kmpc_parallel_sched_name, argc);
		exit (-1);                                                                  

	}

	if (EXTRAE_ON())
	{
		Probe_OpenMP_ParRegion_Entry ();

		snprintf(kmpc_parallel_wrap_name, sizeof(kmpc_parallel_wrap_name), "__kmpc_parallel_wrap_%d_args", argc);
		wrap_ptr = dlsym(RTLD_DEFAULT, kmpc_parallel_wrap_name);
		if (wrap_ptr == NULL)
		{
			fprintf (stderr, PACKAGE_NAME": Error! Can't retrieve handler to stub '%s' (%d arguments)! Quitting!\n"
		 	                 PACKAGE_NAME":        Recompile Extrae to support this number of arguments!\n"
					 PACKAGE_NAME":        Use src/tracer/wrappers/OMP/genstubs-kmpc-11.sh to do so.\n"
					 PACKAGE_NAME":        Check Extrae FAQ Webpage (https://tools.bsc.es/sites/default/files/documentation/html/extrae/FAQ.html#execution-faq) for more details",
			                 kmpc_parallel_wrap_name, argc);
			exit (-1);                                                                  
		}
	}                      

	Backend_Leave_Instrumentation ();

	/* Call the scheduling routine. 
	 * If wrap_ptr is not NULL, it will interpose a call to wrap_ptr with an extra
	 * parameter with the real task_ptr, in order to instrument when the task
	 * starts executing.
	 */
	kmpc_parallel_sched_ptr(loc, argc, task_ptr, wrap_ptr, args); 
	Backend_Enter_Instrumentation ();

	if (EXTRAE_ON())
	{
		Probe_OpenMP_ParRegion_Exit ();	
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_fork_call_dyninst
 *
 *   dlsym() does not seem to work under Dyninst and we can't replace this
 *   function by itself (opposite to MPI, OpenMP does not have something like
 *   PMPI). Thus, we need to pass the address of the original __kmpc_fork_call
 *   (through _extrae_intel_kmpc_init_dyninst) and let the new 
 *   __kmpc_fork_call_dyninst do the work by finally calling to the pointer to
 *   __kmpc_fork_call passed.
 */
void __kmpc_fork_call_dyninst (void *loc, int argc, void *microtask, ...)
{
	void   *args[INTEL_OMP_FUNC_ENTRIES];
	char    kmpc_parallel_wrap_name[1024];
	char    kmpc_parallel_sched_name[1024];
	void  (*kmpc_parallel_sched_ptr)(void*,int,void*,void*,void **) = NULL;
	void   *wrap_ptr = NULL;
	void   *task_ptr = microtask;
	va_list ap;
	int     i = 0;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call_dyninst enter: @=%p __kmpc_fork_call_real=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_fork_call_dyninst, __kmpc_fork_call_real, loc, argc, microtask);
#endif

	RECHECK_INIT(__kmpc_fork_call_real);

	if (__kmpc_fork_call_real == NULL)
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call_dyninst: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

	/* Grab parameters */
	xmemset(args, 0, sizeof(args));

	va_start (ap, microtask);
  for (i=0; i<argc; i++)
	{
		args[i] = va_arg(ap, void *);
	}
	va_end (ap);

	/* Retrieve handler to the scheduling routine that will call __kmpc_fork_call_real with the correct number of arguments */

	snprintf(kmpc_parallel_sched_name, sizeof(kmpc_parallel_sched_name), "__kmpc_parallel_sched_%d_args", argc);
  kmpc_parallel_sched_ptr = (void(*)(void*,int,void*,void*,void **)) dlsym(RTLD_DEFAULT, kmpc_parallel_sched_name);
	if (kmpc_parallel_sched_ptr == NULL)                                          
	{
    fprintf (stderr, PACKAGE_NAME": Error! Can't retrieve handler to stub '%s' (%d arguments)! Quitting!\n"
		                 PACKAGE_NAME":        Recompile Extrae to support this number of arguments!\n"
				 PACKAGE_NAME":        Use src/tracer/wrappers/OMP/genstubs-kmpc-11.sh to do so.\n"
				 PACKAGE_NAME":        Check Extrae FAQ Webpage (https://tools.bsc.es/sites/default/files/documentation/html/extrae/FAQ.html#execution-faq) for more details",
		                 kmpc_parallel_sched_name, argc);
		exit (-1);                                                                  
	}

	if (EXTRAE_ON())
	{
		Probe_OpenMP_ParRegion_Entry ();
		Extrae_OpenMP_EmitTaskStatistics();

		snprintf(kmpc_parallel_wrap_name, sizeof(kmpc_parallel_wrap_name), "__kmpc_parallel_wrap_%d_args", argc);
		wrap_ptr = dlsym(RTLD_DEFAULT, kmpc_parallel_wrap_name);
		if (wrap_ptr == NULL)
		{
			fprintf (stderr, PACKAGE_NAME": Error! Can't retrieve handler to stub '%s' (%d arguments)! Quitting!\n"
		 	                 PACKAGE_NAME":        Recompile Extrae to support this number of arguments!\n"
					 PACKAGE_NAME":        Use src/tracer/wrappers/OMP/genstubs-kmpc-11.sh to do so.\n"
					 PACKAGE_NAME":        Check Extrae FAQ Webpage (https://tools.bsc.es/sites/default/files/documentation/html/extrae/FAQ.html#execution-faq) for more details",
			                 kmpc_parallel_wrap_name, argc);
			exit (-1);                                                                  
		}
	}

	Backend_Leave_Instrumentation ();

	/* Call the scheduling routine. 
	 * If wrap_ptr is not NULL, it will interpose a call to wrap_ptr with an extra
	 * parameter with the real task_ptr, in order to instrument when the task
	 * starts executing.
	 */
	kmpc_parallel_sched_ptr(loc, argc, task_ptr, wrap_ptr, args); 

	Backend_Enter_Instrumentation ();

	if (EXTRAE_ON())
	{
		Probe_OpenMP_ParRegion_Exit ();	
		Extrae_OpenMP_EmitTaskStatistics();
	}
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_fork_call_dyninst exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

int __kmpc_single (void *loc, int global_tid)
{
	Backend_Enter_Instrumentation ();
	int res = 0;

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_single enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_single_real, loc, global_tid);
#endif

	RECHECK_INIT(__kmpc_single_real);

	if (TRACE(__kmpc_single_real))
	{
		Probe_OpenMP_Single_Entry ();

		res = __kmpc_single_real (loc, global_tid);

		if (res) /* If the thread entered in the single region, track it */
		{
			/* 
			 * Retrieve the outlined function.
			 * This is executed inside a parallel by multiple threads, so the current worker thread 
			 * retrieves this data from the parent thread who store it at the start of the parallel.
			 */
		  	void *par_uf = par_func;
#if defined(DEBUG)
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_single: par_uf=%p\n ", THREAD_LEVEL_VAR, par_uf);
#endif

			Probe_OpenMP_UF_Entry (par_uf); 
		}
		else
		{
			Probe_OpenMP_Single_Exit ();
		}
	}
	else if (__kmpc_single_real != NULL)
	{
		res = __kmpc_single_real (loc, global_tid);
	}
	else
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_single: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_single exit: res=%d\n ", THREAD_LEVEL_VAR, res);
#endif
	Backend_Leave_Instrumentation ();
	return res;
}

/**
 * __kmpc_end_single
 *
 * Wrapper around the real __kmpc_end_single of the OpenMP runtime. When
 * tracing is active, the UF exit probe is emitted before the runtime call
 * and the single-exit probe after it.
 */
void __kmpc_end_single (void *loc, int global_tid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_single enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_end_single_real, loc, global_tid);
#endif

	RECHECK_INIT(__kmpc_end_single_real);

	if (!(TRACE(__kmpc_end_single_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_end_single_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_single: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_end_single_real (loc, global_tid);
	}
	else
	{
		/* Only the thread that entered the single region gets here */
		Probe_OpenMP_UF_Exit ();
		__kmpc_end_single_real (loc, global_tid);
		Probe_OpenMP_Single_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_end_single exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_omp_task_alloc
 *
 * Wrapper around the real __kmpc_omp_task_alloc of the OpenMP runtime. When
 * tracing is active, the task is allocated with helper__kmpc_task_substitute
 * as its entry point instead of task_entry, and the (allocated task,
 * task_entry) pair is recorded with helper__kmpc_task_register.
 *
 * @return The pointer returned by the real __kmpc_omp_task_alloc.
 */
void * __kmpc_omp_task_alloc (void *loc, int gtid, int flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, void *task_entry)
{
	void *new_task = NULL;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_alloc enter: @=%p args=(%p %d %d %d %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_omp_task_alloc_real, loc, gtid, flags, (int)sizeof_kmp_task_t, (int)sizeof_shareds, task_entry);
#endif

	RECHECK_INIT(__kmpc_omp_task_alloc_real);

	if (!(TRACE(__kmpc_omp_task_alloc_real)))
	{
		/* Not tracing: allocate the task with its original entry point */
		if (__kmpc_omp_task_alloc_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_alloc: ERROR! This function is not hooked. Exiting!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		new_task = __kmpc_omp_task_alloc_real (loc, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, task_entry);
	}
	else
	{
		Probe_OpenMP_Task_Entry (task_entry);
		Extrae_OpenMP_Notify_NewInstantiatedTask();

		/*
		 * The task is created with the callback helper__kmpc_task_substitute
		 * as its routine. The returned (wrapped) task pointer is then tied to
		 * the real entry point through helper__kmpc_task_register, so that
		 * the real task can be looked up from the wrapped one later on
		 * (see helper__kmpc_task_substitute).
		 */
		new_task = __kmpc_omp_task_alloc_real (loc, gtid, flags, sizeof_kmp_task_t, sizeof_shareds, helper__kmpc_task_substitute);
		helper__kmpc_task_register (new_task, task_entry);

		Probe_OpenMP_Task_Exit ();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_alloc exit: res=%p\n ", THREAD_LEVEL_VAR, new_task);
#endif

	Backend_Leave_Instrumentation ();
	return new_task;
}

/**
 * __kmpc_omp_task_begin_if0
 *
 * Wrapper around the real __kmpc_omp_task_begin_if0 of the OpenMP runtime.
 * The real entry point of the task is looked up with
 * helper__kmpc_task_retrieve. When tracing is active for it, the task UF
 * entry probe is emitted before forwarding to the runtime.
 *
 * The process is terminated if no substitution is known for the task or if
 * the real runtime routine is not hooked.
 */
void __kmpc_omp_task_begin_if0 (void *loc, int gtid, void *task)
{
	struct helper__kmpc_task_t *task_info = NULL;
	void (*__kmpc_task_substituted_func)(int,void*) = NULL;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_begin_if0 enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_omp_task_begin_if0_real, loc, gtid, task);
#endif

	RECHECK_INIT(__kmpc_omp_task_begin_if0_real);

	/* Guard the lookup: a missing record must reach the "did not find"
	 * error below instead of being dereferenced */
	task_info = helper__kmpc_task_retrieve (task);
	if (task_info != NULL)
	{
		__kmpc_task_substituted_func = task_info->real_task;
	}

	if (TRACE(__kmpc_task_substituted_func))
	{
		if (__kmpc_omp_task_begin_if0_real != NULL)
		{
			Probe_OpenMP_TaskUF_Entry (__kmpc_task_substituted_func);
			Extrae_OpenMP_Notify_NewInstantiatedTask();
			__kmpc_omp_task_begin_if0_real (loc, gtid, task);
		}
		else
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_begin_if0: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
	}
	else if (__kmpc_task_substituted_func != NULL)
	{
		/* Not tracing: forward, but never through a NULL hook */
		if (__kmpc_omp_task_begin_if0_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_begin_if0: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		__kmpc_omp_task_begin_if0_real (loc, gtid, task);
	}
	else
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_begin_if0: Did not find task substitution for task=%p\n ", THREAD_LEVEL_VAR, task);
		exit (-1);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_begin_if0 exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_omp_task_complete_if0
 *
 * Wrapper around the real __kmpc_omp_task_complete_if0 of the OpenMP
 * runtime. When tracing is active, the runtime call is followed by the
 * executed-task notification and the task UF exit probe.
 */
void __kmpc_omp_task_complete_if0 (void *loc, int gtid, void *task)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_complete_if0 enter: @=%p args=(%p %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_omp_task_complete_if0_real, loc, gtid, task);
#endif

	RECHECK_INIT(__kmpc_omp_task_complete_if0_real);

	if (TRACE(__kmpc_omp_task_complete_if0_real))
	{
		__kmpc_omp_task_complete_if0_real (loc, gtid, task);
		Extrae_OpenMP_Notify_NewExecutedTask();
		Probe_OpenMP_TaskUF_Exit ();
	}
	else if (__kmpc_omp_task_complete_if0_real == NULL)
	{
		/* Nothing to forward to */
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_complete_if0: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}
	else
	{
		/* Not tracing: plain pass-through */
		__kmpc_omp_task_complete_if0_real (loc, gtid, task);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_task_complete_if0 exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_omp_taskwait
 *
 * Wrapper around the real __kmpc_omp_taskwait of the OpenMP runtime. When
 * tracing is active, the taskwait probes and the task statistics are emitted
 * both before and after the runtime call, and the instrumentation is left
 * for the duration of that call.
 *
 * @return The value returned by the real __kmpc_omp_taskwait.
 */
int __kmpc_omp_taskwait (void *loc, int gtid)
{
	int status;

	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_taskwait enter: @=%p args=(%p %d)\n ", THREAD_LEVEL_VAR, __kmpc_omp_taskwait_real, loc, gtid);
#endif

	RECHECK_INIT(__kmpc_omp_taskwait_real);

	if (!(TRACE(__kmpc_omp_taskwait_real)))
	{
		/* Not tracing: forward to the runtime, provided it was resolved */
		if (__kmpc_omp_taskwait_real == NULL)
		{
			fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_taskwait: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
			exit (-1);
		}
		status = __kmpc_omp_taskwait_real (loc, gtid);
	}
	else
	{
		Probe_OpenMP_Taskwait_Entry();
		Extrae_OpenMP_EmitTaskStatistics();

		/* Step out of the instrumentation while the real routine runs */
		Backend_Leave_Instrumentation ();
		status = __kmpc_omp_taskwait_real (loc, gtid);
		Backend_Enter_Instrumentation ();

		Probe_OpenMP_Taskwait_Exit();
		Extrae_OpenMP_EmitTaskStatistics();
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_omp_taskwait exit: res=%d\n ", THREAD_LEVEL_VAR, status);
#endif

	Backend_Leave_Instrumentation ();
	return status;
}

/**
 * __kmpc_taskloop
 *
 * When the user application has a taskloop, before the runtime executes this
 * instrumented function, it executes __kmpc_omp_task_alloc, which is also 
 * instrumented. This means that we have captured the allocation of the task,
 * and modified it to invoke the callback helper__kmpc_task_substitute, and
 * added a new entry on the list of (wrapped,real) tasks. The parameter "task"
 * of this function corresponds to the wrapped task, whose routine_entry_ptr
 * field (it's a kmp_task_t struct) points to our callback.
 *
 * However, the runtime internally makes copies of this task, so we can no
 * longer use its pointer to retrieve the real task from our list. To solve
 * this:
 * 1) We retrieve the real task pointer before calling the runtime.
 * 2) We modify the field routine_entry_ptr from the "kmp_task_t task"
 * parameter, directly offsetting the structure, because we have looked in the 
 * libomp how it is implemented. Instead, we assign a pointer to one helper 
 * function, each time one different, so that the tasks executed from different
 * taskloops rely on a separate helper. 
 * 3) We assign helpers incrementally, and the ID of the helper is used to save
 * a map of "helper id" -> "real task pointer".
 * 4) When the runtime executes the task, the helper pointed by
 * routine_entry_ptr will be invoked. These helpers are static routines
 * generated by us, so that each routine passes one last parameter with its own 
 * ID (see intel-kmpc-11-taskloop-helpers.c), and we use the ID to retrieve 
 * the real task pointer.
 *
 * NOTE: Currently, we have support for 1024 helpers, then they start being 
 * reused. So as long as there's no more than 1024 different taskloops going 
 * on simultaneously, this structure should hold.
 */
void __kmpc_taskloop(void *loc, int gtid, void *task, int if_val, void *lb, void *ub, long st, int nogroup, int sched, long grainsize, void *task_dup)
{
	int helper_id = 0;
	void *real_task = NULL;
	struct helper__kmpc_task_t *task_info = NULL;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop enter: @=%p args=(%p %d %p %d %p %p %ld %d %d %ld %p)\n ", THREAD_LEVEL_VAR, __kmpc_taskloop_real, loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, task_dup);
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop: instrumentation is %s\n", THREAD_LEVEL_VAR, (getTrace_OMPTaskloop() ? "enabled" : "disabled"));
#endif

	RECHECK_INIT(__kmpc_taskloop_real);

	/* Retrieve the real task pointer from the list that is maintained in the
	 * instrumented function __kmpc_omp_task_alloc. That function only
	 * registers tasks while tracing, so the lookup result is checked before
	 * it is dereferenced. */
	task_info = helper__kmpc_task_retrieve (task);
	if (task_info != NULL)
	{
		real_task = task_info->real_task;
	}

	/* Without a known real task there is nothing a helper could dispatch to,
	 * so the taskloop is only instrumented when the lookup succeeded */
	if (TRACE(__kmpc_taskloop_real) && (getTrace_OMPTaskloop()) && (real_task != NULL))
	{
		Probe_OpenMP_TaskLoop_Entry ();

		/* Assign a new helper for this taskloop */
		pthread_mutex_lock(&hl__kmpc_taskloop_mtx);
		helper_id = hl__kmpc_taskloop->next_id;
		hl__kmpc_taskloop->next_id = (hl__kmpc_taskloop->next_id + 1) % MAX_TASKLOOP_HELPERS;
		pthread_mutex_unlock(&hl__kmpc_taskloop_mtx);

		/* Modify the routine_entry_ptr field from the "kmp_task_t task", to point
		 * to the corresponding helper function. The field is addressed one
		 * pointer past the start of the structure; the offset is computed on a
		 * char pointer because arithmetic on void * is not standard C. */
		void **routine_entry_ptr = (void **) ((char *) task + sizeof(void *));
		*routine_entry_ptr = get_taskloop_helper_fn_ptr(helper_id);

		/* Save a map of helper_id => real_task */
		hl__kmpc_taskloop->real_task_map_by_helper[helper_id] = real_task;

		/* Call the runtime */
		__kmpc_taskloop_real(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, task_dup);
		Probe_OpenMP_TaskLoop_Exit ();
	}
	else if (__kmpc_taskloop_real != NULL)
	{
		__kmpc_taskloop_real(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, task_dup);
	}
	else
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

void __kmpc_taskloop_5(void *loc, int gtid, void *task, int if_val, void *lb, void *ub, long st, int nogroup, int sched, long grainsize, int modifier, void *task_dup)
{
	int helper_id = 0;
	void *real_task = NULL;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop_5 enter: @=%p args=(%p %d %p %d %p %p %ld %d %d %ld %d %p)\n ", THREAD_LEVEL_VAR, __kmpc_taskloop_5_real, loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, modifier, task_dup);
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop: instrumentation is %s\n", THREAD_LEVEL_VAR, (getTrace_OMPTaskloop() ? "enabled" : "disabled"));
#endif

	RECHECK_INIT(__kmpc_taskloop_5_real);

	/* Retrieve the real task pointer from the list that is maintained in the
	 * instrumented function __kmpc_omp_task_alloc */
	real_task = helper__kmpc_task_retrieve(task);

	if (TRACE(__kmpc_taskloop_5_real) && (getTrace_OMPTaskloop()))
	{
		Probe_OpenMP_TaskLoop_Entry ();

		/* Assign a new helper for this taskloop */
		pthread_mutex_lock(&hl__kmpc_taskloop_mtx);
		helper_id = hl__kmpc_taskloop->next_id;
	  hl__kmpc_taskloop->next_id = (hl__kmpc_taskloop->next_id + 1) % MAX_TASKLOOP_HELPERS;
		pthread_mutex_unlock(&hl__kmpc_taskloop_mtx);

		/* Modify the routine_entry_ptr field from the "kmp_task_t task", to point
		 * to the corresponding helper function */
		void **routine_entry_ptr = task + sizeof(void *);
		*routine_entry_ptr = get_taskloop_helper_fn_ptr(helper_id);

		/* Save a map of helper_id => real_task */
		hl__kmpc_taskloop->real_task_map_by_helper[helper_id] = real_task;

		/* Call the runtime */
		__kmpc_taskloop_5_real(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, modifier, task_dup);
		Probe_OpenMP_TaskLoop_Exit ();
	}
	else if (__kmpc_taskloop_5_real != NULL)
	{
		__kmpc_taskloop_5_real(loc, gtid, task, if_val, lb, ub, st, nogroup, sched, grainsize, modifier, task_dup);
	}
	else
	{
		fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop_5: ERROR! This function is not hooked! Exiting!!\n ", THREAD_LEVEL_VAR);
		exit (-1);
	}

#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "__kmpc_taskloop_5 exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_taskgroup
 *
 * Wrapper around the real __kmpc_taskgroup of the OpenMP runtime. When
 * tracing is active, the taskgroup-start probes surround the runtime call
 * and the task statistics are emitted before it.
 */
void
__kmpc_taskgroup(void *loc, int global_tid)
{
	Backend_Enter_Instrumentation ();

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_taskgroup enter: @=%p args=(%p %d)\n ",
	    THREAD_LEVEL_VAR, __kmpc_taskgroup_real, loc, global_tid);
#endif

	RECHECK_INIT(__kmpc_taskgroup_real);

	if (TRACE(__kmpc_taskgroup_real))
	{
		Probe_OpenMP_Taskgroup_start_Entry();
		Extrae_OpenMP_EmitTaskStatistics();
		__kmpc_taskgroup_real(loc, global_tid);
		Probe_OpenMP_Taskgroup_start_Exit();
	}
	else if (__kmpc_taskgroup_real == NULL)
	{
		/* Nothing to forward to */
		fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
		    "__kmpc_taskgroup: ERROR! This function is not hooked! Exiting!!\n ",
		    THREAD_LEVEL_VAR);
		exit(-1);
	}
	else
	{
		/* Not tracing: plain pass-through */
		__kmpc_taskgroup_real(loc, global_tid);
	}

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_taskgroup exit\n ", THREAD_LEVEL_VAR);
#endif

	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_end_taskgroup
 *
 * Wrapper around the real __kmpc_end_taskgroup of the OpenMP runtime. When
 * tracing is active, the taskgroup-end probes surround the runtime call and
 * the task statistics are emitted after it.
 */
void
__kmpc_end_taskgroup(void *loc, int global_tid)
{
	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_end_taskgroup enter: @=%p args=(%p %d)\n ",
	    THREAD_LEVEL_VAR, __kmpc_end_taskgroup_real, loc, global_tid);
#endif

	RECHECK_INIT(__kmpc_end_taskgroup_real);

	if (TRACE(__kmpc_end_taskgroup_real))
	{
		Probe_OpenMP_Taskgroup_end_Entry();
		__kmpc_end_taskgroup_real (loc, global_tid);
		Probe_OpenMP_Taskgroup_end_Exit();
		Extrae_OpenMP_EmitTaskStatistics();
	}
	else if (__kmpc_end_taskgroup_real != NULL)
	{
		__kmpc_end_taskgroup_real(loc, global_tid);
	}
	else
	{
		fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
		    "__kmpc_end_taskgroup: ERROR! This function is not hooked! Exiting!!\n ",
		    THREAD_LEVEL_VAR);
		exit (-1);
	}

#if defined(DEBUG)
	/* Report this wrapper's own name on exit */
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_end_taskgroup exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation ();
}

/**
 * __kmpc_push_num_threads
 *
 * Wrapper around the real __kmpc_push_num_threads of the OpenMP runtime.
 * When tracing is active, OMP_CLAUSE_NUM_THREADS_CHANGE is applied and the
 * set-num-threads probes surround the runtime call; otherwise the call is
 * forwarded untouched. The process is terminated if the real routine is not
 * hooked.
 */
void
__kmpc_push_num_threads(void *loc, int global_tid, int num_threads)
{
	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_push_num_threads enter: @=%p args=(%p %d %d)\n ",
	    THREAD_LEVEL_VAR, __kmpc_push_num_threads_real, loc, global_tid, num_threads);
#endif

	RECHECK_INIT(__kmpc_push_num_threads_real);

	if (TRACE(__kmpc_push_num_threads_real))
	{
		/*
		 * Change number of threads only if in a library not mixing runtimes.
		 */
		OMP_CLAUSE_NUM_THREADS_CHANGE(num_threads);

		Probe_OpenMP_SetNumThreads_Entry(num_threads);
		__kmpc_push_num_threads_real(loc, global_tid, num_threads);
		Probe_OpenMP_SetNumThreads_Exit();
		/* The instrumentation is left once, at the end of the function */
	}
	else if (__kmpc_push_num_threads_real != NULL)
	{
		/* Forward to the real runtime routine; calling the wrapper itself
		 * here would recurse without end */
		__kmpc_push_num_threads_real(loc, global_tid, num_threads);
	}
	else
	{
		fprintf(stderr, PACKAGE_NAME
		    ": __kmpc_push_num_threads: ERROR! This function is not hooked! Exiting!!\n");
		exit(-1);
	}

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_push_num_threads exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}

/**
 * __kmpc_omp_taskyield
 *
 * Wrapper for the runtime's __kmpc_omp_taskyield. The real call is always
 * forwarded; when tracing applies it is additionally surrounded by the
 * Taskyield entry/exit probes. If tracing does not apply and the real
 * symbol is unresolved, an error is printed and the process exits.
 *
 * @param loc        Passed through unchanged to the real function.
 * @param global_tid Passed through unchanged to the real function.
 * @param end_part   Passed through unchanged to the real function.
 */
void
__kmpc_omp_taskyield(void *loc, uint32_t global_tid, int end_part)
{
	int traced;

	Backend_Enter_Instrumentation ();
#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_omp_taskyield enter: @=%p args=(%p %u %d)\n ",
	    THREAD_LEVEL_VAR, __kmpc_omp_taskyield_real, loc, global_tid, end_part);
#endif

	RECHECK_INIT(__kmpc_omp_taskyield_real);

	/* Evaluate the tracing condition exactly once. */
	traced = TRACE(__kmpc_omp_taskyield_real) ? 1 : 0;

	/* Nothing to forward to: bail out. */
	if (!traced && __kmpc_omp_taskyield_real == NULL)
	{
		fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
		    "__kmpc_taskyield: ERROR! This function is not hooked! Exiting!!\n ",
		    THREAD_LEVEL_VAR);
		exit(-1);
	}

	if (traced)
	{
		Probe_OpenMP_Taskyield_Entry();
	}

	__kmpc_omp_taskyield_real(loc, global_tid, end_part);

	if (traced)
	{
		Probe_OpenMP_Taskyield_Exit();
	}

#if defined(DEBUG)
	fprintf(stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL
	    "__kmpc_taskyield exit\n ", THREAD_LEVEL_VAR);
#endif
	Backend_Leave_Instrumentation();
}


/******************************************************************************\
 *                                                                            *
 *                             INITIALIZATIONS                                *
 *                                                                            *
\******************************************************************************/

/*
 * KMPC_RESOLVE_NEXT
 *
 * Local helper for intel_kmpc_get_hook_points: looks up the next definition
 * of `sym` with dlsym(RTLD_NEXT, ...), stores it (cast to `fn_type`) in the
 * matching `sym_real` pointer and lets INC_IF_NOT_NULL update the local
 * `count`.
 */
#define KMPC_RESOLVE_NEXT(sym, fn_type)                       \
	do {                                                      \
		sym##_real = (fn_type) dlsym (RTLD_NEXT, #sym);       \
		INC_IF_NOT_NULL(sym##_real, count);                   \
	} while (0)

/**
 * intel_kmpc_get_hook_points
 *
 * Resolves the real implementation behind every wrapped OpenMP runtime
 * symbol. Each lookup uses dlsym with RTLD_NEXT so that our own wrapper is
 * skipped and the next definition of the symbol is returned; the result is
 * stored in the corresponding *_real function pointer.
 *
 * __kmpc_fork_call is only looked up (and only counted) when its pointer is
 * still NULL on entry, so a value installed beforehand is preserved.
 *
 * @param rank The current process ID (not used).
 *
 * @return 1 if any hook was found; 0 otherwise.
 */
static int intel_kmpc_get_hook_points (int rank)
{
	int count = 0;

	UNREFERENCED_PARAMETER(rank);

	/* Thread-count control */
	KMPC_RESOLVE_NEXT(ompc_set_num_threads, void(*)(int));

	/* Barriers, critical sections and locks */
	KMPC_RESOLVE_NEXT(__kmpc_barrier, void(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_critical, void(*)(void*,int,void*));
	KMPC_RESOLVE_NEXT(__kmpc_critical_with_hint, void(*)(void*,int,void*,uint32_t));
	KMPC_RESOLVE_NEXT(__kmpc_end_critical, void(*)(void*,int,void*));
	KMPC_RESOLVE_NEXT(__kmpc_set_lock, void(*)(void*, int, void **));
	KMPC_RESOLVE_NEXT(__kmpc_unset_lock, void(*)(void*, int, void **));

	/* Loop dispatch: init */
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_init_4,
	    void(*)(void*,int,int,int,int,int,int));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_init_4u,
	    void(*)(void*,int,int,unsigned int,unsigned int,int,int));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_init_8,
	    void(*)(void*,int,int,long long,long long,long long,long long));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_init_8u,
	    void(*)(void*,int,int,unsigned long long,unsigned long long,long long,long long));

	/* Loop dispatch: next */
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_next_4,
	    int(*)(void*,int,int*,int*,int*,int*));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_next_4u,
	    int(*)(void*,int,int*,unsigned int*,unsigned int*,int*));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_next_8,
	    int(*)(void*,int,int*,long long *,long long *, long long *));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_next_8u,
	    int(*)(void*,int,int*,unsigned long long *,unsigned long long *, long long *));

	/* Loop dispatch: fini */
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_fini_4, void(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_fini_4u, void(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_fini_8, void(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_dispatch_fini_8u, void(*)(void*,int));

	/*
	 * Parallel region fork. Careful, do not overwrite the pointer to the
	 * real call if Dyninst has already done it.
	 */
	if (__kmpc_fork_call_real == NULL)
	{
		KMPC_RESOLVE_NEXT(__kmpc_fork_call, void(*)(void*,int,void*,...));
	}

	/* Single construct */
	KMPC_RESOLVE_NEXT(__kmpc_single, int(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_end_single, void(*)(void*,int));

	/* Tasking */
	KMPC_RESOLVE_NEXT(__kmpc_omp_task_alloc,
	    void*(*)(void*,int,int,size_t,size_t,void*));
	KMPC_RESOLVE_NEXT(__kmpc_omp_task_begin_if0, void(*)(void*,int,void*));
	KMPC_RESOLVE_NEXT(__kmpc_omp_task_complete_if0, void(*)(void*,int,void*));
	KMPC_RESOLVE_NEXT(__kmpc_omp_taskwait, int(*)(void*,int));
	KMPC_RESOLVE_NEXT(__kmpc_taskloop,
	    void(*)(void*,int,void*,int,void*,void*,long,int,int,long,void*));
	KMPC_RESOLVE_NEXT(__kmpc_taskloop_5,
	    void(*)(void*,int,void*,int,void*,void*,long,int,int,long,int,void*));
	KMPC_RESOLVE_NEXT(__kmpc_taskgroup, void(*)(void *, int));
	KMPC_RESOLVE_NEXT(__kmpc_end_taskgroup, void(*)(void *, int));

	/* num_threads clause and taskyield */
	KMPC_RESOLVE_NEXT(__kmpc_push_num_threads, void(*)(void *, int, int));
	KMPC_RESOLVE_NEXT(__kmpc_omp_taskyield, void(*)(void*, uint32_t, int));

	/* Any hook point? */
	return count > 0;
}

#undef KMPC_RESOLVE_NEXT

/**
 * _extrae_intel_kmpc_init_dyninst
 *
 * __kmpc_fork_call can not be wrapped as usual with Dyninst because dlsym() 
 * fails to find the real symbol. We pass the pointer to the real function
 * from the Dyninst launcher.
 *
 * @param fork_call_ptr The pointer to the real __kmpc_fork_call.
 */
void _extrae_intel_kmpc_init_dyninst(void *fork_call_ptr)
{
#if defined(DEBUG)
	fprintf (stderr, PACKAGE_NAME ":" THREAD_LEVEL_LBL "_extrae_intel_kmpc_init_dyninst enter: args=(%p)\n ", THREAD_LEVEL_VAR, fork_call_ptr);
#endif

	__kmpc_fork_call_real = (void(*)(void*,int,void*,...)) fork_call_ptr;
}

/**
 * _extrae_intel_kmpc_init
 *
 * Entry point that initializes the instrumentation module for Intel KMPC:
 * first calls preallocate_kmpc_helpers(), then resolves the hook points.
 *
 * @param rank The current process ID (forwarded to
 *             intel_kmpc_get_hook_points, which does not use it).
 *
 * @return The result of intel_kmpc_get_hook_points(): 1 if any hook was
 *         found; 0 otherwise.
 */
int _extrae_intel_kmpc_init(int rank)
{
	int hooks_found;

	preallocate_kmpc_helpers();
	hooks_found = intel_kmpc_get_hook_points(rank);

	return hooks_found;
}

#endif /* PIC */
