/*****************************************************************************\
 *                        ANALYSIS PERFORMANCE TOOLS                         *
 *                                   Extrae                                  *
 *              Instrumentation package for parallel applications            *
 *****************************************************************************
 *     ___     This library is free software; you can redistribute it and/or *
 *    /  __         modify it under the terms of the GNU LGPL as published   *
 *   /  /  _____    by the Free Software Foundation; either version 2.1      *
 *  /  /  /     \   of the License, or (at your option) any later version.   *
 * (  (  ( B S C )                                                           *
 *  \  \  \_____/   This library is distributed in hope that it will be      *
 *   \  \__         useful but WITHOUT ANY WARRANTY; without even the        *
 *    \___          implied warranty of MERCHANTABILITY or FITNESS FOR A     *
 *                  PARTICULAR PURPOSE. See the GNU LGPL for more details.   *
 *                                                                           *
 * You should have received a copy of the GNU Lesser General Public License  *
 * along with this library; if not, write to the Free Software Foundation,   *
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA          *
 * The GNU Lesser General Public License is contained in the file COPYING.   *
 *                                 ---------                                 *
 *   Barcelona Supercomputing Center - Centro Nacional de Supercomputacion   *
\*****************************************************************************/

#include "common.h"

#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_LIBGEN_H
# include <libgen.h>
#endif
#ifdef HAVE_STDIO_H
# include <stdio.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
#ifdef HAVE_TIME_H
# include <time.h>
#endif
#ifdef HAVE_SYS_TYPES_H
# include <sys/types.h>
#endif
#ifdef HAVE_SYS_STAT_H
# include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
# include <fcntl.h>
#endif
#ifdef HAVE_CTYPE_H
# include <ctype.h>
#endif
#ifdef HAVE_UNISTD_H
# include <unistd.h>
#endif

#include "utils.h"
#include "xalloc.h"
#include "semantics.h"
#include "dump.h"
#include "file_set.h"
#include "object_tree.h"
#include "mpi2out.h"
#include "trace_to_prv.h"
#include "trace_to_trf.h"
#include "labels.h"
#include "addr2info_hashcache.h"
#include "paraver_state.h"
#include "options.h"
#include "addresses.h"
#include "intercommunicators.h"
#include "HardwareCounters.h"

#if defined(PARALLEL_MERGE)
# include "parallel_merge_aux.h"
# include "mpi-aux.h"
# include <mpi.h>
#endif

#if defined(HAVE_LIBADDR2LINE)
# include "addr2info.h" 
#endif

/* Policies used to distribute the traced tasks among the merger processes.
 * They are selected through the -block, -cyclic, -size and -consecutive-size
 * command line options (see ProcessArgs). */
typedef enum {Block, Cyclic, Size, ConsecutiveSize} WorkDistribution_t;

/* Table of input trace files; Process_MPIT_File appends one entry per file. */
static struct input_t *InputTraces = NULL;
/* Number of entries currently stored in InputTraces. */
unsigned nTraces = 0;
/* Starts as TRUE and is cleared by ProcessArgs when an explicit
 * synchronization option (-syn-by-task, -syn-by-node, -no-syn) is given or
 * when a Dimemas trace is generated. */
static int AutoSincronitzaTasks = TRUE;
/* Work distribution policy in effect (Block unless changed by an option). */
static WorkDistribution_t WorkDistribution= Block;
/* Names (duplicated strings) of every .mpits list file opened so far by
 * Read_MPITS_file, and how many of them there are. */
static char **MPITS_Files = NULL;
static unsigned Num_MPITS_Files = 0;

/* Placeholder passed to Labels_loadSYMfile for the arguments that
 * loadGlobalSYMfile does not provide (presumably ignored by the callee for
 * global symbol files -- TODO confirm). */
#define NOT_USED 0

/******************************************************************************
 ***  Help
 ***  Writes the usage synopsis and the list of accepted options to stdout.
 ***  ProgName is the program name shown in the three usage lines.
 ******************************************************************************/

void Help (const char *ProgName)
{
  /* Only the synopsis depends on the program name */
  printf ("Usage: %s inputfile1 ... [--] inputfileN [-o <OutputFile>] [otheroptions]\n"
          "       %s -f file.mpits [-o <OutputFile>] [otheroptions]\n"
          "       %s -h\n",
          ProgName, ProgName, ProgName);

  /* The option list is constant text, so it is emitted verbatim */
  fputs ("Options:\n"
         "    -h                    Get this help.\n"
         "    -v                    Increase verbosity.\n"
         "    -absolute-counters    Emit hardware counters in absolute form in addition to relative form.\n"
         "    -o <file>             Output trace file name.\n"
         "    -e <file>             Uses the specified executable file to translate memory addresses into debugging information (superseded by /proc/self/maps if available).\n"
         "    -f <file>             Specifies the \".mpits\" file that contains the list of names of the \".mpit\" input files.\n"
         "    -syn-by-task          Synchronize traces at the MPI task-level using the MPI_Init information.\n"
         "    -syn-by-node          Synchronize traces at the MPI node-level using the MPI_Init information (default).\n"
         "    -syn-apps             Align all applications at their synchronization points.\n"
         "    -no-syn               Do not synchronize traces at the end of MPI_Init.\n"
         "    -maxmem <MB>          Uses up to <MB> megabytes of memory at the last step of merging process.\n"
         "    -dimemas              Force the generation of a Dimemas trace.\n"
         "    -paraver              Force the generation of a Paraver trace.\n"
         "    -[no-]keep-mpits      Keeps (default) or removes MPIT files after trace generation.\n"
         "    -[no-]trace-overwrite Overwrite (default) or prevent overwriting the trace file.\n"
         "    -stop-at-percentage   Stops the merging process at the specified percentage.\n"
#if defined(IS_BG_MACHINE)
         "    -xyzt                 Generates additional output file with BG/L torus coordinates.\n"
#endif
#if defined(PARALLEL_MERGE)
         "    -tree-fan-out <N>     Orders the parallel merge to distribute its work in a N-order tree.\n"
         "    -cyclic               Distributes MPIT files cyclically among tasks.\n"
         "    -block                Distributes MPIT files in a block fashion among tasks.\n"
         "    -size                 Distributes MPIT trying to build groups of equal size.\n"
         "    -consecutive-size     Distributes MPIT files in a block fashion considering file size.\n"
         "    -use-disk-for-comms   Uses the disk instead of memory to match foreign communications.\n"
#endif
         "    -s <file>                      Indicates the symbol (*.sym) file attached to the *.mpit files.\n"
         "    -d/-dump                       Sequentially dumps the contents of every *.mpit file.\n"
         "    -[no-]dump-time                Show (default) or supress event timestamps when dumping events (useful for testing).\n"
         "    -[no-]dump-symtab              Include or omit (default) the main's task symbol table in the PCF.\n"
         "    -remove-files                  Remove intermediate files after processing them.\n"
         "    -split-states                  Do not merge consecutives states that are the same.\n"
         "    -skip-sendrecv                 Do not emit communication for SendReceive operations.\n"
         "    -unique-caller-id              Enable unique value identifiers for different callers.\n"
         "    -[no-]translate-addresses      Translate code addresses (callstack, kernels, outlined...) into source references (default), or keep raw addresses.\n"
         "    -[no-]translate-data-addresses Translate data addresses (PEBS samples) to their full allocation callpath (default), or keep raw <library, offset> identifiers.\n"
         "    -emit-library-events           Emit library information for unknown references if possible.\n"
         "    -sort-addresses                Sort source code references by <line, filename>.\n"
         "    -task-view                     Swap the thread level in Paraver timeline to show Nanos Tasks.\n"
         "    --                             Take the next trace files as a diferent parallel task.\n"
         "\n",
         stdout);
}

/******************************************************************************
 ***  Process_MPIT_File
 ***  Adds an MPIT file into the required structures.
 ***
 ***  file     Name of the .mpit file. Its tail is expected to be
 ***           <node separator><node>.<PID><TASK><THREAD><EXT_MPIT>, where the
 ***           three numeric fields have DIGITS_PID, DIGITS_TASK and
 ***           DIGITS_THREAD characters respectively.
 ***  thdname  Optional thread name; when NULL a "THREAD p.t.th" name is built.
 ***  cptask   Points to the ptask (application) the file is assigned to.
 ***  taskid   Rank of the calling merger process; only rank 0 queries the
 ***           file size.
 ***
 ***  Files whose name does not carry the expected extension, or is too short
 ***  to hold the identifiers, are reported on stderr and skipped without
 ***  touching the trace table.
 ******************************************************************************/

static void Process_MPIT_File (char *file, char *thdname, int *cptask,
	int taskid)
{
	static const char unknown_node[] = "(unknown)";
	const size_t file_len = strlen (file);
	const size_t ext_len = strlen (EXT_MPIT);
	/* '.' + PID + TASK + THREAD + extension */
	const size_t suffix_len = ext_len + DIGITS_PID + DIGITS_TASK + DIGITS_THREAD + 1;
	/* 7+4 for THREAD + (ptask + three dots) THREAD 1.1.1 */
	const size_t thdname_size = 10 + DIGITS_TASK + DIGITS_THREAD + 1;
	int cur_ptask = *cptask;
	const char *digits;
	size_t pos;
	size_t hostname_len;
	int task;
	int thread;
	int i;

	/* Validate the name BEFORE indexing into it or allocating anything, so
	   that short names cannot make the offsets below wrap around and skipped
	   files do not leak memory */
	if (file_len < ext_len || strcmp (&file[file_len - ext_len], EXT_MPIT) != 0)
	{
		fprintf (stderr, "mpi2prv: Error! File %s does not contain a valid extension!. Skipping.\n", file);
		return;
	}
	if (file_len < suffix_len)
	{
		fprintf (stderr, "mpi2prv: Error! File %s is too short to contain the process, task and thread identifiers!. Skipping.\n", file);
		return;
	}

	InputTraces = xrealloc(InputTraces, sizeof(struct input_t) * (nTraces + 1));

	InputTraces[nTraces].InputForWorker = -1;
	InputTraces[nTraces].name = (char *) xmalloc (file_len + 1);
	strcpy (InputTraces[nTraces].name, file);

	/* Walk backwards from the '.' that precedes the PID until the node
	   separator is found. hostname_len counts the characters walked over,
	   that is, the node name plus that '.' */
	pos = file_len - suffix_len;
	hostname_len = 0;
	while (file[pos] != TEMPLATE_NODE_SEPARATOR_CHAR && pos > 0)
	{
		hostname_len++;
		pos--;
	}

	if (file[pos] == TEMPLATE_NODE_SEPARATOR_CHAR)
	{
		InputTraces[nTraces].node = (char*) xmalloc((hostname_len+1)*sizeof(char));
		/* snprintf writes nothing when the size is 0, keep the string valid */
		InputTraces[nTraces].node[0] = (char) 0;
		/* The size (hostname_len) drops the trailing '.' from the copy */
		snprintf (InputTraces[nTraces].node, hostname_len, "%s", &file[pos+1]);
	}
	else
	{
		fprintf (stderr, "merger: Could not find node separator in file '%s'\n", file);
		/* Heap copy so that the node name is always dynamically allocated,
		   as in the branch above */
		InputTraces[nTraces].node = (char*) xmalloc (sizeof(unknown_node));
		memcpy (InputTraces[nTraces].node, unknown_node, sizeof(unknown_node));
	}

	InputTraces[nTraces].filesize = 0;

	/* this will be shared afterwards at merger_post_share_file_sizes */
	if (taskid == 0)
	{
		int fd = open (InputTraces[nTraces].name, O_RDONLY);
		if (-1 != fd)
		{
			InputTraces[nTraces].filesize = lseek (fd, 0, SEEK_END);
			close (fd);
		}
	}

	/* Extract the information from the filename: TASK digits followed by
	   THREAD digits, right before the extension */
	digits = &InputTraces[nTraces].name[file_len - ext_len - DIGITS_TASK - DIGITS_THREAD];

	task = 0;
	for (i = 0; i < DIGITS_TASK; i++)
	{
		task = task * 10 + ((int) digits[0] - ((int) '0'));
		digits++;
	}

	thread = 0;
	for (i = 0; i < DIGITS_THREAD; i++)
	{
		thread = thread * 10 + (digits[0] - ((int) '0'));
		digits++;
	}

	/* Identifiers in the file name start at 0, the stored ones start at 1 */
	InputTraces[nTraces].task = task + 1;
	InputTraces[nTraces].thread = thread + 1;
	InputTraces[nTraces].ptask = cur_ptask;
	InputTraces[nTraces].order = nTraces;
	/* This will be changed latter if Read_SPAWN_file is applied */
	InputTraces[nTraces].SpawnOffset = 0;

	if (thdname != NULL)
	{
		InputTraces[nTraces].threadname = strdup (thdname);
		if (InputTraces[nTraces].threadname == NULL)
		{
			fprintf (stderr, "mpi2prv: Error cannot obtain memory for THREAD NAME information!\n");
			fflush (stderr);
			exit (1);
		}
	}
	else
	{
		int res;

		InputTraces[nTraces].threadname = xmalloc (sizeof(char)*thdname_size);

		/* Bounded write: the overflow is detected instead of happening */
		res = snprintf (InputTraces[nTraces].threadname, thdname_size,
		  "THREAD %d.%d.%d", InputTraces[nTraces].ptask,
		  InputTraces[nTraces].task, InputTraces[nTraces].thread);
		if (res < 0 || (size_t) res >= thdname_size)
		{
			fprintf (stderr, "mpi2prv: Error! Thread name exceeds buffer size!\n");
			fflush (stderr);
			exit (1);
		}
	}

	nTraces++;
}

#if defined(MPI_SUPPORTS_MPI_COMM_SPAWN)
/*
 * Read_SPAWN_file
 *
 * Derives the name of the spawn file from 'mpit_file' by replacing its last
 * strlen(EXT_MPITS) characters with EXT_SPAWN. If that file exists, its first
 * line is parsed as the spawn synchronization latency, which is stored as the
 * SpawnOffset of every input trace that belongs to 'current_ptask', and the
 * intercommunicators table is loaded from the same file.
 *
 * Names that are too short or too long to build the spawn file name are
 * ignored (with a message on stderr in the latter case). The program is
 * terminated if the spawn file exists but cannot be opened.
 */
void Read_SPAWN_file (char *mpit_file, int current_ptask)
{
  char spawn_file_name[PATH_MAX];
  size_t name_len = strlen (mpit_file);
  size_t ext_len = strlen (EXT_MPITS);
  int written;

  /* Nothing to strip the extension from */
  if (name_len < ext_len)
    return;

  /* Remove the list file extension and append the spawn one, bounded */
  written = snprintf (spawn_file_name, sizeof(spawn_file_name), "%.*s%s",
    (int) (name_len - ext_len), mpit_file, EXT_SPAWN);
  if (written < 0 || (size_t) written >= sizeof(spawn_file_name))
  {
    fprintf (stderr, "mpi2prv: Error! Spawn file name derived from '%s' is too long. Skipping.\n", mpit_file);
    return;
  }

  if (__Extrae_Utils_file_exists(spawn_file_name))
  {
    /* Read the synchronization latency */
    unsigned i;
    FILE *fd;
    char line[256];
    unsigned long long SpawnSyncLatency = 0;

    fd = fopen(spawn_file_name, "r");
    if (fd == NULL)
    {
      fprintf (stderr, "mpi2prv: Fatal error! Cannot load spawn file '%s'\n", spawn_file_name);
      exit (-1);
    }
    /* An empty or unparsable first line leaves the latency at 0 */
    if (fgets(line, sizeof(line), fd) != NULL)
      sscanf(line, "%llu", &SpawnSyncLatency);
    fclose(fd);

    for (i=0; i<nTraces; i++)
    {
      if (InputTraces[i].ptask == current_ptask)
      {
        InputTraces[i].SpawnOffset = SpawnSyncLatency;
      }
    }

    /* Load the intercommunicators table */
    intercommunicators_load (spawn_file_name, current_ptask);
  }
}
#endif /* MPI_SUPPORTS_MPI_COMM_SPAWN */


/******************************************************************************
 ***  Process_Relocated_MPIT_File
 ***  Registers an MPIT file whose listed path is looked up relative to its
 ***  "set-" directory: first "set-..." from the current directory and, if
 ***  that does not exist, "set-..." under the directory of the list file.
 ***  Paths without a "/set-" component are reported and skipped.
 ******************************************************************************/

static void Process_Relocated_MPIT_File (const char *list_file, char *mpit_path,
	char *thdname, int *cptask, int taskid)
{
	/* Look for /set- in string, and then use set- (thus +1) */
	char *set_dir = strstr (mpit_path, "/set-");

	if (set_dir == NULL)
	{
		fprintf (stderr, "merger: Error cannot find 'set-' signature in filename %s\n", mpit_path);
		return;
	}

	/* Look in current directory, if not use list file directory */
	if (__Extrae_Utils_file_exists(&set_dir[1]))
	{
		Process_MPIT_File (&set_dir[1], thdname, cptask, taskid);
	}
	else
	{
		char dir_file[2048];
		char *duplicate = strdup (list_file);
		char *directory = dirname (duplicate);
		int len;

		len = snprintf (dir_file, sizeof(dir_file), "%s%s", directory, set_dir);
		if (len < 0 || (size_t) len >= sizeof(dir_file))
			fprintf (stderr, "merger: Error path for file %s is too long. Skipping.\n", mpit_path);
		else
			Process_MPIT_File (dir_file, thdname, cptask, taskid);

		xfree (duplicate);
	}
}

/******************************************************************************
 ***  Read_MPITS_file
 ***  Inserts into the trace tables the MPIT files listed in an ASCII file.
 ***
 ***  Every line is either "--" (advance to the next ptask) or
 ***  "<path> [named <thread name>]". How <path> is located depends on
 ***  'opentype': as given (FileOpen_Absolute), relative to its "set-"
 ***  directory (FileOpen_Relative), or as given with a fallback to the
 ***  relative lookup when it does not exist (FileOpen_Default).
 ***
 ***  file      Name of the list (.mpits) file. The pointer itself is kept in
 ***            last_mpits_file, so it must outlive this call.
 ***  cptask    Current ptask counter, incremented on every "--" line.
 ***  taskid    Rank of the calling merger process.
 ***
 ***  If EXTRAE_ENFORCE_FS_SYNC is 1/TRUE/true, the function first waits for
 ***  the file through __Extrae_Utils_sync_on_file and aborts on timeout.
 ***  A list file that cannot be opened is reported and ignored.
 ******************************************************************************/

/* Name of the last list file successfully opened by Read_MPITS_file */
static char *last_mpits_file = NULL;

void Read_MPITS_file (const char *file, int *cptask, FileOpen_t opentype, int taskid)
{
	int info;
	char mybuffer[4096];
	char thdname[2048];
	char path[2048];

	char *env_enforce_fs_sync = getenv("EXTRAE_ENFORCE_FS_SYNC");
	int enforce_fs_sync = (env_enforce_fs_sync != NULL)                &&
	                       ((atoi(env_enforce_fs_sync) == 1)           ||
	                        (strcmp(env_enforce_fs_sync, "TRUE") == 0) ||
	                        (strcmp(env_enforce_fs_sync, "true") == 0));

	if (enforce_fs_sync)
	{
		int delay = __Extrae_Utils_sync_on_file(file);

		if (delay == -1)
		{
			fprintf(stderr, "mpi2prv: Aborting due to task %d timeout waiting on file system synchronization (> %d second(s) elapsed): %s is not ready\n", taskid, FS_SYNC_TIMEOUT, file);
			exit(-1);
		}
		else if (delay > 0)
		{
			fprintf(stderr, "mpi2prv: Task %d syncs on %s after %d seconds\n", taskid, file, delay);
		}
	}
	
	FILE *fd = fopen (file, "r");

	if (fd == NULL)
	{
		fprintf (stderr, "mpi2prv: Unable to open %s file.\n", file);
		return;
	}

	MPITS_Files = (char**) xrealloc (MPITS_Files, sizeof(char*)*(Num_MPITS_Files+1));
	MPITS_Files[Num_MPITS_Files] = strdup (file);
	if (MPITS_Files[Num_MPITS_Files] == NULL)
	{
		fprintf (stderr, "mpi2prv: Error cannot obtain memory for the name of file %s!\n", file);
		fflush (stderr);
		exit (1);
	}
	Num_MPITS_Files++;

	last_mpits_file = (char*) file;

	/* Looping on the fgets result also handles a last line without a
	   trailing newline and stops on read errors */
	while (fgets (mybuffer, sizeof(mybuffer), fd) != NULL)
	{
		char *stripped;
		char *name;

		path[0] = thdname[0] = (char) 0;

		/* Field widths match the size of path[] and thdname[] */
		info = sscanf (mybuffer, "%2047s named %2047s", path, thdname);
		stripped = __Extrae_Utils_trim (path);

		if (strncmp (mybuffer, "--", 2) == 0)
		{
			/* If we find --, advance to the next ptask */
			(*cptask)++;
			continue;
		}

		/* Nothing could be parsed from this line */
		if (info < 1)
			continue;

		name = (info == 2) ? thdname : NULL;

		/* If mode is not forced, check first if the given path exists,
		   if not, fall back to the relative lookup */
		if (opentype == FileOpen_Absolute ||
		    (opentype == FileOpen_Default && __Extrae_Utils_file_exists(stripped)))
		{
			Process_MPIT_File (stripped, name, cptask, taskid);
		}
		else if (opentype == FileOpen_Default || opentype == FileOpen_Relative)
		{
			Process_Relocated_MPIT_File (file, stripped, name, cptask, taskid);
		}
	}

	fclose (fd);

#if defined(MPI_SUPPORTS_MPI_COMM_SPAWN)
	Read_SPAWN_file (file, *cptask);
#endif
}


/**
 * loadGlobalSYMfile
 *
 * Loads the specified global SYM file if 'sym_file' is given and exists.
 * Otherwise, if 'mpits_file' is given and ends in ".mpits", loads the SYM
 * file with the same name (extension replaced by ".sym"), if present.
 *
 * 'rank' and 'ptask' are forwarded to Labels_loadSYMfile.
 *
 * Returns 1 if a SYM file was loaded, 0 otherwise (including when the name
 * of 'mpits_file' is too long to be handled).
 */
static int loadGlobalSYMfile(int rank, char *sym_file, char *mpits_file, int ptask)
{
	if ((sym_file != NULL) && (__Extrae_Utils_file_exists(sym_file)))
	{
		Labels_loadSYMfile (rank, NOT_USED, ptask, NOT_USED, sym_file, TRUE, NULL, NULL);
		return 1;
	}
	else if (mpits_file != NULL)
	{
		static const char mpits_ext[] = ".mpits";
		static const char sym_ext[] = ".sym";
		const size_t ext_len = sizeof(mpits_ext) - 1;
		const size_t name_len = strlen (mpits_file);
		char global_SYM_file[1024];

		/* The name must hold the extension and fit in the local buffer */
		if (name_len < ext_len || name_len >= sizeof(global_SYM_file))
			return 0;

		if (strcmp (&mpits_file[name_len - ext_len], mpits_ext) == 0)
		{
			/* Base name followed by ".sym" (terminator included); this is
			   shorter than the original name, so it always fits */
			memcpy (global_SYM_file, mpits_file, name_len - ext_len);
			memcpy (&global_SYM_file[name_len - ext_len], sym_ext, sizeof(sym_ext));

			if (__Extrae_Utils_file_exists(global_SYM_file)) 
			{
				Labels_loadSYMfile (rank, NOT_USED, ptask, NOT_USED, global_SYM_file, TRUE, NULL, NULL);
				return 1;
			}
		}
	}
	return 0;
}


/******************************************************************************
 ***  ProcessArgs
 ******************************************************************************/

void ProcessArgs (int rank, int argc, char *argv[])
{
	char *BinaryName, *bBinaryName;
	int CurArg;
	unsigned int cur_ptask = 1;   /* Ptask counter. Each -- the ptask number is
	                               * incremented. */
	int found_MPITS_for_current_ptask = FALSE;
	int found_SYM_for_current_ptask = FALSE;

	if (argc == 1)                /* No params? */
	{
		Help (argv[0]);
		exit (0);
	}

	BinaryName = strdup (argv[0]);
	if (NULL == BinaryName)
	{
		fprintf (stderr, "merger: Error! Unable to duplicate binary name!\n");
		exit (-1);
	}
	bBinaryName = basename (BinaryName);

	if ((strncmp (bBinaryName, "mpi2prv", 7) == 0)
	    || (strncmp (bBinaryName, "mpimpi2prv", 10) == 0))
	{
		set_option_merge_ParaverFormat (TRUE);
		set_option_merge_ForceFormat (FALSE);
	}
	else if ((strncmp (bBinaryName, "mpi2dim", 7) == 0)
	    || (strncmp (bBinaryName, "mpimpi2dim", 10) == 0))
	{
		set_option_merge_ParaverFormat (FALSE);
		set_option_merge_ForceFormat (FALSE);
	}
	else
	{
		set_option_merge_ParaverFormat (TRUE);
		set_option_merge_ForceFormat (FALSE);
	}
	xfree (BinaryName);

	for (CurArg = 1; CurArg < argc; CurArg++)
	{
		if (!strcmp (argv[CurArg], "-h"))
		{
			Help (argv[0]);
			exit (0);
		}
		if (!strcmp (argv[CurArg], "-keep-mpits"))
		{
			set_option_merge_RemoveFiles (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-keep-mpits"))
		{
			set_option_merge_RemoveFiles (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-trace-overwrite"))
		{
			set_option_merge_TraceOverwrite (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-trace-overwrite"))
		{
			set_option_merge_TraceOverwrite (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-v"))
		{
			set_option_merge_VerboseLevel (get_option_merge_VerboseLevel()+1);
			continue;
		}
		if (!strcmp (argv[CurArg], "-translate-addresses"))
		{
			set_option_merge_TranslateAddresses (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-translate-addresses"))
		{
			set_option_merge_TranslateAddresses (FALSE);
			set_option_merge_SortAddresses (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-emit-library-events"))
		{
			set_option_merge_EmitLibraryEvents (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-absolute-counters"))
		{
			set_option_merge_AbsoluteCounters (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-stop-at-percentage"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				long stopatpct = strtol(argv[CurArg], NULL, 10);
				if (stopatpct > 0 && stopatpct < 100)
					set_option_merge_StopAtPercentage(stopatpct);
			} else
			{
				if ( 0 == rank)
					fprintf(stderr, PACKAGE_NAME": Option -stop-at-percentage: You must specify a percentage.\n");
				Help(argv[0]);
				exit(0);
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-o"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				set_merge_OutputFileName (TRACE_FILENAME,argv[CurArg]);
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -o: You must specify the output trace name.\n");
				Help(argv[0]);
				exit(0);
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-s"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				char *global_SYM_file = argv[CurArg];

				if ((rank == 0) && (loadGlobalSYMfile(rank, global_SYM_file, NULL, cur_ptask)))
				{
					found_SYM_for_current_ptask = TRUE;
				}
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-c"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				set_merge_CallbackFileName (argv[CurArg]);
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -c: You must specify the path of the callback file.\n");
				Help(argv[0]);
				exit(0);
			}
			continue;
		}
		if (!strcmp(argv[CurArg], "-e"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				set_merge_ExecutableFileName (argv[CurArg]);
				continue;
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -e: You must specify the path of the executable file.\n");
				Help(argv[0]);
				exit(0);
			}
		}
		if (!strcmp (argv[CurArg], "-f"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				found_MPITS_for_current_ptask = TRUE;
				Read_MPITS_file (argv[CurArg], &cur_ptask, FileOpen_Default, rank);
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -f: You must specify the path of the list file.\n");
				Help(argv[0]);
				exit(0);
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-f-relative"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				Read_MPITS_file (argv[CurArg], &cur_ptask, FileOpen_Relative, rank);
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -f-relative: You must specify the path of the list file.\n");
				Help(argv[0]);
				exit(0);
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-f-absolute"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				Read_MPITS_file (argv[CurArg], &cur_ptask, FileOpen_Absolute, rank);
			}
			else 
			{
				if (0 == rank)
					fprintf (stderr, PACKAGE_NAME": Option -f-absolute: You must specify the path of the list file.\n");
				Help(argv[0]);
				exit(0);
			}
		  continue;
		}
#if defined(IS_BG_MACHINE)
		if (!strcmp (argv[CurArg], "-xyzt"))
		{
			set_option_merge_BG_XYZT (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-xyzt"))
		{
			set_option_merge_BG_XYZT (FALSE);
			continue;
		}
#endif
		if (!strcmp (argv[CurArg], "-unique-caller-id"))
		{
			set_option_merge_UniqueCallerID (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-unique-caller-id"))
		{
			set_option_merge_UniqueCallerID (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-split-states"))
		{
			set_option_merge_JointStates (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-split-states"))
		{
			set_option_merge_JointStates (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-use-disk-for-comms"))
		{
			set_option_merge_UseDiskForComms (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-use-disk-for-comms"))
		{
			set_option_merge_UseDiskForComms (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-sort-addresses"))
		{
			set_option_merge_TranslateAddresses(TRUE);
			set_option_merge_SortAddresses (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-sort-addresses"))
		{
			set_option_merge_SortAddresses (FALSE);
			continue;
		}
#if defined(PARALLEL_MERGE)
		if (!strcmp (argv[CurArg], "-cyclic"))
		{
			WorkDistribution = Cyclic;
			continue;
		}
		if (!strcmp (argv[CurArg], "-block"))
		{
			WorkDistribution = Block;
			continue;
		}
		if (!strcmp (argv[CurArg], "-consecutive-size"))
		{
			WorkDistribution = ConsecutiveSize;
			continue;
		}
		if (!strcmp (argv[CurArg], "-size"))
		{
			WorkDistribution = Size;
			continue;
		}
		if (!strcmp (argv[CurArg], "-tree-fan-out"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				if (atoi(argv[CurArg]) > 0)
				{
					set_option_merge_TreeFanOut (atoi(argv[CurArg]));
				}
				else
				{
					if (0 == rank)
						fprintf (stderr, "mpi2prv: WARNING: Invalid value for -tree-fan-out parameter\n");
				}
			}
			continue;
		}
#endif
		if (!strcmp (argv[CurArg], "-evtnum"))
		{
			CurArg++;
			if (CurArg < argc)
			{
				if (atoi(argv[CurArg]) > 0)
				{
					if (0 == rank)
						fprintf (stderr, "mpi2prv: Using %d events for thread\n", atoi(argv[CurArg]));
					setLimitOfEvents (atoi(argv[CurArg]));
				}
				else
				{
					if (0 == rank)
						fprintf (stderr, "mpi2prv: WARNING: Invalid value for -evtnum parameter\n");
				}
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-d") || !strcmp(argv[CurArg], "-dump"))
		{
			set_option_merge_Dump (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-dump-time"))
		{
			set_option_merge_DumpTime (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-dump-time"))
		{
			set_option_merge_DumpTime (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-dump-symtab"))
		{
			set_option_merge_DumpSymtab (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-dump-symtab"))
		{
			set_option_merge_DumpSymtab (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-maxmem"))
		{

			CurArg++;
			if (CurArg < argc)
			{
				int tmp = atoi(argv[CurArg]);
				if (tmp == 0)
				{
					if (0 == rank)
						fprintf (stderr, "mpi2prv: Error! Invalid parameter for -maxmem option. Using 512 Mbytes\n");
					tmp = 512;
				}
				else if (tmp < 16)
				{
					if (0 == rank)
						fprintf (stderr, "mpi2prv: Error! Cannot use less than 16 MBytes for the merge step\n");
					tmp = 16;
				}
				set_option_merge_MaxMem (tmp);
			}
			else
			{	
				if (0 == rank)
					fprintf (stderr, "mpi2prv: WARNING: Invalid value for -maxmem parameter\n");
			}
			continue;
		}
		if (!strcmp (argv[CurArg], "-dimemas"))
		{
			set_option_merge_ForceFormat (TRUE);
			set_option_merge_ParaverFormat (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-paraver"))
		{
			set_option_merge_ForceFormat (TRUE);
			set_option_merge_ParaverFormat (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-skip-sendrecv"))
		{
			set_option_merge_SkipSendRecvComms (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-skip-sendrecv"))
		{
			set_option_merge_SkipSendRecvComms (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-syn-by-task"))
		{
			set_option_merge_SincronitzaTasks (TRUE);
			set_option_merge_SincronitzaTasks_byNode (FALSE);
			AutoSincronitzaTasks = FALSE;
			continue;
		}
		if (!strcmp (argv[CurArg], "-syn-by-node"))
		{
			set_option_merge_SincronitzaTasks (TRUE);
			set_option_merge_SincronitzaTasks_byNode (TRUE);
			AutoSincronitzaTasks = FALSE;
			continue;
		}
		if (!strcmp (argv[CurArg], "-syn-apps"))
		{
			set_option_merge_SincronitzaApps(TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-syn"))
		{
			set_option_merge_SincronitzaTasks (FALSE);
			set_option_merge_SincronitzaTasks_byNode (FALSE);
			AutoSincronitzaTasks = FALSE;
			continue;
		}
		if (!strcmp (argv[CurArg], "-task-view"))
		{
			set_option_merge_NanosTaskView (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-task-view"))
		{
			set_option_merge_NanosTaskView (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-remove-files"))
		{
			set_option_merge_RemoveFiles (TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-remove-files"))
		{
			set_option_merge_RemoveFiles (FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-translate-data-addresses"))
		{
			set_option_merge_TranslateDataAddresses(TRUE);
			continue;
		}
		if (!strcmp (argv[CurArg], "-no-translate-data-addresses"))
		{
			set_option_merge_TranslateDataAddresses(FALSE);
			continue;
		}
		if (!strcmp (argv[CurArg], "--"))
		{
			if (!found_SYM_for_current_ptask && found_MPITS_for_current_ptask && rank == 0)
			{
				loadGlobalSYMfile(rank, NULL, last_mpits_file, cur_ptask);
			}
			found_MPITS_for_current_ptask = FALSE;
			found_SYM_for_current_ptask = FALSE;
			cur_ptask++;
			continue;
		}
		else
			Process_MPIT_File (argv[CurArg], NULL, &cur_ptask, rank);
	}
	set_option_merge_NumApplications (cur_ptask);

	/* Specific things to be applied per format */
	if (rank == 0)
	{
		if (!found_SYM_for_current_ptask && found_MPITS_for_current_ptask)
		{
			loadGlobalSYMfile(rank, NULL, last_mpits_file, cur_ptask);
		}

		if (!get_option_merge_ParaverFormat())
		{
			/* Dimemas traces doesn't know about synchronization */
			set_option_merge_SincronitzaTasks (FALSE);
			set_option_merge_SincronitzaTasks_byNode (FALSE);
			AutoSincronitzaTasks = FALSE;

			fprintf (stdout, "merger: Output trace format is: Dimemas\n");

#if defined(PARALLEL_MERGE)
			if (WorkDistribution != Block)
			{
				fprintf (stdout, "merger: Other work distribution than 'block' are not supporting when generating Dimemas traces\n");
				WorkDistribution = Block;
			}
#endif

		}
		else
		{
			fprintf (stdout, "merger: Output trace format is: Paraver\n");
		}
	}

#if defined(PARALLEL_MERGE)
# if USE_HARDWARE_COUNTERS || defined(HETEROGENEOUS_SUPPORT)
	Share_HWC_Before_Processing_MPITS (rank);
# endif
#endif
}

/* Prints on stdout the comma-separated list of the 'numtasks' node names held
 * in 'nodenames'. Only the process whose 'processor_id' is 0 prints. */
static void PrintNodeNames (int numtasks, int processor_id, char **nodenames)
{
	int node;

	if (processor_id != 0)
		return;

	fputs ("mpi2prv: Assigned nodes <", stdout);
	for (node = 0; node < numtasks; node++)
	{
		/* Every name is followed by a comma, except the last one */
		char separator = (node == numtasks-1) ? ' ' : ',';

		fprintf (stdout, " %s%c", nodenames[node], separator);
	}
	fputs (">\n", stdout);
}

/* Identifies one traced task and the amount of trace data it carries.
 * Used to decide which merger process handles the files of each task. */
typedef struct 
{
	unsigned ptask;   /* Application (ptask) identifier, compared against InputTraces[].ptask */
	unsigned task;    /* Task identifier, compared against InputTraces[].task */
	off_t task_size;  /* Size attributed to the task; key used by SortTasksBySize */
} all_tasks_ids_t;

/* Marks every input trace that belongs to the given <ptask, task> pair as
 * input for the merger process 'merger_worker_id'. */
static void AssignFilesToWorker( unsigned merger_worker_id, all_tasks_ids_t task )
{
	unsigned idx;

	for (idx = 0; idx < nTraces; idx++)
	{
		/* Skip the files that belong to any other task */
		if (InputTraces[idx].ptask != task.ptask)
			continue;
		if (InputTraces[idx].task != task.task)
			continue;

		InputTraces[idx].InputForWorker = merger_worker_id;
	}
}

/* qsort comparison function for all_tasks_ids_t elements: orders them by
 * increasing task_size. Returns -1, 0 or 1. */
int SortTasksBySize (const void *t1, const void *t2)
{
        const all_tasks_ids_t *lhs = (const all_tasks_ids_t *) t1;
        const all_tasks_ids_t *rhs = (const all_tasks_ids_t *) t2;

        /* (a > b) - (a < b) yields 1, 0 or -1 without subtracting the sizes */
        return (lhs->task_size > rhs->task_size) - (lhs->task_size < rhs->task_size);
}

/**
 * DistributeWork
 *
 * Decides which merger process works on each traced task and reports the
 * outcome. The steps are:
 *   1. Derive the number of applications (highest ptask seen) and the number
 *      of tasks per application (highest task seen for that ptask) from
 *      InputTraces.
 *   2. Accumulate the file sizes of every (ptask, task) pair.
 *   3. Flatten all (ptask, task) pairs into a single list and hand each entry
 *      to AssignFilesToWorker according to the WorkDistribution policy
 *      (Block, Cyclic, Size or ConsecutiveSize).
 *   4. Verify that every input trace ended up with a worker index within
 *      [0, num_processors) and abort otherwise.
 *   5. On processor 0, print the size assigned to every processor and the
 *      per-file assignment.
 *
 * The function terminates the process with exit(-1) when there are fewer
 * traced tasks than merger processes or when an input has a bad assignment.
 *
 * @param num_processors Number of merger processes sharing the work.
 * @param processor_id   Identifier of the calling merger process; only
 *                       processor 0 prints the summary.
 */
static void DistributeWork (unsigned num_processors, unsigned processor_id)
{
	unsigned num_apps = 0;
	unsigned *num_tasks_per_app = NULL;
	unsigned **task_sizes_per_app = NULL;
	unsigned i = 0;
	unsigned j = 0;
	unsigned index = 0;
	unsigned all_tasks = 0;
	all_tasks_ids_t *all_tasks_ids = NULL;

#if defined(DEBUG)
	for (i = 0; i < nTraces; i ++)
	{
		fprintf(stderr, "[DEBUG] InputTraces[%d] ptask=%u task=%u\n", i, InputTraces[i].ptask, InputTraces[i].task);
	}
#endif

	/* ptask identifiers are used 1-based below, so the highest one is also
	   the number of applications */
	for (i = 0; i < nTraces; i ++)
	{
		num_apps = MAX(num_apps, InputTraces[i].ptask);
	}

#if defined(DEBUG)
	fprintf(stderr, "[DEBUG] num_apps = %d\n", num_apps);
#endif

	num_tasks_per_app = xmalloc(num_apps * sizeof(unsigned));
	task_sizes_per_app = xmalloc(num_apps * sizeof(unsigned *));

	for (i = 0; i < num_apps; i++)
	{
		num_tasks_per_app[i] = 0;
	}

	/* Same idea per application: the highest task identifier gives the
	   number of tasks of that application */
	for (i = 0; i < nTraces; i++)
	{
		num_tasks_per_app[ InputTraces[i].ptask - 1 ] = MAX( num_tasks_per_app[ InputTraces[i].ptask - 1 ], InputTraces[i].task );
	}

	for (i = 0; i < num_apps; i++)
	{
#if defined(DEBUG)
		fprintf(stderr, "[DEBUG] num_tasks_per_app[%d]=%d\n", i, num_tasks_per_app[i]);
#endif

		task_sizes_per_app[i] = xmalloc(num_tasks_per_app[i] * sizeof(unsigned));
		for (j = 0; j < num_tasks_per_app[i]; j++)
		{
			task_sizes_per_app[i][j] = 0;
		}

		all_tasks += num_tasks_per_app[i];
	}

	if (all_tasks < num_processors)
	{
		fprintf (stderr, "mpi2prv: FATAL ERROR! You are using more tasks for merging than tasks were traced! Please use less than %d tasks to merge.\n", all_tasks);
		exit(-1);
	}

	/* Several input traces may belong to the same (ptask, task); add up
	   their sizes */
	for (i = 0; i < nTraces; i++)
	{
		task_sizes_per_app[ InputTraces[i].ptask - 1 ][ InputTraces[i].task - 1 ] += InputTraces[i].filesize;
	}

	/* Flatten every (ptask, task) pair into a single list */
	all_tasks_ids = xmalloc(all_tasks * sizeof(all_tasks_ids_t));
	index = 0;
	for (i = 0; i < num_apps; i ++)
	{
		for (j = 0; j < num_tasks_per_app[i]; j ++)
		{
			all_tasks_ids[index].ptask = i+1;
			all_tasks_ids[index].task  = j+1;
			all_tasks_ids[index].task_size = task_sizes_per_app[i][j];
			index ++;
		}
	}

#if defined(DEBUG)
	fprintf(stderr, "[DEBUG] all_tasks=%d\n", all_tasks);
	for (i = 0; i < all_tasks; i++)
	{
		fprintf(stderr, "[DEBUG] all_tasks[%d] ptask=%d task=%d task_size=%d\n", 
		  i+1, all_tasks_ids[i].ptask, all_tasks_ids[i].task, (int)all_tasks_ids[i].task_size);
	}
#endif

	if (WorkDistribution == Block)
	{
		/* Consecutive chunks of tasks; the first (all_tasks % num_processors)
		   workers take one extra task */
		unsigned offset = 0;
		unsigned avg_tasks_per_merger = all_tasks / num_processors;
		unsigned mod_tasks_per_merger = all_tasks % num_processors;

		for (i=0; i<num_processors; i++)
		{
			unsigned tasks_this_merger = avg_tasks_per_merger + (i < mod_tasks_per_merger ? 1 : 0);
			for (j=0; j<tasks_this_merger; j++)
			{
				unsigned task_to_assign = offset + j;
				if (task_to_assign < all_tasks)
					AssignFilesToWorker( i, all_tasks_ids[task_to_assign] );
			}
			offset += tasks_this_merger;
		}
	}
	else if (WorkDistribution == Cyclic)
	{
		/* Files will be distributed in cycles */
		for (i=0; i < all_tasks; i++)
			AssignFilesToWorker(i % num_processors, all_tasks_ids[i]);
	}
	else if (WorkDistribution == Size || WorkDistribution == ConsecutiveSize)
	{
		off_t average_size_per_worker;
		off_t remaining_size = 0;
		off_t assigned_size[num_processors];
		char assigned_files[all_tasks];
		unsigned current_task;

		/* Size reorders the list with SortTasksBySize; ConsecutiveSize keeps
		   the original order */
		if (WorkDistribution == Size)
			qsort (all_tasks_ids, all_tasks, sizeof(all_tasks_ids_t), SortTasksBySize);

		for (i=0; i < num_processors; i++)
		{
			assigned_size[i] = 0;
		}

		for (i=0; i < all_tasks; i++)
		{
			remaining_size += all_tasks_ids[i].task_size;
			assigned_files[i] = FALSE;
		}
		
		average_size_per_worker = remaining_size / num_processors;

		/* Each worker walks the list once and takes every still-free task
		   that keeps it at or below its target size.
		   NOTE(review): tasks not picked by any worker (e.g. zero-sized ones
		   once the target is reached) are left without assignment here;
		   confirm that this cannot happen with real inputs. */
		for (i=0; i<num_processors; i++)
		{
			current_task = 0;
			while (assigned_size[i] < average_size_per_worker)
			{
				if (!assigned_files[current_task])
				{
					if (assigned_size[i]+all_tasks_ids[current_task].task_size <= average_size_per_worker)
					{
						assigned_files[current_task] = TRUE;
						assigned_size[i] += all_tasks_ids[current_task].task_size;
						AssignFilesToWorker(i, all_tasks_ids[current_task]);
						remaining_size -= all_tasks_ids[current_task].task_size;
					}
				}
				if (++current_task >= all_tasks)
				{
					break;
				}
			}

			/* Recompute the target for the workers that are still pending.
			   The last worker has nobody after it, so the update is skipped
			   there: the divisor would be zero. */
			if (i + 1 < num_processors)
				average_size_per_worker = remaining_size / (num_processors - i - 1);
		}
	}

	/* Check assigned traces... */
	for (index = 0; index < nTraces; index++)
		if (InputTraces[index].InputForWorker >= (int)num_processors ||
		  InputTraces[index].InputForWorker < 0)
		{
			fprintf (stderr, "mpi2prv: FATAL ERROR! Bad input assignament into processor namespace.\n");
			fprintf (stderr, "mpi2prv: FATAL ERROR! Input %d assigned to processor %d.\n", index, InputTraces[index].InputForWorker);
			exit (-1);
		}

	/* Show information of sizes */
	if (processor_id == 0)
	{
		fprintf (stdout, "mpi2prv: Assigned size per processor <");
		for (index = 0; index < num_processors; index++)
		{
			unsigned file;
			off_t size_assigned_to_task;

			/* Add up the sizes of the files given to this processor */
			size_assigned_to_task = 0;
			for (file = 0; file < nTraces; file++)
				if (InputTraces[file].InputForWorker == (int)index)
					size_assigned_to_task += InputTraces[file].filesize;

			if (size_assigned_to_task != 0)
			{
				if (size_assigned_to_task < 1024*1024)
					fprintf (stdout, " <1 Mbyte");
				else
#if SIZEOF_OFF_T == 8 && SIZEOF_LONG == 8
					fprintf (stdout, " %ld Mbytes", size_assigned_to_task/(1024*1024));
#elif SIZEOF_OFF_T == 8 && SIZEOF_LONG == 4
					fprintf (stdout, " %lld Mbytes", size_assigned_to_task/(1024*1024));
#elif SIZEOF_OFF_T == 4
					fprintf (stdout, " %d Mbytes", size_assigned_to_task/(1024*1024));
#endif
			}
			else
				fprintf (stdout, " 0 bytes");
			fprintf (stdout, "%c", (index!=num_processors-1)?',':' ');
		}
		fprintf (stdout, ">\n");

		for (index = 0 ; index < nTraces; index++)
			fprintf (stdout,"mpi2prv: File %s is object %d.%d.%d on node %s assigned to processor %d\n",
				InputTraces[index].name, InputTraces[index].ptask,
				InputTraces[index].task, InputTraces[index].thread,
				InputTraces[index].node==NULL?"unknown":InputTraces[index].node,
				InputTraces[index].InputForWorker);

		/* Flushed once, after the whole listing has been written */
		fflush (stdout);
	}

	if (task_sizes_per_app)
	{
		for (i = 0; i < num_apps; i++)
			if (task_sizes_per_app[i])
				xfree (task_sizes_per_app[i]);
		xfree (task_sizes_per_app);
	}
	if (num_tasks_per_app)
		xfree (num_tasks_per_app);
	if (all_tasks_ids)
		xfree (all_tasks_ids);
}


/******************************************************************************
 ***  main entry point
 ******************************************************************************/

/* To be called before ProcessArgs */

void merger_pre (int numtasks)
{
#if defined(PARALLEL_MERGE)
	/* The parallel merger needs at least two MPI processes; anything else
	   is reported and the program ends with status 1 */
	if (numtasks > 1)
		return;

	fprintf (stderr, "mpimpi2prv: The parallel merger mpimpi2prv can not run with 1 MPI process.\n"
	                 "mpimpi2prv: Please use the sequential merger mpi2prv or run with more MPI processes.\n"
	                 "mpimpi2prv: Exiting...\n");
	exit (1);
#else
	/* Nothing to verify in the sequential build */
	UNREFERENCED_PARAMETER(numtasks);
#endif
}


/* To be called after ProcessArgs */

#if defined(PARALLEL_MERGE)
/* Broadcasts the file size of every input trace from task 0 to the remaining
   tasks, which store the received values into InputTraces[].filesize. */
static void merger_post_share_file_sizes (int taskid)
{
	const int is_root = (taskid == 0);
	unsigned long long *filesizes = xmalloc (sizeof(unsigned long long)*nTraces);
	unsigned idx;
	int rc;

	/* Only the root fills the buffer before the broadcast */
	if (is_root)
	{
		for (idx = 0; idx < nTraces; idx++)
			filesizes[idx] = InputTraces[idx].filesize; 
	}

	rc = MPI_Bcast (filesizes, nTraces, MPI_LONG_LONG, 0, MPI_COMM_WORLD);
	MPI_CHECK(rc, MPI_Bcast, "Cannot share trace file sizes");

	/* Everybody else copies the received sizes into its own table */
	if (!is_root)
	{
		for (idx = 0; idx < nTraces; idx++)
			InputTraces[idx].filesize = filesizes[idx]; 
	}

	xfree (filesizes);
}
#endif

/**
 * merger_post
 *
 * Drives the merge once the list of intermediate trace files is available:
 * in the parallel build it shares the file sizes and settles the tree fan
 * out, then it distributes the inputs among the merger processes, decides
 * whether time synchronization is needed (when AutoSincronitzaTasks is set),
 * makes sure the output directory exists (task 0 only), generates the
 * Paraver or Dimemas trace and, if requested and no error was reported,
 * removes the intermediate files.
 *
 * @param numtasks Number of merger processes.
 * @param taskid   Identifier of the calling merger process; task 0 is the one
 *                 that prints the informational messages.
 * @return Always 0, including when there are no input traces or when the
 *         trace generation reports an error.
 */
int merger_post (int numtasks, int taskid)
{
	unsigned long long records_per_task;
#if defined(PARALLEL_MERGE)
	char **nodenames;
#else
	char nodename[1024];
	char *nodenames[1];
#endif
	int error;
	struct Pair_NodeCPU *NodeCPUinfo;

	if (taskid == 0)
		fprintf (stdout, "merger: %s\n", PACKAGE_STRING);

	if (0 == nTraces)
	{
	  fprintf (stderr, "mpi2prv: No intermediate trace files given.\n");
	  return 0;
	}

#if defined(PARALLEL_MERGE)
	merger_post_share_file_sizes (taskid);

	/* Settle the tree fan out: default to numtasks and never exceed it */
	if (get_option_merge_TreeFanOut() == 0)
	{
		if (taskid == 0)
			fprintf (stdout, "mpi2prv: Tree order is not set. Setting automatically to %d\n", numtasks);
		set_option_merge_TreeFanOut (numtasks);
	}
	else if (get_option_merge_TreeFanOut() > numtasks)
	{
		if (taskid == 0)
			fprintf (stdout, "mpi2prv: Tree order is set to %d but is larger that numtasks. Setting tree order to %d\n", get_option_merge_TreeFanOut(), numtasks);
		set_option_merge_TreeFanOut (numtasks);
	}
	else if (get_option_merge_TreeFanOut() <= numtasks)
	{
		if (taskid == 0)
			fprintf (stdout, "mpi2prv: Tree order is set to %d\n", get_option_merge_TreeFanOut());
	}

	if (numtasks > nTraces)
	{
		if (taskid == 0)
			fprintf (stderr, "mpi2prv: FATAL ERROR! The tree fan out (%d) is larger than the number of MPITs (%d)\n", numtasks, nTraces);
		/* NOTE(review): the process ends with status 0 although the
		   condition is reported as fatal -- confirm this is intended */
		exit (0);
	}
#endif

	records_per_task = 1024*1024/sizeof(paraver_rec_t);  /* num of events in 1 Mbytes */
	records_per_task *= get_option_merge_MaxMem();       /* let's use this memory */
#if defined(PARALLEL_MERGE)
	records_per_task /= get_option_merge_TreeFanOut();   /* divide by the tree fan out */

	if (0 == records_per_task)
	{
		if (0 == taskid)
			fprintf (stderr, "mpi2prv: Error! Assigned memory by -maxmem is insufficient for this tree fan out\n");
		exit (-1);
	}
#endif

#if defined(PARALLEL_MERGE)
	ShareNodeNames (numtasks, &nodenames);
#else
	gethostname (nodename, sizeof(nodename));
	nodenames[0] = nodename;
#endif

	PrintNodeNames (numtasks, taskid, nodenames);
	DistributeWork (numtasks, taskid);
	NodeCPUinfo = AssignCPUNode (nTraces, InputTraces);

	if (AutoSincronitzaTasks)
	{
		/* Synchronization is turned on only when the traces come from more
		   than one node */
		unsigned i;
		unsigned all_nodes_are_equal = TRUE;
		unsigned first_node = InputTraces[0].nodeid;
		for (i = 1; i < nTraces && all_nodes_are_equal; i++)
			all_nodes_are_equal = (first_node == InputTraces[i].nodeid);
		set_option_merge_SincronitzaTasks (!all_nodes_are_equal);

		if (0 == taskid)
		{
			fprintf (stdout, "mpi2prv: Time synchronization has been turned %s\n", get_option_merge_SincronitzaTasks()?"on":"off");
			fflush (stdout);
		}
	}

#if defined(PARALLEL_MERGE)
	/* Task 0 tells the rest whether SortAddresses is enabled */
	if (taskid == 0)
	{
		int res, tmp = get_option_merge_SortAddresses();
		res = MPI_Bcast (&tmp, 1, MPI_INT, 0, MPI_COMM_WORLD);
		MPI_CHECK(res, MPI_Bcast, "Cannot share whether option SortAddresses is turned on");
	}
	else
	{
		int res, tmp;
		res = MPI_Bcast (&tmp, 1, MPI_INT, 0, MPI_COMM_WORLD);
		MPI_CHECK(res, MPI_Bcast, "Cannot share whether option SortAddresses is turned on");
		set_option_merge_SortAddresses (tmp);
	}
#endif

	if (taskid == 0)
	{
		char *outname_copy;
		char *dirn;

		fprintf (stdout, "mpi2prv: Checking for target directory existence...");

		/* dirname() may modify the string it receives, so it works on a
		   private copy that is released once the directory name is no
		   longer needed */
		outname_copy = strdup(__Extrae_Utils_trim(get_merge_OutputFileName (TRACE_FILENAME)));
		dirn = dirname(outname_copy);
		if (!__Extrae_Utils_directory_exists(dirn))
		{
			fprintf (stdout, " does not exist. Creating ...");
			if (!__Extrae_Utils_mkdir_recursive(dirn))
			{
				fprintf (stdout, " failed to create (%s)!\n", dirn);
				exit (-1);
			}
			else
				fprintf (stdout, " done\n");
		}
		else
			fprintf (stdout, " exists, ok!\n");

		free (outname_copy);
	}

	if (get_option_merge_ParaverFormat())
		error = Paraver_ProcessTraceFiles (nTraces, InputTraces,
		    get_option_merge_NumApplications(),
			NodeCPUinfo, numtasks, taskid);
	else
		error = Dimemas_ProcessTraceFiles (__Extrae_Utils_trim(get_merge_OutputFileName (TRACE_FILENAME)),
			nTraces, InputTraces, get_option_merge_NumApplications(),
			NodeCPUinfo, numtasks, taskid);

	if (!error)
	{
		if (get_option_merge_RemoveFiles())
		{
			unsigned u;

			/* Remove MPITS and their SYM related files */
			for (u = 0; u < Num_MPITS_Files; u++)
			{
				char tmp[1024];
				const size_t ext_len = strlen(".mpits");
				size_t len;

				/* snprintf always terminates the copy, unlike strncpy */
				snprintf (tmp, sizeof(tmp), "%s", MPITS_Files[u]);
				len = strlen (tmp);

				/* Only look at the suffix when the name is long enough to
				   hold one */
				if (len >= ext_len && strcmp (&tmp[len-ext_len], ".mpits") == 0)
				{
					/* ".sym" is shorter than ".mpits", so it always fits */
					strcpy (&tmp[len-ext_len], ".sym");
					unlink (tmp);
				}
				unlink (MPITS_Files[u]);
			}

			for (u = 0; u < nTraces; u++)
			{
				/* Remove the .mpit file */
				unlink (InputTraces[u].name);

				/* Remove the local .sym file for that .mpit file */
				{
					char tmp[1024];
					const size_t ext_len = strlen(".mpit");
					size_t len;

					snprintf (tmp, sizeof(tmp), "%s", InputTraces[u].name);
					len = strlen (tmp);

					/* Names shorter than the extension cannot carry it;
					   skip them instead of writing before the buffer */
					if (len >= ext_len)
					{
						strcpy (&tmp[len-ext_len], ".sym");
						unlink (tmp);
					}
				}

				/* Try to remove the container set-X directory */
				rmdir (dirname (InputTraces[u].name));
			}
		}
	}
	else
		fprintf (stderr, "mpi2prv: An error has been encountered when generating the tracefile. Dying...\n");

#if defined(HAVE_LIBADDR2LINE)
	if (get_option_merge_VerboseLevel() > 0)
		Addr2Info_HashCache_ShowStatistics();
#endif

	return 0;
}

/**
 * mergerLoadFilesInEmbeddedMode
 *
 * This is called from Backend_Finalize when XML merge option is enabled. Since the embedded
 * merger can not pass parameters by command-line to mpi2prv binary, the function ProcessArgs
 * is not called, and thus, the global sym file is not loaded. This function loads the 
 * necessary files for the embedded merger to start the merging process, including the 
 * global sym file.
 */
void mergerLoadFilesInEmbeddedMode(int taskid, int num_tasks, char *mpits_filename)
{
	const int is_master = (taskid == 0);
	int ptask = 1;

	if (is_master)
	{
		fprintf(stdout, "mpi2prv: Proceeding with the merge of the intermediate tracefiles.\n");
#if defined(MPI_SUPPORT)
		fprintf(stdout, "mpi2prv: Waiting for all tasks to reach the checkpoint.\n");
#endif
	}

#if defined(MPI_SUPPORT)
	/* Synchronize all tasks at this point so none overtakes the master and
	   gets an invalid/blank trace file list (.mpits file) */
	PMPI_Barrier(MPI_COMM_WORLD);
#endif

	merger_pre(num_tasks);

	/* Only the master loads the global sym file */
	if (is_master)
		loadGlobalSYMfile(taskid, NULL, mpits_filename, ptask);

#if defined(PARALLEL_MERGE)
# if USE_HARDWARE_COUNTERS || defined(HETEROGENEOUS_SUPPORT)
	Share_HWC_Before_Processing_MPITS(taskid);
# endif
#endif

	/* Every task reads the list of intermediate files */
	Read_MPITS_file(mpits_filename, &ptask, FileOpen_Default, taskid);

	if (is_master)
		fprintf(stdout, "mpi2prv: Executing the merge process (using %s).\n", mpits_filename);

	merger_post(num_tasks, taskid);
}

/**
 * trace_exists_for_base
 * 
 * Check if a trace file already exists for a given base name.
 * It tries base+ext and base+gzext combinations and returns TRUE on success.
 * 
 * @param base Base name of the trace (without extension)
 * @param ext  Trace file extension (e.g., ".prv" or ".dim")
 * @param gzext Compressed extension (e.g., ".prv.gz" or ".dim.gz")
 * @return TRUE if any of the files exist, FALSE otherwise
*/

static int trace_exists_for_base(const char *base, const char *ext, const char *gzext)
{
	char candidate[PATH_MAX];

	/* Plain trace first: base + ext */
	snprintf(candidate, sizeof(candidate), "%s%s", base, ext);
	if (!__Extrae_Utils_file_exists(candidate))
	{
		/* Not there; the answer is whatever the compressed name gives */
		snprintf(candidate, sizeof(candidate), "%s%s", base, gzext);
		return __Extrae_Utils_file_exists(candidate);
	}

	return TRUE;
}

/**
 * Extrae_GenerateOutputFileName
 * Generate output filenames for this merge (trace + sidecars).
 *
 * - Base name:
 *     * if user passed -o, use it (strip .prv/.dim and optional .gz)
 *     * else use main binary name; if unavailable, use format default
 * - Extensions:
 *     * trace: .prv/.dim (or .prv.gz/.dim.gz if .gz was requested)
 *     * sidecars: .pcf, .row (and .crd on BG builds)
 * - Overwrite:
 *     * if overwrite=no and target exists, append .0001, .0002, … until free
 *
 * @return void
 */

void Extrae_GenerateOutputFileName(void)
{
    unsigned lastid = 0;
    int is_prv = get_option_merge_ParaverFormat();

    const char *ext   = is_prv ? ".prv"    : ".dim";
    const char *gzext = is_prv ? ".prv.gz" : ".dim.gz";
    unsigned short want_gzip = 0;

    /* 1) Determine base name */
    char base[PATH_MAX];
    const char *user_name = get_merge_OutputFileName(TRACE_FILENAME);

    if (user_name && user_name[0] != '\0') {
        size_t len = strlen(user_name);

        /* Strip the trace extension; the compressed one is tested first
           because it is the longer suffix and selects gzip output */
        if (len >= strlen(gzext) && strcmp(user_name + len - strlen(gzext), gzext) == 0) {
            want_gzip = 1;
            len -= strlen(gzext);
        } else if (len >= strlen(ext) && strcmp(user_name + len - strlen(ext), ext) == 0) {
            len -= strlen(ext);
        }

        /* The name comes from the user: never copy more than base can hold */
        if (len >= sizeof(base))
            len = sizeof(base) - 1;

        memcpy(base, user_name, len);
        base[len] = '\0';
    } else {
        char *binpath = ObjectTree_getMainBinary(1, 1);
        if (binpath != NULL) {
            snprintf(base, sizeof(base), "%s", basename(binpath));
            xfree(binpath);
        }
        else {
            /* No binary name available: fall back to the format default.
               The default is passed as data, not as the format string. */
            if (is_prv)
                snprintf(base, sizeof(base), "%s", DEFAULT_PRV_OUTPUT_NAME);
            else
                snprintf(base, sizeof(base), "%s", DEFAULT_DIM_OUTPUT_NAME);
        }
    }

    /* 2) Check overwrite and add suffix if needed */
    char workbase[PATH_MAX];
    snprintf(workbase, sizeof(workbase), "%s", base);

    if (!get_option_merge_TraceOverwrite()) {
        /* Try base, base.0001, base.0002, ... until no trace with that name
           exists, giving up after 9999 attempts */
        while (trace_exists_for_base(workbase, ext, gzext)) {
            if (++lastid >= 10000) {
                fprintf(stderr,
                    "Error: exhausted 10000 unique filenames based on '%s'.\n"
                    "Hint: delete or archive older traces, "
                    "or choose a different name with -o.\n", base);
                exit(EXIT_FAILURE);
            }
            snprintf(workbase, sizeof(workbase), "%s.%04u", base, lastid);
        }
    }

    /* 3) Compose and store all output filenames */
    char tmp[PATH_MAX];

    /* The compressed extension is only used when zlib support is built in */
#ifdef HAVE_ZLIB
    if (want_gzip)
        snprintf(tmp, sizeof(tmp), "%s%s", workbase, gzext);
    else
#endif
        snprintf(tmp, sizeof(tmp), "%s%s", workbase, ext);

    set_merge_OutputFileName (TRACE_FILENAME, tmp);

    snprintf(tmp, sizeof(tmp), "%s.pcf", workbase);
    set_merge_OutputFileName (PCF_FILENAME, tmp);
    snprintf(tmp, sizeof(tmp), "%s.row", workbase);
    set_merge_OutputFileName (ROW_FILENAME, tmp);
    set_option_merge_OutputIsGzip(want_gzip);

#if defined(IS_BG_MACHINE)
#if defined(DEAD_CODE)
    snprintf(tmp, sizeof(tmp), "%s.crd", workbase);
    set_merge_OutputFileName (CRD_FILENAME, tmp);
#endif 
#endif
}
