src/grid/config.h
#include <stdlib.h>
#include <stdio.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#include <string.h>
#include <float.h>
#include <limits.h>
#include <math.h>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#if NVTX
@include <nvtx3/nvToolsExt.h>
#endif
@define unmap(x,y)
@define trash(x) // data trashing is disabled by default. Turn it on with
// -DTRASH=1
auto macro2 BEGIN_FOREACH() {{...}}
#if _OPENMP
@ include <omp.h>
@ define OMP(x) Pragma(#x)
macro OMP_SERIAL()
{
@undef OMP
@define OMP(x)
; // necessary so that the preproc above is included
{...}
@undef OMP
@define OMP(x) Pragma(#x)
; // necessary so that the preproc above is included
}
#elif _MPI
@ define OMP(x)
macro OMP_SERIAL() {{...}}
@ include <mpi.h>
static int mpi_rank, mpi_npe;
@ define tid() mpi_rank
@ define pid() mpi_rank
@ define npe() mpi_npe
#else // not MPI, not OpenMP
@ define OMP(x)
macro OMP_SERIAL() {{...}}
#endif // _MPI
#if _CADNA
# include <cadna.h>
#endif // CADNA
#if __cplusplus
# define delete delete_qcc
# define right right_qcc
# define left left_qcc
# define norm norm_qcc
# define new new_qcc
#endif // _cplusplus
@define _NVARMAX 65536
@define is_constant(v) ((v).i >= _NVARMAX)
@define constant(v) (is_constant(v) ? _constant[(v).i - _NVARMAX] : nodata)
@define systderr stderr
@define systdout stdout
#if _MPI
FILE * qstderr (void);
FILE * qstdout (void);
FILE * ferr = NULL, * fout = NULL;
@ def not_mpi_compatible()
do {
if (npe() > 1) {
fprintf (ferr, "%s() is not compatible with MPI (yet)\n", __func__);
exit (1);
}
} while(0)
@
@ define system(command) (pid() == 0 ? system(command) : 0)
#else
@ define qstderr() stderr
@ define qstdout() stdout
@ define ferr stderr
@ define fout stdout
@ define not_mpi_compatible()
#endif
// redirect assert
static inline void qassert (const char * file, int line, const char * cond) {
fprintf (stderr, "%s:%d: Assertion `%s' failed.\n", file, line, cond);
abort();
}
#undef assert
#ifndef LINENO
# define LINENO __LINE__
#endif
#define assert(a) if (!(a)) qassert (__FILE__, LINENO, #a)
// Memory tracing
@define sysmalloc malloc
@define syscalloc calloc
@define sysrealloc realloc
@define sysfree free
@define systrdup strdup
#if MTRACE
# include "mtrace.h"
#else // !MTRACE
@ define pmalloc(s,func,file,line) malloc(s)
@ define pcalloc(n,s,func,file,line) calloc(n,s)
@ define prealloc(p,s,func,file,line) realloc(p,s)
@ define pfree(p,func,file,line) free(p)
@ define pstrdup(s,func,file,line) strdup(s)
#endif // !MTRACE
#define qrealloc(p, size, type) p = (type *) realloc (p, (size)*sizeof(type))
#define qmalloc(size, type) ((type *) malloc ((size)*sizeof(type)))
#define qcalloc(size, type) ((type *) calloc (size, sizeof(type)))
#include "array.h"
// Function tracing
#if TRACE == 1 // with Extrae library
#include <extrae_user_events.h>
typedef struct {
Array index, stack;
extrae_type_t type;
} Trace;
Trace trace_func = {
{NULL, 0, 0}, {NULL, 0, 0},
60000010,
};
Trace trace_mpi_func = {
{NULL, 0, 0}, {NULL, 0, 0},
60000011,
};
static int lookup_func (Array * a, const char * func)
{
for (int i = 0; i < a->len/sizeof(char *); i++) {
char * s = ((char **)a->p)[i];
if (!strcmp (func, s))
return i + 1;
}
char * s = strdup (func);
array_append (a, &s, sizeof(char *));
return a->len;
}
static void trace_push (Trace * t, const char * func)
{
int value = lookup_func (&t->index, func);
Extrae_eventandcounters (t->type, value);
array_append (&t->stack, &value, sizeof(int));
}
static void trace_pop (Trace * t, const char * func)
{
assert (t->stack.len > 0);
t->stack.len -= sizeof(int);
int value = t->stack.len > 0 ?
((int *)t->stack.p)[t->stack.len/sizeof(int) - 1] : 0;
Extrae_eventandcounters (t->type, value);
}
static void trace_define (Trace * t, char * description)
{
if (t->index.len > 0) {
extrae_value_t values[t->index.len/sizeof(char *) + 1];
char * names[t->index.len/sizeof(char *) + 1],
** func = (char **) t->index.p;
names[0] = "OTHER";
values[0] = 0;
unsigned len = 1;
for (int i = 0; i < t->index.len/sizeof(char *); i++, func++) {
names[len] = *func;
values[len++] = i + 1;
}
Extrae_define_event_type (&t->type, description, &len, values, names);
}
}
static void trace_free (Trace * t)
{
char ** func = (char **) t->index.p;
for (int i = 0; i < t->index.len/sizeof(char *); i++, func++)
free (*func);
free (t->index.p);
free (t->stack.p);
}
static void trace_off()
{
trace_define (&trace_func, "Basilisk functions");
trace_define (&trace_mpi_func, "Basilisk functions (MPI-related)");
trace_free (&trace_func);
trace_free (&trace_mpi_func);
}
#if 0
#define TRACE_TYPE(func) (strncmp (func, "mpi_", 4) ? \
&trace_func : &trace_func)
#else
#define TRACE_TYPE(func) &trace_func
#endif
@ define tracing(func, file, line) trace_push (TRACE_TYPE(func), func)
@ define end_tracing(func, file, line) trace_pop (TRACE_TYPE(func), func)
#elif TRACE // built-in function tracing
typedef struct {
char * func, * file;
int line, calls;
double total, self;
#if _MPI
double min, max;
#endif // _MPI
} TraceIndex;
struct {
Array stack, index;
double t0;
} Trace = {
{NULL, 0, 0}, {NULL, 0, 0},
-1
};
static void trace_add (const char * func, const char * file, int line,
double total, double self)
{
TraceIndex * t = (TraceIndex *) Trace.index.p;
int i, len = Trace.index.len/sizeof(TraceIndex);
for (i = 0; i < len; i++, t++)
if (t->line == line && !strcmp (func, t->func) && !strcmp (file, t->file))
break;
if (i == len) {
TraceIndex t = {strdup(func), strdup(file), line, 1, total, self};
array_append (&Trace.index, &t, sizeof(TraceIndex));
}
else
t->calls++, t->total += total, t->self += self;
}
static void tracing (const char * func, const char * file, int line)
{
struct timeval tv;
gettimeofday (&tv, NULL);
if (Trace.t0 < 0)
Trace.t0 = tv.tv_sec + tv.tv_usec/1e6;
double t[2] = {(tv.tv_sec - Trace.t0) + tv.tv_usec/1e6, 0.};
array_append (&Trace.stack, t, 2*sizeof(double));
#if 0
fprintf (stderr, "trace %s:%s:%d t: %g sum: %g\n",
func, file, line, t[0], t[1]);
#endif
#if NVTX
nvtxRangePush (func);
#endif
}
static void end_tracing (const char * func, const char * file, int line)
{
struct timeval tv;
gettimeofday (&tv, NULL);
double te = (tv.tv_sec - Trace.t0) + tv.tv_usec/1e6;
double * t = (double *) Trace.stack.p;
assert (Trace.stack.len >= 2*sizeof(double));
t += Trace.stack.len/sizeof(double) - 2;
Trace.stack.len -= 2*sizeof(double);
double dt = te - t[0];
#if 0
fprintf (stderr, "end trace %s:%s:%d ts: %g te: %g dt: %g sum: %g\n",
func, file, line, t[0], te, dt, t[1]);
#endif
trace_add (func, file, line, dt, dt - t[1]);
if (Trace.stack.len >= 2*sizeof(double)) {
t -= 2;
t[1] += dt;
}
#if NVTX
nvtxRangePop();
#endif
}
static int compar_self (const void * p1, const void * p2)
{
const TraceIndex * t1 = p1, * t2 = p2;
return t1->self < t2->self;
}
#if _MPI
static int compar_func (const void * p1, const void * p2)
{
const TraceIndex * t1 = p1, * t2 = p2;
if (t1->line != t2->line)
return t1->line < t2->line;
return strcmp (t1->file, t2->file);
}
#endif
void trace_print (FILE * fp, double threshold)
{
int i, len = Trace.index.len/sizeof(TraceIndex);
double total = 0.;
TraceIndex * t;
Array * index = array_new();
for (i = 0, t = (TraceIndex *) Trace.index.p; i < len; i++, t++)
array_append (index, t, sizeof(TraceIndex)), total += t->self;
#if _MPI
qsort (index->p, len, sizeof(TraceIndex), compar_func);
double tot[len], self[len], min[len], max[len];
for (i = 0, t = (TraceIndex *) index->p; i < len; i++, t++)
tot[i] = t->total, self[i] = t->self;
MPI_Reduce (self, min, len, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);
MPI_Reduce (self, max, len, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
MPI_Reduce (pid() ? self : MPI_IN_PLACE,
self, len, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
MPI_Reduce (pid() ? tot : MPI_IN_PLACE,
tot, len, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
total = 0.;
for (i = 0, t = (TraceIndex *) index->p; i < len; i++, t++)
t->total = tot[i]/npe(), t->self = self[i]/npe(),
t->max = max[i], t->min = min[i], total += t->self;
#endif // _MPI
qsort (index->p, len, sizeof(TraceIndex), compar_self);
fprintf (fp, " calls total self %% total function\n");
for (i = 0, t = (TraceIndex *) index->p; i < len; i++, t++)
if (t->self*100./total > threshold) {
fprintf (fp, "%8d %6.2f %6.2f %4.1f%%",
t->calls, t->total, t->self, t->self*100./total);
#if _MPI
fprintf (fp, " (%4.1f%% - %4.1f%%)", t->min*100./total, t->max*100./total);
#endif
fprintf (fp, " %s():%s:%d\n", t->func, t->file, t->line);
}
fflush (fp);
array_free (index);
for (i = 0, t = (TraceIndex *) Trace.index.p; i < len; i++, t++)
t->calls = t->total = t->self = 0.;
}
static void trace_off()
{
trace_print (fout, 0.);
int i, len = Trace.index.len/sizeof(TraceIndex);
TraceIndex * t;
for (i = 0, t = (TraceIndex *) Trace.index.p; i < len; i++, t++)
free (t->func), free (t->file);
free (Trace.index.p);
Trace.index.p = NULL;
Trace.index.len = Trace.index.max = 0;
free (Trace.stack.p);
Trace.stack.p = NULL;
Trace.stack.len = Trace.stack.max = 0;
}
#else // disable tracing
@ define tracing(...)
@ define end_tracing(...)
#endif
// OpenMP / MPI
#if _OPENMP
@define tid() omp_get_thread_num()
@define pid() 0
@define npe() omp_get_num_threads()
@define mpi_all_reduce(v,type,op)
@define mpi_all_reduce_array(v,type,op,elem)
@define mpi_sum_reduce_init(s,v,type,op,elem)
@define mpi_sum_reduce_array(s,v,type,op,elem)
#elif _MPI
static bool in_prof = false;
static double prof_start, _prof;
@def prof_start(name)
assert (!in_prof); in_prof = true;
prof_start = MPI_Wtime();
@
@def prof_stop()
assert (in_prof); in_prof = false;
_prof = MPI_Wtime();
mpi_time += _prof - prof_start;
@
#if FAKE_MPI
@define mpi_all_reduce(v,type,op)
@define mpi_all_reduce_array(v,type,op,elem)
@define mpi_sum_reduce_init(s,v,type,op,elem)
@define mpi_sum_reduce_array(s,v,type,op,elem)
#else // !FAKE_MPI
trace
int mpi_all_reduce0 (void *sendbuf, void *recvbuf, int count,
MPI_Datatype datatype, MPI_Op op, MPI_Comm comm)
{
return MPI_Allreduce (sendbuf, recvbuf, count, datatype, op, comm);
}
@def mpi_all_reduce(v,type,op) {
prof_start ("mpi_all_reduce");
union { int a; float b; double c;} global;
mpi_all_reduce0 (&(v), &global, 1, type, op, MPI_COMM_WORLD);
memcpy (&(v), &global, sizeof (v));
prof_stop();
}
@
static size_t mpi_data_type_size (MPI_Datatype datatype)
{
if (datatype == MPI_DOUBLE) return sizeof (double);
else if (datatype == MPI_INT) return sizeof (int);
else if (datatype == MPI_LONG) return sizeof (long);
else if (datatype == MPI_C_BOOL) return sizeof (bool);
else if (datatype == MPI_UNSIGNED_CHAR) return sizeof (unsigned char);
else {
fprintf (stderr, "unknown reduction type\n");
fflush (stderr);
abort();
}
return 0;
}
trace
void mpi_all_reduce_array (void * v, MPI_Datatype datatype, MPI_Op op, int elem)
{
size_t size = mpi_data_type_size (datatype);
void * global = malloc (elem*size), * tmp = malloc (elem*size);
memcpy (tmp, v, elem*size);
mpi_all_reduce0 (tmp, global, elem, datatype, op, MPI_COMM_WORLD);
memcpy (v, global, elem*size);
free (global), free (tmp);
}
void * mpi_sum_reduce_init0 (void * v, MPI_Datatype datatype, MPI_Op op, int elem)
{
size_t size = mpi_data_type_size (datatype);
void * s = malloc (elem*size);
memcpy (s, v, elem*size);
memset (v, 0, elem*size);
return s;
}
@define mpi_sum_reduce_init(s,v,datatype,op,elem) void * s = mpi_sum_reduce_init0(v,datatype,op,elem)
trace
void mpi_sum_reduce_array (void * s, void * v, MPI_Datatype datatype, MPI_Op op, int elem)
{
mpi_all_reduce_array (v, datatype, op, elem);
if (datatype == MPI_DOUBLE)
for (double * i = v, * j = s; elem; elem--, i++, j++)
*i += *j;
else if (datatype == MPI_INT)
for (int * i = v, * j = s; elem; elem--, i++, j++)
*i += *j;
else if (datatype == MPI_LONG)
for (long * i = v, * j = s; elem; elem--, i++, j++)
*i += *j;
else if (datatype == MPI_C_BOOL)
for (bool * i = v, * j = s; elem; elem--, i++, j++)
*i += *j;
else if (datatype == MPI_UNSIGNED_CHAR)
for (unsigned char * i = v, * j = s; elem; elem--, i++, j++)
*i += *j;
free (s);
}
#endif // !FAKE_MPI
@define QFILE FILE // a dirty trick to avoid qcc 'static FILE *' rule
FILE * qstderr (void)
{
static QFILE * fp = NULL;
if (!fp) {
if (mpi_rank > 0) {
char name[80];
sprintf (name, "log-%d", mpi_rank);
fp = fopen (name, "w");
}
else
fp = systderr;
}
return fp;
}
FILE * qstdout (void)
{
static QFILE * fp = NULL;
if (!fp) {
if (mpi_rank > 0) {
char name[80];
sprintf (name, "out-%d", mpi_rank);
fp = fopen (name, "w");
}
else
fp = systdout;
}
return fp;
}
static void finalize (void)
{
MPI_Finalize();
}
void mpi_init()
{
int initialized;
MPI_Initialized (&initialized);
if (!initialized) {
MPI_Init (NULL, NULL);
MPI_Comm_set_errhandler (MPI_COMM_WORLD, MPI_ERRORS_ARE_FATAL);
atexit (finalize);
}
MPI_Comm_rank (MPI_COMM_WORLD, &mpi_rank);
MPI_Comm_size (MPI_COMM_WORLD, &mpi_npe);
srand (mpi_rank + 1);
if (ferr == NULL) {
if (mpi_rank > 0) {
ferr = fopen ("/dev/null", "w");
fout = fopen ("/dev/null", "w");
}
else {
ferr = systderr;
fout = systdout;
}
char * etrace = getenv ("MALLOC_TRACE"), name[80];
if (etrace && mpi_rank > 0) {
sprintf (name, "%s-%d", etrace, mpi_rank);
setenv ("MALLOC_TRACE", name, 1);
}
#if MTRACE == 1
etrace = getenv ("MTRACE");
if (!etrace)
etrace = "mtrace";
if (mpi_rank > 0) {
sprintf (name, "%s-%d", etrace, mpi_rank);
pmtrace.fp = fopen (name, "w");
pmtrace.fname = systrdup(name);
}
else {
pmtrace.fp = fopen (etrace, "w");
pmtrace.fname = systrdup(etrace);
}
#endif
}
}
#else // not MPI, not OpenMP
@define tid() 0
@define pid() 0
@define npe() 1
@define mpi_all_reduce(v,type,op)
@define mpi_all_reduce_array(v,type,op,elem)
@define mpi_sum_reduce_init(s,v,type,op,elem)
@define mpi_sum_reduce_array(s,v,type,op,elem)
#endif // not MPI, not OpenMP
macro2 OMP_PARALLEL() {{...}}
@define OMP_PARALLEL(...) OMP(omp parallel S__VA_ARGS__)
@define NOT_UNUSED(x) (void)(x)
macro2 VARIABLES() { _CATCH; }
@define _index(a,m) (a.i)
@define val(a,k,l,m) data(k,l,m)[_index(a,m)]
double _val_higher_dimension = 0.;
/* undefined value */
/* Initialises unused memory with "signaling NaNs".
* This is probably not very portable, tested with
* gcc (Debian 4.4.5-8) 4.4.5 on Linux 2.6.32-5-amd64.
* This blog was useful:
* http://codingcastles.blogspot.co.nz/2008/12/nans-in-c.html
*/
@if (_GNU_SOURCE || __APPLE__) && !_OPENMP && !_CADNA
double undefined;
@ if __APPLE__
@ include <stdint.h>
@ include "fp_osx.h"
@ endif
@if _GPU
@ define enable_fpe(flags)
@else
@ define enable_fpe(flags) feenableexcept (flags)
@endif
@ define disable_fpe(flags) fedisableexcept (flags)
static void set_fpe (void) {
int64_t lnan = 0x7ff0000000000001;
assert (sizeof (int64_t) == sizeof (double));
memcpy (&undefined, &lnan, sizeof (double));
enable_fpe (FE_DIVBYZERO|FE_INVALID);
}
@else // !((_GNU_SOURCE || __APPLE__) && !_OPENMP && !_CADNA && !_GPU)
@ define undefined ((double) DBL_MAX)
@ define enable_fpe(flags)
@ define disable_fpe(flags)
static void set_fpe (void) {}
@endif
// Pipes
static FILE ** qpopen_pipes = NULL;
FILE * qpopen (const char * command, const char * type)
{
if (pid() > 0)
return fopen ("/dev/null", type);
FILE * fp = popen (command, type);
if (fp) {
FILE ** i = qpopen_pipes;
int n = 0;
while (i && *i) { n++; i++; }
qrealloc (qpopen_pipes, n + 2, FILE *);
qpopen_pipes[n] = fp;
qpopen_pipes[n+1] = NULL;
}
return fp;
}
int qpclose (FILE * fp)
{
if (pid() > 0)
return fclose (fp);
FILE ** i = qpopen_pipes;
while (i && *i) {
if (*i == fp)
*i = (FILE *) 1;
i++;
}
return pclose (fp);
}
static void qpclose_all()
{
FILE ** i = qpopen_pipes;
while (i && *i) {
if (*i != (FILE *) 1)
pclose (*i);
i++;
}
free (qpopen_pipes);
qpopen_pipes = NULL;
}
#define popen qpopen
#define pclose qpclose
// files with pid
FILE * lfopen (const char * name, const char * mode)
{
char fname[80];
sprintf (fname, "%s-%d", name, pid());
return fopen (fname, mode);
}
#include "../ast/symbols.h"
enum typedef_kind_t {
sym_SCALAR = sym_root + 1,
sym_VECTOR,
sym_TENSOR,
sym_COORD,
sym__COORD,
sym_VEC4,
sym_IVEC
};
@define attroffset(x) (offsetof(_Attributes,x))These are placeholders for internally-defined macros.
typedef int Reduce;
macro2 foreach_face (char flags = 0, Reduce reductions = None,
const char * order = "xyz")
{{...}}
macro2 einstein_sum() {{...}}
macro2 diagonalize (int a) {{...}}Macros overloaded by the interpreter.
@define dimensional(...)
#define show_dimension(...) show_dimension_internal (__VA_ARGS__ + 10293847566574839201.)
@define show_dimension_internal(...)
@define display_value(...)
@define interpreter_verbosity(...)