src/grid/tree.h

    typedef double real;
    
    #include "mempool.h"
    
    #define TWO_ONE 1 // enforce 2:1 refinement ratio
    #define GHOSTS  2
    
    struct _Point {
      /* the current cell index and level */
      int i;
    #if dimension >= 2
      int j;
    #endif
    #if dimension >= 3
      int k;
    #endif
      int level;
    #if LAYERS
      int l;
      @define _BLOCK_INDEX , point.l
    #else
      @define _BLOCK_INDEX
    #endif
    };
    
    #include "memindex/range.h"
    
    #if LAYERS
    # include "grid/layers.h"
    #endif
    
    /* By default only one layer of ghost cells is used on the boundary to
       optimise the cost of boundary conditions. */
    
    #ifndef BGHOSTS
    @ define BGHOSTS 1
    #endif
    
    #define _I     (point.i - GHOSTS)
    #if dimension >= 2
    # define _J    (point.j - GHOSTS)
    #endif
    #if dimension >= 3
    # define _K    (point.k - GHOSTS)
    #endif
    #define _DELTA (1./(1 << point.level))
    
    typedef struct {
      unsigned short flags;
      // number of refined neighbors in a 3^dimension neighborhood
      unsigned short neighbors;
      int pid; // process id
    } Cell;
    
    enum {
      active    = 1 << 0,
      leaf      = 1 << 1,
      border    = 1 << 2,
      vertex    = 1 << 3,
      user      = 4,
    
      face_x = 1 << 0
    #if dimension >= 2
      , face_y = 1 << 1
    #endif
    #if dimension >= 3
      , face_z = 1 << 2
    #endif
    };
    
    @define is_active(cell)  ((cell).flags & active)
    @define is_leaf(cell)    ((cell).flags & leaf)
    @define is_coarse()      ((cell).neighbors > 0)
    @define is_border(cell)  ((cell).flags & border)
    @define is_local(cell)   ((cell).pid == pid())
    @define is_vertex(cell)  ((cell).flags & vertex)
    
    // Caches
    
    typedef struct {
      int i;
    #if dimension >= 2
      int j;
    #endif
    #if dimension >= 3
      int k;
    #endif  
    } IndexLevel;
    
    typedef struct {
      IndexLevel * p;
      int n, nm;
    } CacheLevel;
    
    typedef struct {
      int i;
    #if dimension >= 2
      int j;
    #endif
    #if dimension >= 3
      int k;
    #endif  
      int level, flags;
    } Index;
    
    typedef struct {
      Index * p;
      int n, nm;
    } Cache;
    
    // Layer
    
    typedef struct {
      Memindex m; // the structure indexing the data
      Mempool * pool; // the memory pool actually holding the data
      long nc;     // the number of allocated elements
      int len;    // the (1D) size of the array
    } Layer;
    
    static size_t _size (size_t depth)
    {
      return (1 << depth) + 2*GHOSTS;
    }
    
    static size_t poolsize (size_t depth, size_t size)
    {
      // the maximum amount of data at a given level
    #if dimension == 1
      return _size(depth)*size;  
    #elif dimension == 2
      return sq(_size(depth))*size;
    #else
      return cube(_size(depth))*size;
    #endif
    }
    
    static Layer * new_layer (int depth)
    {
      Layer * l = qmalloc (1, Layer);
      l->len = _size (depth);
      if (depth == 0)
        l->pool = NULL; // the root layer does not use a pool
      else {
        size_t size = sizeof(Cell) + datasize;
        // the block size is 2^dimension*size because we allocate
        // 2^dimension children at a time
        l->pool = mempool_new (poolsize (depth, size), (1 << dimension)*size);
      }
      l->m = mem_new (l->len);
      l->nc = 0;
      return l;
    }
    
    static void destroy_layer (Layer * l)
    {
      if (l->pool)
        mempool_destroy (l->pool);
      mem_destroy (l->m, l->len);
      free (l);
    }
    
    // Tree
    
    typedef struct {
      Grid g;
      Layer ** L; /* the grids at each level */
    
      Cache        leaves;   /* leaf indices */
      Cache        faces;    /* face indices */
      Cache        vertices; /* vertex indices */
      Cache        refined;  /* refined cells */
      CacheLevel * active;   /* active cells indices for each level */
      CacheLevel * prolongation; /* halo prolongation indices for each level */
      CacheLevel * boundary;  /* boundary indices for each level */
      /* indices of boundary cells with non-boundary parents */
      CacheLevel * restriction;
      
      bool dirty;       /* whether caches should be updated */
    } Tree;
    
    #define tree ((Tree *)grid)
    
    static Point last_point;
    
    #define BSIZE 128
    
    static void cache_level_append (CacheLevel * c, Point p)
    {
      if (c->n >= c->nm) {
        c->nm += BSIZE;
        qrealloc (c->p, c->nm, IndexLevel);
      }
      c->p[c->n].i = p.i;
    #if dimension >= 2
      c->p[c->n].j = p.j;
    #endif
    #if dimension >= 3
      c->p[c->n].k = p.k;
    #endif
      c->n++;
    }
    
    static void cache_level_shrink (CacheLevel * c)
    {
      if (c->nm > (c->n/BSIZE + 1)*BSIZE) {
        c->nm = (c->n/BSIZE + 1)*BSIZE;
        assert (c->nm > c->n);
        c->p = (IndexLevel *) realloc (c->p, sizeof (Index)*c->nm);
      }
    }
    
    static void cache_append (Cache * c, Point p, unsigned short flags)
    {
      if (c->n >= c->nm) {
        c->nm += BSIZE;
        qrealloc (c->p, c->nm, Index);
      }
      c->p[c->n].i = p.i;
    #if dimension >= 2
      c->p[c->n].j = p.j;
    #endif
    #if dimension >= 3
      c->p[c->n].k = p.k;
    #endif  
      c->p[c->n].level = p.level;
      c->p[c->n].flags = flags;
      c->n++;
    }
    
    void cache_shrink (Cache * c)
    {
      cache_level_shrink ((CacheLevel *)c);
    }
    
    #undef BSIZE
    
    /* low-level memory management */
    #if dimension == 1
    @define allocated(k,l,n) (mem_allocated (tree->L[point.level]->m, point.i+k))
    @define NEIGHBOR(k,l,n)	(mem_data (tree->L[point.level]->m, point.i+k))
    @def PARENT(k,l,n) (mem_data (tree->L[point.level-1]->m,
    			      (point.i+GHOSTS)/2+k))
    @
    @def allocated_child(k,l,n) (level < depth() &&
    			     mem_allocated(tree->L[point.level+1]->m,
    					   2*point.i-GHOSTS+k))
    @
    @define CHILD(k,l,n)  (mem_data (tree->L[point.level+1]->m, 2*point.i-GHOSTS+k))
    #elif dimension == 2
    @def allocated(k,l,n) (mem_allocated (tree->L[point.level]->m,
    				      point.i+k, point.j+l))
    @
    @def NEIGHBOR(k,l,n)	(mem_data (tree->L[point.level]->m,
    				   point.i+k, point.j+l))
    @
    @def PARENT(k,l,n) (mem_data (tree->L[point.level-1]->m,
    			      (point.i+GHOSTS)/2+k, (point.j+GHOSTS)/2+l))
    @
    @def allocated_child(k,l,n)  (level < depth() &&
    			      mem_allocated (tree->L[point.level+1]->m,
    					     2*point.i-GHOSTS+k,
    					     2*point.j-GHOSTS+l))
    @
    @def CHILD(k,l,n)  (mem_data (tree->L[point.level+1]->m,
    			      2*point.i-GHOSTS+k, 2*point.j-GHOSTS+l))
    @
    #else // dimension == 3
    @def allocated(a,l,n) (mem_allocated (tree->L[point.level]->m,
    				      point.i+a, point.j+l, point.k+n))
    @
    @def NEIGHBOR(a,l,n)	(mem_data (tree->L[point.level]->m,
    				   point.i+a, point.j+l, point.k+n))
    @			
    @def PARENT(a,l,n) (mem_data (tree->L[point.level-1]->m,
    			      (point.i+GHOSTS)/2+a,
    			      (point.j+GHOSTS)/2+l,
    			      (point.k+GHOSTS)/2+n))
    @
    @def allocated_child(a,l,n)  (level < depth() &&
    			      mem_allocated (tree->L[point.level+1]->m,
    					     2*point.i-GHOSTS+a,
    					     2*point.j-GHOSTS+l,
    					     2*point.k-GHOSTS+n))
    @
    @def CHILD(a,l,n)  (mem_data (tree->L[point.level+1]->m,
    			      2*point.i-GHOSTS+a,
    			      2*point.j-GHOSTS+l,
    			      2*point.k-GHOSTS+n))
    @
    #endif // dimension == 3
    @define CELL(m) (*((Cell *)(m)))
    
    /***** Multigrid macros *****/
    @define depth()        (grid->depth)
    @define aparent(k,l,n) CELL(PARENT(k,l,n))
    @define child(k,l,n)   CELL(CHILD(k,l,n))
    
    /***** Tree macros ****/
    @define cell		 CELL(NEIGHBOR(0,0,0))
    @define neighbor(k,l,n)	 CELL(NEIGHBOR(k,l,n))
    @def neighborp(l,m,n) (Point) {
        point.i + l,
    #if dimension >= 2
        point.j + m,
    #endif
    #if dimension >= 3
        point.k + n,
    #endif
        point.level
        _BLOCK_INDEX
    }
    @
    
    /***** Data macros *****/
    @define data(k,l,n)     ((double *) (NEIGHBOR(k,l,n) + sizeof(Cell)))
    @define fine(a,k,p,n)   ((double *) (CHILD(k,p,n) + sizeof(Cell)))[_index(a,n)]
    @define coarse(a,k,p,n) ((double *) (PARENT(k,p,n) + sizeof(Cell)))[_index(a,n)]
    
    macro POINT_VARIABLES (Point point = point) {
      VARIABLES();
      int level = point.level; NOT_UNUSED(level);
    #if dimension == 1
      struct { int x; } child = { 2*((point.i+GHOSTS)%2)-1 };
    #elif dimension == 2
      struct { int x, y; } child = {
        2*((point.i+GHOSTS)%2)-1, 2*((point.j+GHOSTS)%2)-1
      };
    #else
      struct { int x, y, z; } child = {
        2*((point.i+GHOSTS)%2)-1, 2*((point.j+GHOSTS)%2)-1, 2*((point.k+GHOSTS)%2)-1
      };
    #endif
      NOT_UNUSED(child);
      Point parent = point;	NOT_UNUSED(parent);
      parent.level--;
      parent.i = (point.i + GHOSTS)/2;
    #if dimension >= 2
      parent.j = (point.j + GHOSTS)/2;
    #endif
    #if dimension >= 3
      parent.k = (point.k + GHOSTS)/2;
    #endif
    #if TRASH
      Cell * cellp = point.level <= depth() && allocated(0,0,0) ?
        (Cell *) NEIGHBOR(0,0,0) : NULL;
      NOT_UNUSED(cellp);
    #endif
    }
    
    #include "foreach_cell.h"
    
    #if dimension == 1
    macro1 foreach_child (Point point = point, break = (_k = 2)) {
      {
        int _i = 2*point.i - GHOSTS;
        point.level++;
        for (int _k = 0; _k < 2; _k++) {
          point.i = _i + _k;
          POINT_VARIABLES();
          {...}
        }
        point.i = (_i + GHOSTS)/2;
        point.level--;
      }
    }
    #elif dimension == 2
    macro1 foreach_child (Point point = point, break = (_k = _l = 2)) {
      {
        int _i = 2*point.i - GHOSTS, _j = 2*point.j - GHOSTS;
        point.level++;
        for (int _k = 0; _k < 2; _k++) {
          point.i = _i + _k;
          for (int _l = 0; _l < 2; _l++) {
    	point.j = _j + _l;
    	POINT_VARIABLES();
    	{...}
          }
        }
        point.i = (_i + GHOSTS)/2; point.j = (_j + GHOSTS)/2;
        point.level--;
      }
    }
    #else // dimension == 3
    macro1 foreach_child (Point point = point, break = (_l = _m = _n = 2)) {
      {
        int _i = 2*point.i - GHOSTS, _j = 2*point.j - GHOSTS, _k = 2*point.k - GHOSTS;
        point.level++;
        for (int _l = 0; _l < 2; _l++) {
          point.i = _i + _l;
          for (int _m = 0; _m < 2; _m++) {
    	point.j = _j + _m;
    	for (int _n = 0; _n < 2; _n++) {
    	  point.k = _k + _n;
    	  POINT_VARIABLES();
    	  {...}
    	}
          }
        }
        point.i = (_i + GHOSTS)/2;point.j = (_j + GHOSTS)/2;point.k = (_k + GHOSTS)/2;
        point.level--;
      }
    }
    #endif // dimension == 3
      
    #define update_cache() { if (tree->dirty) update_cache_f(); }
    
    #define is_refined(cell)      (!is_leaf (cell) && cell.neighbors && cell.pid >= 0)
    #define is_prolongation(cell) (!is_leaf(cell) && !cell.neighbors && cell.pid >= 0)
    #define is_boundary(cell)     (cell.pid < 0)
      
    @def is_refined_check() (is_refined(cell) &&
    			 point.i > 0 && point.i < (1 << level) + 2*GHOSTS - 1
    #if dimension > 1
    			 && point.j > 0 && point.j < (1 << level) + 2*GHOSTS - 1
    #endif
    #if dimension > 2
    			 && point.k > 0 && point.k < (1 << level) + 2*GHOSTS - 1
    #endif
    			 )
    @
    
    macro2 foreach_cache (Cache cache, Reduce reductions = None)
    {
      OMP_PARALLEL (reductions) {
        int ig = 0, jg = 0, kg = 0; NOT_UNUSED(ig); NOT_UNUSED(jg); NOT_UNUSED(kg);
        Point point = {0}; NOT_UNUSED (point);
        point.i = GHOSTS;
    #if dimension > 1
        point.j = GHOSTS;
    #endif
    #if dimension > 2
        point.k = GHOSTS;
    #endif
        int _k; unsigned short _flags; NOT_UNUSED(_flags);
        OMP(omp for schedule(static))
          for (_k = 0; _k < cache.n; _k++) {
    	point.i = cache.p[_k].i;
    #if dimension >= 2
    	point.j = cache.p[_k].j;
    #endif
    #if dimension >= 3
    	point.k = cache.p[_k].k;
    #endif
    	point.level = cache.p[_k].level;
    	_flags = cache.p[_k].flags;
    	{...}
          }
      }
    }
    
    macro2 foreach_cache_level (Cache cache, int _l, Reduce reductions = None)
    {
      OMP_PARALLEL (reductions) {
        int ig = 0, jg = 0, kg = 0; NOT_UNUSED(ig); NOT_UNUSED(jg); NOT_UNUSED(kg);
        Point point = {0}; NOT_UNUSED (point);
        point.i = GHOSTS;
    #if dimension > 1
        point.j = GHOSTS;
    #endif
    #if dimension > 2
        point.k = GHOSTS;
    #endif
        point.level = _l;
        int _k;
        OMP(omp for schedule(static))
          for (_k = 0; _k < cache.n; _k++) {
    	point.i = cache.p[_k].i;
    #if dimension >= 2
    	point.j = cache.p[_k].j;
    #endif
    #if dimension >= 3
    	point.k = cache.p[_k].k;
    #endif
    	{...}
          }
      }
    }
    
    static void update_cache_f (void);
    
    macro2 foreach_boundary_level (int _l, Reduce reductions = None)
    {
      if (_l <= depth()) {
        update_cache();
        CacheLevel _boundary = tree->boundary[_l];
        foreach_cache_level (_boundary, _l, reductions)
          {...}
      }
    }
    
    #define bid(cell) (- cell.pid - 1)
    
    macro2 foreach_halo_prolongation (int _l)
    {
      if (_l <= depth()) {
        update_cache();
        CacheLevel _cache = tree->prolongation[_l];
        foreach_cache_level (_cache, _l)
          {...}
      }
    }
    
    macro2 foreach_halo_restriction (int _l)
    {
      if (_l <= depth()) {
        update_cache();
        CacheLevel _cache = tree->restriction[_l];
        foreach_cache_level (_cache, _l)
          {...}
      }
    }
    
    #define foreach_halo(type, l) foreach_halo_##type(l)
    
    #include "neighbors.h"
    
    static inline bool has_local_children (Point point)
    {
      foreach_child()
        if (is_local(cell))
          return true;
      return false;
    }
    
    static inline void cache_append_face (Point point, unsigned short flags)
    {
      Tree * q = tree;
      cache_append (&q->faces, point, flags);
    #if dimension == 2
      if (!is_vertex(cell)) {
        cache_append (&q->vertices, point, 0);
        cell.flags |= vertex;
      }
      foreach_dimension()
        if ((flags & face_y) && !is_vertex(neighbor(1))) {
          cache_append (&q->vertices, neighborp(1), 0);
          neighbor(1).flags |= vertex;
        }
    #elif dimension == 3
      foreach_dimension()
        if (flags & face_x)
          for (int i = 0; i <= 1; i++)
    	for (int j = 0; j <= 1; j++)
    	  if (!is_vertex(neighbor(0,i,j))) {
    	    cache_append (&q->vertices, neighborp(0,i,j), 0);
    	    neighbor(0,i,j).flags |= vertex;
    	  }
    #endif
    }
    
    #define FBOUNDARY 1 // fixme: this should work with zero
    
    static void update_cache_f (void)
    {
      Tree * q = tree;
    
      foreach_cache (q->vertices)
        if (level <= depth() && allocated(0))
          cell.flags &= ~vertex;
      
      /* empty caches */
      q->leaves.n = q->faces.n = q->vertices.n = 0;
      for (int l = 0; l <= depth(); l++)
        q->active[l].n = q->prolongation[l].n =
          q->boundary[l].n = q->restriction[l].n = 0;
    #if FBOUNDARY
      const unsigned short fboundary = 1 << user;
      foreach_cell() {
    #else    
      foreach_cell_all() {
    #endif
        if (is_local(cell) && is_active(cell)) {
          // active cells
          //      assert (is_active(cell));
          cache_level_append (&q->active[level], point);
        }
    #if !FBOUNDARY
        if (is_boundary(cell)) {
          // boundary conditions
          bool has_neighbors = false;
          foreach_neighbor (BGHOSTS)
    	if (allocated(0) && !is_boundary(cell)) {
    	  has_neighbors = true; break;
    	}
          if (has_neighbors)
    	cache_level_append (&q->boundary[level], point);
          // restriction for masked cells
          if (level > 0 && is_local(aparent(0)))
    	cache_level_append (&q->restriction[level], point);
        }
    #else
        // boundaries
        if (!is_boundary(cell)) {
          // look in a 5x5 neighborhood for boundary cells
          foreach_neighbor (BGHOSTS)
    	if (allocated(0) && is_boundary(cell) && !(cell.flags & fboundary)) {
    	  cache_level_append (&q->boundary[level], point);
    	  cell.flags |= fboundary;
    	}
        }
        // restriction for masked cells
        else if (level > 0 && is_local(aparent(0)))
          cache_level_append (&q->restriction[level], point);
    #endif
        if (is_leaf (cell)) {
          if (is_local(cell)) {
    	cache_append (&q->leaves, point, 0);
    	// faces
    	unsigned short flags = 0;
    	foreach_dimension()
    	  if (is_boundary(neighbor(-1)) || is_prolongation(neighbor(-1)) ||
    	      is_leaf(neighbor(-1)))
    	    flags |= face_x;
    	if (flags)
    	  cache_append (&q->faces, point, flags);
    	foreach_dimension()
    	  if (is_boundary(neighbor(1)) || is_prolongation(neighbor(1)) ||
    	      (!is_local(neighbor(1)) && is_leaf(neighbor(1))))
    	    cache_append (&q->faces, neighborp(1), face_x);
    	// vertices
    	for (int i = 0; i <= 1; i++)
            #if dimension >= 2
    	  for (int j = 0; j <= 1; j++)
            #endif
              #if dimension >= 3
    	    for (int k = 0; k <= 1; k++)
    	  #endif
    	      if (!is_vertex(neighbor(i,j,k))) {
    		cache_append (&q->vertices, neighborp(i,j,k), 0);
    		neighbor(i,j,k).flags |= vertex;
    	      }
    	// halo prolongation
            if (cell.neighbors > 0)
    	  cache_level_append (&q->prolongation[level], point);
          }
          else if (!is_boundary(cell) || is_local(aparent(0))) { // non-local
    	// faces
    	unsigned short flags = 0;
    	foreach_dimension()
    	  if (allocated(-1) &&
    	      is_local(neighbor(-1)) && is_prolongation(neighbor(-1)))
    	    flags |= face_x;
    	if (flags)
    	  cache_append_face (point, flags);
    	foreach_dimension()
    	  if (allocated(1) && is_local(neighbor(1)) &&
    	      is_prolongation(neighbor(1)))
    	    cache_append_face (neighborp(1), face_x);
          }
    #if FBOUNDARY // fixme: this should always be included
          continue; 
    #endif
        }
      }
    
      /* optimize caches */
      cache_shrink (&q->leaves);
      cache_shrink (&q->faces);
      cache_shrink (&q->vertices);
      for (int l = 0; l <= depth(); l++) {
        cache_level_shrink (&q->active[l]);
        cache_level_shrink (&q->prolongation[l]);
        cache_level_shrink (&q->boundary[l]);
        cache_level_shrink (&q->restriction[l]);
    }
      
      q->dirty = false;
    
    #if FBOUNDARY
      for (int l = depth(); l >= 0; l--)
        foreach_boundary_level (l)
          cell.flags &= ~fboundary;
    #endif
      
      // mesh size
      grid->n = q->leaves.n;
      // for MPI the reduction operation over all processes is done by balance()
    @if !_MPI
      grid->tn = grid->n;
      grid->maxdepth = grid->depth;
    @endif
    }
    
    macro2 foreach (char flags = 0, Reduce reductions = None) {
      update_cache();
      foreach_cache (tree->leaves, reductions)
        {...}
    }
    
    macro2 foreach_face_generic (char flags = 0, Reduce reductions = None,
    				const char * order = "xyz")
    {
      update_cache();
      foreach_cache (tree->faces, reductions)
        {...}
    }
    
    macro1 is_face_x (unsigned short _f = _flags) {
      if (_f & face_x) {
        int ig = -1; NOT_UNUSED(ig);
        {...}
      }
    }
    
    #if dimension >= 2
    macro1 is_face_y (unsigned short _f = _flags) {
      if (_f & face_y) {
        int jg = -1; NOT_UNUSED(jg);
        {...}
      }
    }
    #endif
    #if dimension >= 3
    macro1 is_face_z (unsigned short _f = _flags) {
      if (_f & face_z) {
        int kg = -1; NOT_UNUSED(kg);
        {...}
      }
    }
    #endif
    
    #if dimension == 3
    # define foreach_edge(...)			\
      foreach_vertex (__VA_ARGS__)			\
        foreach_dimension()				\
        if (is_vertex(neighbor(1)))
    #else // dimension < 3
    # define foreach_edge(...) foreach_face(y,x,__VA_ARGS__)
    #endif
    
    macro2 foreach_level (int l, char flags = 0, Reduce reductions = None) {
      if (l <= depth()) {
        update_cache();
        CacheLevel _active = tree->active[l];
        foreach_cache_level (_active, l, reductions)
          {...}
      }
    }
    
    macro2 foreach_coarse_level (int l, char flags = 0, Reduce reductions = None) {
      foreach_level(l, flags, reductions)
        if (!is_leaf(cell))
          {...}
    }
    
    macro2 foreach_level_or_leaf (int l, char flags = 0, Reduce reductions = None) {
      for (int _l1 = l; _l1 >= 0; _l1--)
        foreach_level(_l1, flags, reductions)
          if (_l1 == l || is_leaf (cell))
    	{...}
    }
    
    @if TRASH
    @ undef trash
    @ define trash(list) reset(list, undefined)
    @endif
    
    void reset (void * alist, double val)
    {
      scalar * list = (scalar *) alist;
      Tree * q = tree;
      /* low-level memory management */
      for (int l = 0; l <= depth(); l++) {
        Layer * L = q->L[l];
        foreach_mem (L->m, L->len, 1) {
          point.level = l;
          for (scalar s in list) {
    	if (!is_constant(s))
    	  for (int b = 0; b < s.block; b++)
    	    data(0,0,0)[s.i + b] = val;
          }
        }
      }
    }
    
    static CacheLevel * cache_level_resize (CacheLevel * name, int a)
    {
      for (int i = 0; i <= depth() - a; i++)
        free (name[i].p);
      free (name);
      return qcalloc (depth() + 1, CacheLevel);
    }
      
    static void update_depth (int inc)
    {
      Tree * q = tree;
      grid->depth += inc;
      q->L = &(q->L[-1]);
      qrealloc (q->L, grid->depth + 2, Layer *);
      q->L = &(q->L[1]);
      if (inc > 0)
        q->L[grid->depth] = new_layer (grid->depth);
      q->active = cache_level_resize (q->active, inc);
      q->prolongation = cache_level_resize (q->prolongation, inc);
      q->boundary = cache_level_resize (q->boundary, inc);
      q->restriction = cache_level_resize (q->restriction, inc);
    }
    
    #if dimension == 1
    typedef void (* PeriodicFunction) (Memindex, int, int, void *);
    
    static void periodic_function (Memindex m, int i, int len, void * b,
    			       PeriodicFunction f)
    {
      f(m, i, len, b);
      if (Period.x) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = i + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, n, len, b);
      }
    }
    			       
    static void assign_periodic (Memindex m, int i, int len, void * b)
    {
      periodic_function (m, i, len, b, mem_assign);
    }
    
    static void free_periodic (Memindex m, int i, int len)
    {
      periodic_function (m, i, len, NULL, (PeriodicFunction) mem_free);
    }
    #elif dimension == 2
    typedef void (* PeriodicFunction) (Memindex, int, int, int, void *);
    
    static void periodic_function (Memindex m, int i, int j, int len, void * b,
    			       PeriodicFunction f)
    {
      f(m, i, j, len, b);
      if (Period.x) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = i + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, n, j, len, b);
        if (Period.y)
          for (int l = - 1; l <= 1; l += 2)
    	for (int n = j + l*nl; n >= 0 && n < len; n += l*nl) {
    	  f(m, i, n, len, b);
    	  for (int o = - 1; o <= 1; o += 2)
    	    for (int p = i + o*nl; p >= 0 && p < len; p += o*nl)
    	      f(m, p, n, len, b);
    	}
      }
      else if (Period.y) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = j + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, i, n, len, b);
      }
    }
    			       
    static void assign_periodic (Memindex m, int i, int j, int len, void * b)
    {
      periodic_function (m, i, j, len, b, mem_assign);
    }
    
    static void free_periodic (Memindex m, int i, int j, int len)
    {
      periodic_function (m, i, j, len, NULL, mem_free);
    }
    #else // dimension == 3
    typedef void (* PeriodicFunction) (Memindex, int, int, int, int, void *);
    
    static void periodic_function (Memindex m, int i, int j, int k, int len,
    			       void * b, PeriodicFunction f)
    {
      f(m, i, j, k, len, b);
      if (Period.x) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = i + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, n, j, k, len, b);
        if (Period.y) {
          for (int l = - 1; l <= 1; l += 2)
    	for (int n = j + l*nl; n >= 0 && n < len; n += l*nl) {
    	  f(m, i, n, k, len, b);
    	  for (int o = - 1; o <= 1; o += 2)
    	    for (int p = i + o*nl; p >= 0 && p < len; p += o*nl)
    	      f(m, p, n, k, len, b);
    	}
          if (Period.z)
    	for (int l = - 1; l <= 1; l += 2)
    	  for (int n = k + l*nl; n >= 0 && n < len; n += l*nl) {
    	    f(m, i, j, n, len, b);
    	    for (int q = - 1; q <= 1; q += 2)
    	      for (int r = j + q*nl; r >= 0 && r < len; r += q*nl)
    		f(m, i, r, n, len, b);
    	    for (int o = - 1; o <= 1; o += 2)
    	      for (int p = i + o*nl; p >= 0 && p < len; p += o*nl) {
    		f(m, p, j, n, len, b);
    		for (int q = - 1; q <= 1; q += 2)
    		  for (int r = j + q*nl; r >= 0 && r < len; r += q*nl)
    		    f(m, p, r, n, len, b);
    	      }
    	  }
        }
        else if (Period.z)
          for (int l = - 1; l <= 1; l += 2)
    	for (int n = k + l*nl; n >= 0 && n < len; n += l*nl) {
    	  f(m, i, j, n, len, b);
    	  for (int o = - 1; o <= 1; o += 2)
    	    for (int p = i + o*nl; p >= 0 && p < len; p += o*nl)
    	      f(m, p, j, n, len, b);
    	}
      }
      else if (Period.y) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = j + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, i, n, k, len, b);
        if (Period.z)
          for (int l = - 1; l <= 1; l += 2)
    	for (int n = k + l*nl; n >= 0 && n < len; n += l*nl) {
    	  f(m, i, j, n, len, b);
    	  for (int o = - 1; o <= 1; o += 2)
    	    for (int p = j + o*nl; p >= 0 && p < len; p += o*nl)
    	      f(m, i, p, n, len, b);
    	}
      }
      else if (Period.z) {
        int nl = len - 2*GHOSTS;
        for (int l = - 1; l <= 1; l += 2)
          for (int n = k + l*nl; n >= 0 && n < len; n += l*nl)
    	f(m, i, j, n, len, b);
      }
    }
    
    static void assign_periodic (Memindex m, int i, int j, int k, int len, void * b)
    {
      periodic_function (m, i, j, k, len, b, mem_assign);
    }
    
    static void free_periodic (Memindex m, int i, int j, int k, int len)
    {
      periodic_function (m, i, j, k, len, NULL, (PeriodicFunction) mem_free);
    }
    #endif // dimension == 3
    
    static void alloc_children (Point point)
    {
      if (point.level == grid->depth)
        update_depth (+1);
      else if (allocated_child(0,0,0))
        return;
      
      /* low-level memory management */
      Layer * L = tree->L[point.level + 1];
      L->nc++;
      size_t len = sizeof(Cell) + datasize;
      char * b = (char *) mempool_alloc0 (L->pool);
      int i = 2*point.i - GHOSTS;
      for (int k = 0; k < 2; k++, i++) {
    #if dimension == 1
        assign_periodic (L->m, i, L->len, b);
        b += len;
    #elif dimension == 2
        int j = 2*point.j - GHOSTS;
        for (int l = 0; l < 2; l++, j++) {
          assign_periodic (L->m, i, j, L->len, b);
          b += len;
        }
    #else // dimension == 3
        int j = 2*point.j - GHOSTS;
        for (int l = 0; l < 2; l++, j++) {
          int m = 2*point.k - GHOSTS;
          for (int n = 0; n < 2; n++, m++) {
    	assign_periodic (L->m, i, j, m, L->len, b);
    	b += len;
          }
        }
    #endif
      }
      
      int pid = cell.pid;
      foreach_child() {
        cell.pid = pid;
    @if TRASH
        for (scalar s in all)
          s[] = undefined;
    @endif    
      }
    }
    
    #if dimension == 1 
    static void free_children (Point point)
    {
      /* low-level memory management */
      Layer * L = tree->L[point.level + 1];
      int i = 2*point.i - GHOSTS;
      assert (mem_data (L->m,i));
      mempool_free (L->pool, mem_data (L->m,i));
      for (int k = 0; k < 2; k++, i++)
        free_periodic (L->m, i, L->len);
      if (--L->nc == 0) {
        destroy_layer (L);
        assert (point.level + 1 == grid->depth);
        update_depth (-1);
      }
    }
    #elif dimension == 2
    static void free_children (Point point)
    {
      /* low-level memory management */
      Layer * L = tree->L[point.level + 1];
      int i = 2*point.i - GHOSTS, j = 2*point.j - GHOSTS;
      assert (mem_data (L->m,i,j));
      mempool_free (L->pool, mem_data (L->m,i,j));
      for (int k = 0; k < 2; k++)
        for (int l = 0; l < 2; l++)
          free_periodic (L->m, i + k, j + l, L->len);
      if (--L->nc == 0) {
        destroy_layer (L);
        assert (point.level + 1 == grid->depth);
        update_depth (-1);
      }
    }
    #else // dimension == 3
    static void free_children (Point point)
    {
      /* low-level memory management */
      Layer * L = tree->L[point.level + 1];
      int i = 2*point.i - GHOSTS;
      assert (mem_data (L->m,i,2*point.j - GHOSTS,2*point.k - GHOSTS));
      mempool_free (L->pool, mem_data (L->m,
    				   i,2*point.j - GHOSTS,2*point.k - GHOSTS));
      for (int k = 0; k < 2; k++, i++) {
        int j = 2*point.j - GHOSTS;
        for (int l = 0; l < 2; l++, j++) {
          int m = 2*point.k - GHOSTS;
          for (int n = 0; n < 2; n++, m++)
    	free_periodic (L->m, i, j, m, L->len);
        }
      }
      if (--L->nc == 0) {
        destroy_layer (L);
        assert (point.level + 1 == grid->depth);
        update_depth (-1);
      }
    }
    #endif // dimension == 3
    
    void increment_neighbors (Point point)
    {
      tree->dirty = true;
      if (cell.neighbors++ == 0)
        alloc_children (point);
      foreach_neighbor (GHOSTS/2)
        if (cell.neighbors++ == 0)
          alloc_children (point);
      cell.neighbors--;
    }
    
    void decrement_neighbors (Point point)
    {
      tree->dirty = true;
      foreach_neighbor (GHOSTS/2)
        if (allocated(0)) {
          cell.neighbors--;
          if (cell.neighbors == 0)
    	free_children (point);
        }
      if (cell.neighbors) {
        int pid = cell.pid;
        foreach_child() {
          cell.flags = 0;
          cell.pid = pid;
        }
      }
    }
    
    void realloc_scalar (int size)
    {
      /* low-level memory management */
      Tree * q = tree;
      size_t oldlen = sizeof(Cell) + datasize;
      size_t newlen = oldlen + size;
      datasize += size;
      /* the root level is allocated differently */
      Layer * L = q->L[0];
      foreach_mem (L->m, L->len, 1) {
    #if dimension == 1
        char * p = (char *) realloc (mem_data (L->m, point.i), newlen*sizeof(char));
        assign_periodic (L->m, point.i, L->len, p);
    #elif dimension == 2
        char * p = (char *) realloc (mem_data (L->m, point.i, point.j),
    				 newlen*sizeof(char));
        assign_periodic (L->m, point.i, point.j, L->len, p);
    #else
        char * p = (char *) realloc (mem_data (L->m, point.i, point.j, point.k),
    				 newlen*sizeof(char));
        assign_periodic (L->m, point.i, point.j, point.k, L->len, p);
    #endif
      }
      /* all other levels */
      for (int l = 1; l <= depth(); l++) {
        Layer * L = q->L[l];
        Mempool * oldpool = L->pool;
        L->pool = mempool_new (poolsize (l, newlen), (1 << dimension)*newlen);
        foreach_mem (L->m, L->len, 2) {
          char * new = (char *) mempool_alloc (L->pool);
    #if dimension == 1
          for (int k = 0; k < 2; k++) {
    	memcpy (new, mem_data (L->m, point.i + k), oldlen);
    	assign_periodic (L->m, point.i + k, L->len, new);
    	new += newlen;
          }
    #elif dimension == 2
          for (int k = 0; k < 2; k++)
    	for (int o = 0; o < 2; o++) {
    	  memcpy (new, mem_data (L->m, point.i + k,point.j + o), oldlen);
    	  assign_periodic (L->m, point.i + k, point.j + o, L->len, new);
    	  new += newlen;
    	}
    #else // dimension == 3
          for (int l = 0; l < 2; l++)
    	for (int m = 0; m < 2; m++)
    	  for (int n = 0; n < 2; n++) {
    	    memcpy (new, mem_data (L->m, point.i + l, point.j + m, point.k + n),
    		    oldlen);
    	    assign_periodic (L->m, point.i + l, point.j + m, point.k + n,
    				 L->len, new);
    	    new += newlen;
    	  }
    #endif // dimension == 3
        }
        mempool_destroy (oldpool);
      }
    }
    
    /* Boundaries */
    
    @define VN v.x
    @define VT v.y
    @define VR v.z
    
    #define is_neighbor(...) (allocated(__VA_ARGS__) &&		\
    			  !is_boundary(neighbor(__VA_ARGS__)))
    
    @if _MPI // fixme
    @ define disable_fpe_for_mpi() disable_fpe (FE_DIVBYZERO|FE_INVALID)
    @ define enable_fpe_for_mpi()  enable_fpe (FE_DIVBYZERO|FE_INVALID)
    @else
    @ define disable_fpe_for_mpi()
    @ define enable_fpe_for_mpi()
    @endif
    
    static inline void no_restriction (Point point, scalar s);
    
    static bool normal_neighbor (Point point, scalar * scalars, vector * vectors)
    {
      for (int k = 1; k <= BGHOSTS; k++)
        foreach_dimension()
          for (int i = -k; i <= k; i += 2*k)
    	if (is_neighbor(i)) {
    	  Point neighbor = neighborp(i);
    	  int id = bid(cell);
    	  for (scalar s in scalars)
    	    foreach_block()
    	      s[] = s.boundary[id](neighbor, point, s, NULL);
    	  for (vector v in vectors)
    	    foreach_block() {
    	      scalar vn = VN;
    	      v.x[] = vn.boundary[id](neighbor, point, v.x, NULL);
    #if dimension >= 2
    	      scalar vt = VT;
    	      v.y[] = vt.boundary[id](neighbor, point, v.y, NULL);
    #endif
    #if dimension >= 3
    	      scalar vr = VR;
    	      v.z[] = vr.boundary[id](neighbor, point, v.z, NULL);
    #endif
    	    }
    	  return true;
    	}
      return false;
    }
    
    static bool diagonal_neighbor_2D (Point point,
    				  scalar * scalars, vector * vectors)
    {
    #if dimension >= 2
      for (int k = 1; k <= BGHOSTS; k++)
    #if dimension == 3
        foreach_dimension()
    #endif
          for (int i = -k; i <= k; i += 2*k)
    	for (int j = -k; j <= k; j += 2*k)
    	  if (allocated(i,j) && is_neighbor(i,j) &&
    	      allocated(i,0) && is_boundary(neighbor(i,0)) &&
    	      allocated(0,j) && is_boundary(neighbor(0,j))) {
    	    Point n = neighborp(i,j),
    	      n1 = neighborp(i,0), n2 = neighborp(0,j);
    	    int id1 = bid(neighbor(i,0)), id2 = bid(neighbor(0,j));
    	    for (scalar s in scalars)
    	      foreach_block()
    		s[] = (s.boundary[id1](n,n1,s,NULL) +
    		       s.boundary[id2](n,n2,s,NULL) -
    		       s[i,j]);
    	    for (vector v in vectors)
    	      foreach_block() {
    		scalar vt = VT, vn = VN;
    		v.x[] = (vt.boundary[id1](n,n1,v.x,NULL) +
    			 vn.boundary[id2](n,n2,v.x,NULL) -
    			 v.x[i,j]);
    		v.y[] = (vn.boundary[id1](n,n1,v.y,NULL) +
    			 vt.boundary[id2](n,n2,v.y,NULL) -
    			 v.y[i,j]);
    #if dimension == 3
    		scalar vr = VR;
    		v.z[] = (vr.boundary[id1](n,n1,v.z,NULL) +
    			 vr.boundary[id2](n,n2,v.z,NULL) -
    			 v.z[i,j]);
    #endif
    	      }
    	    return true;
    	  }
    #endif // dimension >= 2
      return false;
    }
    
    static bool diagonal_neighbor_3D (Point point,
    				  scalar * scalars, vector * vectors)
    {
    #if dimension == 3
      for (int n = 1; n <= BGHOSTS; n++)
        for (int i = -n; i <= n; i += 2*n)
          for (int j = -n; j <= n; j += 2*n)
    	for (int k = -n; k <= n; k += 2*n)
    	  if (is_neighbor(i,j,k) &&
    	      is_boundary(neighbor(i,j,0)) &&
    	      is_boundary(neighbor(i,0,k)) &&
    	      is_boundary(neighbor(0,j,k))) {
    	    Point
    	      n0 = neighborp(i,j,k),
    	      n1 = neighborp(i,j,0),
    	      n2 = neighborp(i,0,k),
    	      n3 = neighborp(0,j,k);
    	    int
    	      id1 = bid(neighbor(i,j,0)),
    	      id2 = bid(neighbor(i,0,k)),
    	      id3 = bid(neighbor(0,j,k));
    	    for (scalar s in scalars)
    	      foreach_block()
    		s[] = (s.boundary[id1](n0,n1,s,NULL) +
    		       s.boundary[id2](n0,n2,s,NULL) +
    		       s.boundary[id3](n0,n3,s,NULL) -
    		       2.*s[i,j,k]);
    	    for (vector v in vectors)
    	      foreach_block() {
    		scalar vt = VT, vn = VN, vr = VR;
    		v.x[] = (vt.boundary[id1](n0,n1,v.x,NULL) +
    			 vt.boundary[id2](n0,n2,v.x,NULL) +
    			 vn.boundary[id3](n0,n3,v.x,NULL) -
    			 2.*v.x[i,j,k]);
    		v.y[] = (vt.boundary[id1](n0,n1,v.y,NULL) +
    			 vn.boundary[id2](n0,n2,v.y,NULL) +
    			 vt.boundary[id3](n0,n3,v.y,NULL) -
    			 2.*v.y[i,j,k]);
    		v.z[] = (vn.boundary[id1](n0,n1,v.z,NULL) +
    			 vr.boundary[id2](n0,n2,v.z,NULL) +
    			 vr.boundary[id3](n0,n3,v.z,NULL) -
    			 2.*v.z[i,j,k]);
    	      }
    	    return true;
    	  }
    #endif
      return false;
    }
    
    #if dimension > 1
    foreach_dimension()
    static Point tangential_neighbor_x (Point point, bool * zn)
    {
      for (int k = 1; k <= BGHOSTS; k++)
        for (int j = -k; j <= k; j += 2*k) {
          if (is_neighbor(0,j) || is_neighbor(-1,j)) {
    	*zn = false;
    	return neighborp(0,j);
          }
    #if dimension == 3
          // fixme: what about diagonals?
          if (is_neighbor(0,0,j) || is_neighbor(-1,0,j)) {
    	*zn = true;
    	return neighborp(0,0,j);
          }
    #endif // dimension == 3
        }
      return (Point){.level = -1};
    }
    #endif // dimension > 1
    
    static inline bool is_boundary_point (Point point) {
      return is_boundary (cell);
    }
     
    static void box_boundary_level (const Boundary * b, scalar * list, int l)
    {
      disable_fpe_for_mpi();
      scalar * scalars = NULL;
      vector * vectors = NULL, * faces = NULL;
      for (scalar s in list)
        if (!is_constant(s) && s.refine != no_restriction) {
          if (s.v.x.i == s.i) {
    	if (s.face)
    	  faces = vectors_add (faces, s.v);
    	else
    	  vectors = vectors_add (vectors, s.v);
          }
          else if (s.v.x.i < 0 && s.boundary[0])
    	scalars = list_add (scalars, s);
        }
      
      foreach_boundary_level (l) {
        if (!normal_neighbor (point, scalars, vectors) &&
    	!diagonal_neighbor_2D (point, scalars, vectors) &&
    	!diagonal_neighbor_3D (point, scalars, vectors)) {
          // no neighbors
          for (scalar s in scalars)
    	foreach_block()
    	  s[] = undefined;
          for (vector v in vectors)
    	foreach_block()
    	  foreach_dimension()
    	    v.x[] = undefined;
        }
        if (faces) {
          int id = bid(cell);
          foreach_dimension()
    	for (int i = -1; i <= 1; i += 2) {
    	  // normal neighbor for faces
    	  if (is_neighbor(i)) {
    	    Point neighbor = neighborp(i);
    	    for (vector v in faces) {
    	      scalar vn = VN;
    	      if (vn.boundary[id])
    		foreach_block()
    		  v.x[(i + 1)/2] = vn.boundary[id](neighbor, point, v.x, NULL);
    	    }
    	  }
    #if dimension > 1
    	  else if (i == -1) {
    	    // tangential neighbor
    	    bool zn;
    	    Point neighbor = tangential_neighbor_x (point, &zn);
    	    if (neighbor.level >= 0) {
    	      int id = is_boundary_point (neighbor) ?
    		bid(neighbor(-1)) : bid(cell);
    	      for (vector v in faces) {
    #if dimension == 2
    		scalar vt = VT;
    #else // dimension == 3
    		scalar vt = zn ? VT : VR;
    #endif
    		foreach_block()
    		  v.x[] = vt.boundary[id](neighbor, point, v.x, NULL);
    	      }
    	    }
    	    else
    	      // no neighbor
    	      for (vector v in faces)
    		foreach_block()
    		  v.x[] = 0.;
    	  }
    #endif // dimension > 1
    	}
        }
      }
    
      free (scalars);
      free (vectors);
      free (faces);
      enable_fpe_for_mpi();
    }
    
    #undef is_neighbor
     
    @undef VN
    @undef VT
    @define VN _attribute[s.i].v.x
    @define VT _attribute[s.i].v.y
    
    static double masked_average (Point point, scalar s)
    {
      double sum = 0., n = 0.;
      foreach_child()
        if (!is_boundary(cell) && s[] != nodata)
          sum += s[], n++;
      return n ? sum/n : nodata;
    }
    
    foreach_dimension()
    static double masked_average_x (Point point, scalar s)
    {
      double sum = 0., n = 0.;
      foreach_child()
        if (child.x < 0 && (!is_boundary(cell) || !is_boundary(neighbor(1))) &&
    	s[1] != nodata)
          sum += s[1], n++;
      return n ? sum/n : nodata;
    }
    
    static void masked_boundary_restriction (const Boundary * b,
    					 scalar * list, int l)
    {
      scalar * scalars = NULL;
      vector * faces = NULL;
      for (scalar s in list)
        if (!is_constant(s) && s.refine != no_restriction) {
          if (s.v.x.i == s.i && s.face)
    	faces = vectors_add (faces, s.v);
          else
    	scalars = list_add (scalars, s);
        }
      
      foreach_halo (restriction, l) {
        for (scalar s in scalars)
          s[] = masked_average (parent, s);
        for (vector v in faces)
          foreach_dimension() {
    	double average = masked_average_x (parent, v.x);
    	if (is_boundary(neighbor(-1)))
    	  v.x[] = average;
    	if (is_boundary(neighbor(1)))
    	  v.x[1] = average;
          }
      }
    
      free (scalars);
      free (faces);  
    }
    
    macro mask (double func) {
      foreach_cell_post(!is_leaf(cell)) {
        if (is_leaf(cell)) {
          int bid = (func);
          if (bid >= 0)
    	cell.pid = - bid - 1;
        }
        else { /* not a leaf */
          int pid = -1;
          foreach_child()
    	if (cell.pid >= 0 || pid < 0)
    	  pid = cell.pid;
          cell.pid = pid;
          if (pid < 0) {
    	/* fixme: call coarsen_cell()? */
    	cell.flags |= leaf;
    	decrement_neighbors (point);
          }
        }
      }
      tree->dirty = true;
    }
    
    static void free_cache (CacheLevel * c)
    {
      for (int l = 0; l <= depth(); l++)
        free (c[l].p);
      free (c);
    }
    
    void free_grid (void)
    {
      if (!grid)
        return;
      free_boundaries();
      Tree * q = tree;
      free (q->leaves.p);
      free (q->faces.p);
      free (q->vertices.p);
      free (q->refined.p);
      /* low-level memory management */
      /* the root level is allocated differently */
      Layer * L = q->L[0];
      foreach_mem (L->m, L->len, 1) {
    #if dimension == 1
        free (mem_data (L->m, point.i));
    #elif dimension == 2
        free (mem_data (L->m, point.i, point.j));
    #else // dimension == 3
        free (mem_data (L->m, point.i, point.j, point.k));
    #endif // dimension == 3
      }
      for (int l = 0; l <= depth(); l++)
        destroy_layer (q->L[l]);
      q->L = &(q->L[-1]);
      free (q->L);
      free_cache (q->active);
      free_cache (q->prolongation);
      free_cache (q->boundary);
      free_cache (q->restriction);
      free (q);
      grid = NULL;
    }
    
    static void refine_level (int depth);
    
    trace
    void init_grid (int n)
    {
      // check 64 bits structure alignment
      assert (sizeof(Cell) % 8 == 0);
      
      free_grid();
      int depth = 0;
      while (n > 1) {
        if (n % 2) {
          fprintf (stderr, "tree: N must be a power-of-two\n");
          exit (1);
        }
        n /= 2;
        depth++;
      }
      Tree * q = qcalloc (1, Tree);
      grid = (Grid *) q;
      grid->depth = 0;
    
      /* low-level memory management */
      q->L = qmalloc (2, Layer *);
      /* make sure we don't try to access level -1 */
      q->L[0] = NULL; q->L = &(q->L[1]);
      /* initialise the root cell */
      Layer * L = new_layer (0);
      q->L[0] = L;
    #if dimension == 1
      for (int i = Period.x*GHOSTS; i < L->len - Period.x*GHOSTS; i++)
        assign_periodic (L->m, i, L->len, 
    		     (char *) calloc (1, sizeof(Cell) + datasize));
      CELL(mem_data (L->m,GHOSTS)).flags |= leaf;
      if (pid() == 0)
        CELL(mem_data (L->m,GHOSTS)).flags |= active;
      for (int k = -GHOSTS*(1 - Period.x); k <= GHOSTS*(1 - Period.x); k++)
        CELL(mem_data (L->m,GHOSTS+k)).pid = (k < 0 ? -1 - left :
    					  k > 0 ? -1 - right :
    					  0);
      CELL(mem_data (L->m,GHOSTS)).pid = 0;
    #elif dimension == 2
      for (int i = Period.x*GHOSTS; i < L->len - Period.x*GHOSTS; i++)
        for (int j = Period.y*GHOSTS; j < L->len - Period.y*GHOSTS; j++)
          assign_periodic (L->m, i, j, L->len,
    		       (char *) calloc (1, sizeof(Cell) + datasize));
      CELL(mem_data (L->m,GHOSTS,GHOSTS)).flags |= leaf;
      if (pid() == 0)
        CELL(mem_data (L->m,GHOSTS,GHOSTS)).flags |= active;
      for (int k = - GHOSTS*(1 - Period.x); k <= GHOSTS*(1 - Period.x); k++)
        for (int l = -GHOSTS*(1 - Period.y); l <= GHOSTS*(1 - Period.y); l++)
          CELL(mem_data (L->m,GHOSTS+k,GHOSTS+l)).pid =
    	(k < 0 ? -1 - left :
    	 k > 0 ? -1 - right :
    	 l > 0 ? -1 - top :
    	 l < 0 ? -1 - bottom :
    	 0);
      CELL(mem_data (L->m,GHOSTS,GHOSTS)).pid = 0;
    #else // dimension == 3
      for (int i = Period.x*GHOSTS; i < L->len - Period.x*GHOSTS; i++)
        for (int j = Period.y*GHOSTS; j < L->len - Period.y*GHOSTS; j++)
          for (int k = Period.z*GHOSTS; k < L->len - Period.z*GHOSTS; k++)
    	assign_periodic (L->m, i, j, k, L->len,
    			 (char *) calloc (1, sizeof(Cell) + datasize));
      CELL(mem_data (L->m,GHOSTS,GHOSTS,GHOSTS)).flags |= leaf;
      if (pid() == 0)
        CELL(mem_data (L->m,GHOSTS,GHOSTS,GHOSTS)).flags |= active;
      for (int k = - GHOSTS*(1 - Period.x); k <= GHOSTS*(1 - Period.x); k++)
        for (int l = -GHOSTS*(1 - Period.y); l <= GHOSTS*(1 - Period.y); l++)
          for (int n = -GHOSTS*(1 - Period.z); n <= GHOSTS*(1 - Period.z); n++)
    	CELL(mem_data (L->m,GHOSTS+k,GHOSTS+l,GHOSTS+n)).pid =
    	  (k > 0 ? -1 - right :
    	   k < 0 ? -1 - left :
    	   l > 0 ? -1 - top :
    	   l < 0 ? -1 - bottom :
    	   n > 0 ? -1 - front :
    	   n < 0 ? -1 - back :
    	   0);
      CELL(mem_data (L->m,GHOSTS,GHOSTS,GHOSTS)).pid = 0;
    #endif // dimension == 3
      q->active = qcalloc (1, CacheLevel);
      q->prolongation = qcalloc (1, CacheLevel);
      q->boundary = qcalloc (1, CacheLevel);
      q->restriction = qcalloc (1, CacheLevel);
      q->dirty = true;
      N = 1 << depth;
    @if _MPI
      void mpi_boundary_new();
      mpi_boundary_new();
    @endif
      // boundaries
      Boundary * b = qcalloc (1, Boundary);
      b->level = box_boundary_level;
      b->restriction = masked_boundary_restriction;
      add_boundary (b);
      refine_level (depth);
      reset (all, 0.);
      update_cache();
    }
    
    #if dimension == 2
    void check_two_one (void)
    {
      foreach_leaf()
        if (level > 0)
          for (int k = -1; k <= 1; k++)
    	for (int l = -1; l <= 1; l++) {
    	  /* fixme: all this mess is just to ignore ghost cells */
    	  int i = (point.i + GHOSTS)/2 + k;
    	  int j = (point.j + GHOSTS)/2 + l;
    	  double Delta = 1./(1 << point.level);
    	  double x = ((i - GHOSTS + 0.5)*Delta*2. - 0.5);
    	  double y = ((j - GHOSTS + 0.5)*Delta*2. - 0.5);
    	  if (x > -0.5 && x < 0.5 && y > -0.5 && y < 0.5 && 
    	      !(aparent(k,l).flags & active)) {
    	    FILE * fp = fopen("check_two_one_loc", "w");
    	    fprintf (fp,
    		     "# %d %d\n"
    		     "%g %g\n%g %g\n",
    		     k, l,
    		     ((point.i - GHOSTS + 0.5)* - 0.5),
    		     ((point.j - GHOSTS + 0.5)*Delta - 0.5),
    		     x, y);
    	    fclose (fp);
    #if 0
    	    fp = fopen("check_two_one", "w");
    	    output_cells (fp);
    	    fclose (fp);
    #endif
    	    assert (false);
    	  }
    	}
    }
    #endif
    
    Point locate (double xp = 0., double yp = 0., double zp = 0.)
    {
      for (int l = depth(); l >= 0; l--) {
        Point point = {0};
        point.level = l;
        int n = 1 << point.level;
        point.i = (xp - X0)/L0*n + GHOSTS;
    #if dimension >= 2
        point.j = (yp - Y0)/L0*n + GHOSTS;
    #endif
    #if dimension >= 3
        point.k = (zp - Z0)/L0*n + GHOSTS;
    #endif
        if (point.i >= 0 && point.i < n + 2*GHOSTS
    #if dimension >= 2
    	&& point.j >= 0 && point.j < n + 2*GHOSTS
    #endif
    #if dimension >= 3
    	&& point.k >= 0 && point.k < n + 2*GHOSTS
    #endif
    	) {
          if (allocated(0) && is_local(cell) && is_leaf(cell))
    	return point;
        }
        else
          break;
      }
      Point point = {0};
      point.level = -1;
      return point;
    }
    
    // return true if the tree is "full" i.e. all the leaves are at the
    // same level
    bool tree_is_full()
    {
      update_cache();
      return (grid->tn == 1L << grid->maxdepth*dimension);
    }
    
    #include "variables.h"
    
    macro2 foreach_boundary (int _b, Reduce reductions = None) {
      for (int _l = depth(); _l >= 0; _l--)
        foreach_boundary_level (_l, reductions) {
          POINT_VARIABLES();
          if (bid(cell) == _b)
    	for (int _d = 0; _d < dimension; _d++) {
    	  for (int _i = -1; _i <= 1; _i += 2) {
    	    if (_d == 0) ig = _i; else if (_d == 1) jg = _i; else kg = _i;
    	    if (allocated(-ig,-jg,-kg) &&
    		is_leaf (neighbor(-ig,-jg,-kg)) &&
    		!is_boundary(neighbor(-ig,-jg,-kg)) &&
    		is_local(neighbor(-ig,-jg,-kg))) {
    	      point.i -= ig; x -= ig*Delta/2.; 
    #if dimension >= 2
    	      point.j -= jg; y -= jg*Delta/2.; 
    #endif
    #if dimension >= 3
    	      point.k -= kg; z -= kg*Delta/2.;
    #endif
    	      {...}
    	      point.i += ig; x += ig*Delta/2.;   
    #if dimension >= 2
    	      point.j += jg; y += jg*Delta/2.; 
    #endif
    #if dimension >= 3
    	      point.k += kg; z += kg*Delta/2.;
    #endif
                }
    	  }
    	  ig = jg = kg = 0;
    	}
        }
    }
    
    macro2 foreach_vertex (char flags = 0, Reduce reductions = None)
    {
      update_cache();
      foreach_cache (tree->vertices, reductions) {
        int ig = -1; NOT_UNUSED (ig);
    #if dimension >= 2
        int jg = -1; NOT_UNUSED (jg);
    #endif
    #if dimension >= 3
        int kg = -1; NOT_UNUSED (kg);
    #endif    
        {...}
      }
    }
    
    #include "tree-common.h"
    
    // overload the default periodic() function
    void tree_periodic (int dir)
    {
      int depth = grid ? depth() : -1;
      if (grid)
        free_grid();
      periodic (dir);
      if (depth >= 0)
        init_grid (1 << depth);
    }
    #define periodic(dir) tree_periodic(dir)
     
    @if _MPI
    #include "tree-mpi.h"
    #include "balance.h"
    @else // !_MPI
    void mpi_boundary_refine  (scalar * list){}
    void mpi_boundary_coarsen (int a, int b){}
    void mpi_boundary_update  (scalar * list) {
      for (scalar s in list)
        s.dirty = true;
      boundary (list);
    }
    @endif // !_MPI