src/grid/gpu/Benchmarks/turbulence

    cd $BASILISK/examples
    
    CFLAGS='-DSINGLE_PRECISION -DBENCHMARK=1' make turbulence.ctst
    for i in 128 256 512 1024 2048 4096; do 
      OMP_NUM_THREADS=8 turbulence/turbulence $i 2> /tmp/log; 
    done
    
    # Multigrid, 2008 steps, 13.5656 CPU, 1.698 real, 1.94e+07 points.step/s, 8 var
    # Multigrid, 2073 steps, 37.6603 CPU, 4.71 real, 2.88e+07 points.step/s, 8 var
    # Multigrid, 2363 steps, 125.084 CPU, 15.64 real, 3.96e+07 points.step/s, 8 var
    # Multigrid, 2887 steps, 566.771 CPU, 70.87 real, 4.27e+07 points.step/s, 8 var
    # Multigrid, 3010 steps, 2484.88 CPU, 310.7 real, 4.06e+07 points.step/s, 8 var
    # Multigrid, 3110 steps, 10083.9 CPU, 1261 real, 4.14e+07 points.step/s, 8 var
    
    CFLAGS='-DBENCHMARK=1' make turbulence.gpu.tst
    
    OpenGL renderer string: Mesa Intel(R) UHD Graphics (TGL GT1) (0x9a60)
    Video memory: 3072MB
    
    for i in 128 256 512 1024 2048 4096; do 
      OMP_NUM_THREADS=1 turbulence.gpu/turbulence.gpu $i 2> /tmp/log; 
    done
    
    # Multigrid (GPU), 2009 steps, 2.59921 CPU, 5.478 real, 6.01e+06 points.step/s, 8 var
    # Multigrid (GPU), 2008 steps, 1.992 CPU, 8.546 real, 1.54e+07 points.step/s, 8 var
    # Multigrid (GPU), 2328 steps, 4.43931 CPU, 25.36 real, 2.41e+07 points.step/s, 8 var
    # Multigrid (GPU), 2767 steps, 10.0743 CPU, 97.95 real, 2.96e+07 points.step/s, 8 var
    # Multigrid (GPU), 2991 steps, 17.2899 CPU, 391.4 real, 3.21e+07 points.step/s, 8 var
    # (only five results are listed: no timing was recorded for the 4096 run on this GPU)
    
    OpenGL renderer string: NVIDIA GeForce RTX 3050 Ti Laptop GPU/PCIe/SSE2
    Dedicated video memory: 4096 MB
    
    for i in 128 256 512 1024 2048 4096; do 
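      # NOTE: 'nvidia' is not defined on this page; presumably a local wrapper setting
      # __NV_PRIME_RENDER_OFFLOAD=1 __GLX_VENDOR_LIBRARY_NAME=nvidia (as in the profiled runs below)
      OMP_NUM_THREADS=1 nvidia turbulence.gpu/turbulence.gpu $i 2> /tmp/log; 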
    done
    
    # Multigrid (GPU), 2009 steps, 1.53716 CPU, 1.537 real, 2.14e+07 points.step/s, 8 var
    # Multigrid (GPU), 2008 steps, 1.63853 CPU, 1.639 real, 8.03e+07 points.step/s, 8 var
    # Multigrid (GPU), 2328 steps, 3.30781 CPU, 3.308 real, 1.84e+08 points.step/s, 8 var
    # Multigrid (GPU), 2767 steps, 11.4863 CPU, 11.49 real, 2.53e+08 points.step/s, 8 var
    # Multigrid (GPU), 2991 steps, 45.4397 CPU, 45.44 real, 2.76e+08 points.step/s, 8 var
    # Multigrid (GPU), 3011 steps, 179.499 CPU, 179.5 real, 2.81e+08 points.step/s, 8 var
    
    OpenGL renderer string: Quadro RTX 6000/PCIe/SSE2
    Dedicated video memory: 24576 MB
    
    for i in 128 256 512 1024 2048 4096; do 
      OMP_NUM_THREADS=1 turbulence.gpu/turbulence.gpu $i 2> /tmp/log; 
    done
    
    # Multigrid (GPU), 2009 steps, 1.32091 CPU, 1.321 real, 2.49e+07 points.step/s, 8 var
    # Multigrid (GPU), 2008 steps, 1.45009 CPU, 1.45 real, 9.07e+07 points.step/s, 8 var
    # Multigrid (GPU), 2328 steps, 2.27639 CPU, 2.276 real, 2.68e+08 points.step/s, 8 var
    # Multigrid (GPU), 2767 steps, 4.9143 CPU, 4.915 real, 5.9e+08 points.step/s, 8 var
    # Multigrid (GPU), 2991 steps, 14.5549 CPU, 14.56 real, 8.62e+08 points.step/s, 8 var
    # Multigrid (GPU), 3011 steps, 53.9424 CPU, 53.95 real, 9.36e+08 points.step/s, 8 var
    
    CFLAGS='-DTRACE=2 -DBENCHMARK=1' make turbulence.gpu.tst
    
    __NV_PRIME_RENDER_OFFLOAD=1 __GLX_VENDOR_LIBRARY_NAME=nvidia OMP_NUM_THREADS=1 turbulence.gpu/turbulence.gpu 256
    
    # Multigrid (GPU), 2008 steps, 5.22215 CPU, 5.223 real, 2.52e+07 points.step/s, 8 var
       calls    total     self   % total   function
        4025     4.64     4.11     78.6%   mg_cycle():/src/poisson.h:92
      336353     0.56     0.55     10.5%   setup_shader():/src/grid/gpu/grid.h:1721
        6034     0.14     0.14      2.7%   gpu_reduction():/src/poisson.h:381
        2009     4.97     0.14      2.6%   mg_solve():/src/poisson.h:230
        2009     0.09     0.08      1.6%   tracer_fluxes():/src/bcg.h:61
    
    __NV_PRIME_RENDER_OFFLOAD=1 __GLX_VENDOR_LIBRARY_NAME=nvidia OMP_NUM_THREADS=1 turbulence.gpu/turbulence.gpu 512
    
    # Multigrid (GPU), 2328 steps, 8.17009 CPU, 8.17 real, 7.47e+07 points.step/s, 8 var
       calls    total     self   % total   function
        4670     6.88     6.17     75.6%   mg_cycle():/src/poisson.h:92
      440039     0.75     0.73      9.0%   setup_shader():/src/grid/gpu/grid.h:1721
        2329     7.58     0.33      4.1%   mg_solve():/src/poisson.h:230
        6999     0.26     0.26      3.1%   gpu_reduction():/src/poisson.h:381
        2329     0.25     0.23      2.8%   tracer_fluxes():/src/bcg.h:61
        2329     0.34     0.09      1.1%   advection():/src/bcg.h:99
        2329     0.08     0.08      1.0%   gpu_reduction():/src/timestep.h:6
        2329     0.08     0.08      1.0%   gpu_reduction():/src/poisson.h:163
        2329     0.17     0.08      1.0%   velocity():/src/advection.h:54
    
    __NV_PRIME_RENDER_OFFLOAD=1 __GLX_VENDOR_LIBRARY_NAME=nvidia OMP_NUM_THREADS=1 turbulence.gpu/turbulence.gpu 1024
    
    # Multigrid (GPU), 2767 steps, 17.2732 CPU, 17.27 real, 1.68e+08 points.step/s, 8 var
       calls    total     self   % total   function
        5559    13.17    12.23     70.8%   mg_cycle():/src/poisson.h:92
        2768    15.34     1.15      6.7%   mg_solve():/src/poisson.h:230
      574414     0.99     0.97      5.6%   setup_shader():/src/grid/gpu/grid.h:1721
        2768     0.85     0.83      4.8%   tracer_fluxes():/src/bcg.h:61
        8327     0.76     0.76      4.4%   gpu_reduction():/src/poisson.h:381
        2768     1.17     0.32      1.8%   advection():/src/bcg.h:99
        2768     0.50     0.26      1.5%   velocity():/src/advection.h:54
        2768    15.59     0.25      1.4%   velocity_0():/src/navier-stokes/stream.h:87
        2768     0.24     0.24      1.4%   gpu_reduction():/src/timestep.h:6
        2768     0.23     0.23      1.3%   gpu_reduction():/src/poisson.h:163