src/maxruntime.h

    Controlling the maximum runtime

    On parallel machines, runs are often not allowed to exceed a maximum duration (typically 24 hours). To avoid having the job terminated brutally by the queueing system, this module adds the option to exit gracefully after a given runtime.

    The maximum runtime is given as a command-line argument, `--maxruntime TIME` or `-m TIME` (typically in the running script given to the queueing system), where TIME uses the standard format H:M:S (hours, minutes and seconds); the shorter forms M:S and S are also accepted. This should match the wall-clock time requested from the queueing system.

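    When this time minus 5 minutes (to allow for clean termination) is exceeded, the state of the simulation is dumped in the “restart” file and the program terminates. The elapsed time is only checked every 10 iterations, so a single block of 10 iterations must take well under 5 minutes.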

    // Maximum allowed runtime in seconds, as set by maxruntime() from the
    // command line. The default, HUGE (defined elsewhere; presumably a very
    // large number -- TODO confirm), means that unless the option is given
    // the limit tested in the runtime event is in practice never reached.
    static double _maxruntime = HUGE;
    
    // Runtime check, performed every 10 iterations: once the elapsed time
    // comes within 300 seconds of _maxruntime, dump the simulation state and
    // stop the run.
    event runtime (i += 10) {
      // perf.t is presumably the elapsed wall-clock time of this process, in
      // seconds. The reduction replaces it by the maximum over all MPI
      // processes (MPI_MAX), presumably so that every process takes the same
      // decision below -- TODO confirm against the definition of
      // mpi_all_reduce().
      mpi_all_reduce (perf.t, MPI_DOUBLE, MPI_MAX);
      if (perf.t >= _maxruntime - 300) { // we allow 5 minutes for termination
        dump (file = "restart"); // so that we can restart
        return 1; // exit
      }
    }
    
    // Parses the maximum-runtime option and removes it from the command line.
    //
    // Looks in argv[0..*argc-1] for "--maxruntime TIME" or "-m TIME", where
    // TIME is H:M:S, M:S or S, each field being a non-negative decimal
    // integer. For each occurrence, _maxruntime is set to the corresponding
    // number of seconds and both the option and its value are removed from
    // argv, with *argc decremented by 2 (if the option is repeated, the last
    // occurrence wins).
    //
    // argc: pointer to the argument count, updated in place.
    // argv: argument vector, compacted in place. The TIME string itself is
    //       modified by strtok() (its ':' separators are overwritten).
    //
    // Prints a message on ferr and calls exit (1) if TIME is missing, empty,
    // has more than three fields or contains a field which is not a
    // non-negative integer. _maxruntime is only modified once TIME has been
    // validated.
    void maxruntime (int * argc, char * argv[])
    {
      for (int i = 0; i < *argc; i++) {
        if (strcmp (argv[i], "--maxruntime") && strcmp (argv[i], "-m"))
          continue; // not our option

        if (i + 1 >= *argc) {
          fprintf (ferr, "usage: %s TIME\n", argv[i]);
          exit (1);
        }

        // Accumulate the colon-separated fields in base 60. strtok() returns
        // NULL straight away for an empty (or all-colon) TIME, in which case
        // the loop body never runs and n stays at zero.
        double seconds = 0.;
        int n = 0, valid = 1;
        for (char * s = strtok (argv[i + 1], ":"); s && valid;
             s = strtok (NULL, ":")) {
          char * end;
          long field = strtol (s, &end, 10);
          // reject empty/non-numeric fields, trailing garbage, negative
          // values and a fourth field
          valid = (end != s && *end == '\0' && field >= 0 && ++n <= 3);
          seconds = 60*seconds + field;
        }
        if (!valid || n == 0) {
          fprintf (ferr, "maxruntime: TIME format must be H:M:S\n");
          exit (1);
        }
        _maxruntime = seconds;

        // Remove the option and its value by shifting the tail of argv down
        // by two slots, then re-terminate the vector: slot *argc (new value)
        // was a valid entry of the original array, so writing it is safe.
        *argc -= 2;
        for (int j = i; j < *argc; j++)
          argv[j] = argv[j + 2];
        argv[*argc] = NULL;

        // The argument now in slot i has not been examined yet: stay on it.
        i--;
      }
    }

    Usage

    Examples