#!/usr/bin/env perl
#eval 'exec perl $0 $*'
#    if 0;

$prefix = '@prefix@';
$exec_prefix = "@exec_prefix@";
$bindir = "@bindir@";
$scdatadir = "@scdatadir@";

use POSIX;

if ($prefix eq '@' . 'prefix@') {
    $configurerun = 0;
}
else {
    $configurerun = 1;
}

# The path to the MPQC executable
if ($configurerun) {
    $mpqc = "$bindir/mpqc";
}
else {
    $mpqc = "mpqc";
}

# The path to the MPQC executable if it is to be run from the object
# directory.
if (!$configurerun) {
    $mpqcobj = "mpqc";
}
else {
    $mpqcobj = '@compiledir@/src/bin/mpqc/mpqc';
}

# The threadgrp specialization
$threadgrp = "none";

# The memorygrp specialization
$memorygrp = "none";

# The message specialization
$messagegrp = "none";
$always_use_mpi = '@ALWAYS_USE_MPI@';
if ("$always_use_mpi" eq "yes") {
    $messagegrp = "mpi";
}

# The integral specialization
$integral = "none";

# The mpi launch command
$launch = '@LAUNCH@';

# The number of tasks
$ntask = 1;

# A filename with a list of nodes.
$nodefile = "";

# A command line argument with a list of nodes.
$nodes = "";

# A format string to convert a node number to a node name
$nodename = "%d";

# The number of nodes each job will use.
$nnodeperjob = "nnode";

# The number of threads each process will use.
$nthreadperproc = 1;

# The number of processes to run on each node.
$nprocpernode = 1;

# The number of processes that each job should use.
$nprocperjob = "all";

# Files machining the given regex will be run sequentially,
# in alphabetical order.
$seq = "";

# A regex for files to exclude from the runs
$exclude = "";

# If true, then mpqc with all inputs in the current directory.
$readdir = "";

# The directory where the input file is to be found.
$inputprefix = "";

# The directory where the output file is to be placed.
$outputprefix = "";

# If this is set to one, then a set of extra exclusions will be
# included.  This is only useful for the running test suite.
$small = 0;

# If true, then print out a list of files that would be run
$printfiles = 0;

# If true, then print out the number of files that would be run
$count = 0;

# If true, then print out help info and exit
$help = 0;

# If true, then don't actually run anything
$debug = 0;

# If true, then print out extra info.
$verbose = 0;

# If true, then overwrite output files that seem up-to-date
$rerun = 0;

# If true, then do not overwrite any output file.
$onlynew = 0;

# If true, then generate unique output file names.
$uniqout = 0;

# If true, then generate output file names.
$autoout = 0;

# If true, then do not append nproc, nthread, nnode info to filename.
$simpout = 0;

# If true, run mpqc from the object directory
$objdir = 0;

######################################################################

use Getopt::Long;

if (!GetOptions("small!" => \$small,
                "launch=s" => \$launch,
                "threadgrp=s" => \$threadgrp,
                "memorygrp=s" => \$memorygrp,
                "messagegrp=s" => \$messagegrp,
                "integral=s" => \$integral,
                "nnodeperjob=i" => \$nnodeperjob,
                "nthreadperproc=i" => \$nthreadperproc,
                "nprocpernode=i" => \$nprocpernode,
                "nprocperjob=i" => \$nprocperjob,
                "seq=s" => \$seq,
                "mpqc=s" => \$mpqc,
                "readdir=s" => \$readdir,
                "outputprefix=s" => \$outputprefix,
                "inputprefix=s" => \$inputprefix,
                "nodefile=s" => \$nodefile,
                "nodes=s" => \$nodes,
                "nodename=s" => \$nodename,
                "exclude=s" => \$exclude,
                "help!" => \$help,
                "printfiles!" => \$printfiles,
                "rerun!" => \$rerun,
                "onlynew!" => \$onlynew,
                "uniqout!" => \$uniqout,
                "autoout!" => \$autoout,
                "simpout!" => \$simpout,
                "objdir!" => \$objdir,
                "count!" => \$count,
                "debug!" => \$debug,
                "verbose!" => \$verbose,
      )) {
    $help=1;
}

if ("$launch" eq '@' . 'LAUNCH@') {
    if ("$messagegrp" eq "mpi") {
        $launch = "mpirun [-hf %NODEFILE%] -n %NPROC% %MPQCRUNPROC% %MPQC% [-o %OUTPUT%] %INPUT%";
    }
    else {
        $launch = "%MPQC% [-o %OUTPUT%] %INPUT%";
    }
}

######################################################################

if ($help) {
    print  "Usage: $ARGV[0] [options] [file1.in] [file2.in] ...\n";
    print  "Options:\n";
    print  "  --mpqc path        the mpqc executable to use (value: $mpqc)\n";
    print  "  --objdir           run the mpqc exec in the object dir (value: $objdir)\n";
    print  "  --small            skip big runs in verification suite (value: $small)\n";
    print  "  --nnodeperjob n    run with n nodes per job (value: $nnodeperjob)\n";
    print  "  --nprocpernode n   run with n procs per node (value: $nprocpernode)\n";
    print  "  --nprocperjob n    run with n procs in each job (value: $nprocperjob)\n";
    print  "  --nthreadperproc n use n threads per process (value: $nthreadperproc)\n";
    print  "  --threadgrp grp    use the given threading layer (value: $threadgrp)\n";
    print  "                     none: uses MPQC's default\n";
    print  "                     proc: does a single threaded run\n";
    print  "                     posix: use POSIX threads\n";
    print  "  --messagegrp grp   use the given communication layer (value: $messagegrp)\n";
    print  "                     none: uses MPQC's default\n";
    print  "                     proc: does a single processor run\n";
    print  "                     mpi: use MPI\n";
    print  "  --memorygrp grp    use the given remote memory layer (value: $messagegrp)\n";
    print  "                     none: uses MPQC's default\n";
    print  "                     proc: does a single processor run\n";
    print  "                     mtmpi: use multi-threaded MPI\n";
    print  "                     armci: use ARMCI\n";
    print  "  --integral intgl   use the given integral package (value: $integral)\n";
    print  "                     none: uses MPQC's default\n";
    print  "                     intv3: this package is distributed with MPQC\n";
    print  "                     cints: the libint package (required for MP2-R12)\n";
    printf "  --launch cmd       use the given cmd to launch jobs--see below (value: %s)\n",
          (($launch eq "")?"<not set>":$launch);
    printf "  --nodefile file    a file listing nodes to use (value: %s)\n",
          (($nodefile eq "")?"<not set>":$nodefile);
    printf "  --nodes nodes      a command line list of machines to use (value: %s)\n",
          (($nodes eq "")?"<not set>":$nodes);
    printf "                     groups can be given as 8-10,12,15-17 for example\n";
    printf "  --nodename fmt     converts node num to name (value: %s)\n", $nodename;
    printf "  --seq regex        run inputs matching reqex sequentially (value: %s)\n",
          (("$seq" eq "")?"<not set>":"$seq");
    printf "  --exclude regex    exclude files matching regex (value: %s)\n",
          (("$exclude" eq "")?"<not set>":"$exclude");
    printf "  --readdir dir      run mpqc on dir/*.in files (value: %s)\n",
          (($readdir eq "")?"<not set>":$readdir);
    print  "  --count            print the number of input files that would be run\n";
    print  "  --printfiles       print the list of input files that would be run\n";
    print  "  --rerun            overwrite output file, even if up-to-date\n";
    print  "  --onlynew          do not overwrite output file, even if not up-to-date\n";
    print  "  --uniqout          generate unique output filenames\n";
    print  "  --autoout          generate output filenames\n";
    print  "  --verbose          print out what action is to be taken on each file\n";
    print  "  --debug            don't actually run mpqc\n";
    print  "  --help             print this help\n";
    print  "\n";
    print  "The launch command can contain special strings that will be substituted.\n";
    print  "These are:\n";
    print  "  %MPQC%     The MPQC program.\n";
    print  "  %INPUT%    The input filename.\n";
    print  "  %OUTPUT%   The output filename.\n";
    print  "  %NPROC%    The number of processes to start.\n";
    print  "  %NODEFILE% The name of a file containing the node names.\n";
    print  "  %NODELIST% A comma separated list of node names.\n";
    print  "  For these last two, if they are contained within square brackets\n";
    print  "  and a substitution is not available, then everything within the\n";
    print  "  the brackets is removed.\n";
    print  "Examples of the launch argument:\n";
    print  "  mpirun [-hf %NODEFILE%] -n %NPROC% %MPQC% [-o %OUTPUT%] %INPUT%\n";
    print  "  mpirun [-H %NODELIST%] -n %NPROC% %MPQC% [-o %OUTPUT%] %INPUT%\n";
    exit 0;
}

######################################################################

if ($objdir) {
    $mpqc = $mpqcobj;
}

######################################################################

@nodelist = ();
if ($nodes ne "") {
    $nodes =~ s/-/../g;
    foreach my $i (eval $nodes) {
        $nodelist[$#nodelist + 1] = sprintf "$nodename", $i;
    }
}
elsif ("$nodefile" eq "" && exists($ENV{"PBS_NODEFILE"})) {
    $nodefile=$ENV{"PBS_NODEFILE"};
}
if ("$nodefile" ne "" && -f "$nodefile") {
    my %nodesfound = {};
    open(NODEFILE,"<$nodefile");
    while(<NODEFILE>) {
        if (/(\S+)/) {
            my $nodename = $1;
            if (!exists($nodesfound{$nodename})) {
                $nodelist[$#nodelist + 1] = $nodename;
                $nodesfound{$nodename} = 1;
            }
        }
    }
    close(NODEFILE);
}

if ($#nodelist == -1) {
    $nnode = 1;
}
else {
    $nnode = $#nodelist + 1;
}

if ($nnodeperjob eq "nnode") {
    $nnodeperjob = $nnode;
}

if ($nprocperjob eq "all") {
    $nprocperjob = $nnodeperjob * $nprocpernode;
}

if ($messagegrp eq "none" && $nprocperjob > 1) {
    $messagegrp = "mpi";
}

if ($messagegrp eq "proc") {
  $ENV{"MESSAGEGRP"} = "<ProcMessageGrp>:()";
}
elsif ($messagegrp eq "mpi") {
  $ENV{"MESSAGEGRP"} = "<MPIMessageGrp>:()";
}

$nnodeperjob = POSIX::ceil($nprocperjob / $nprocpernode);

@jobnodes = ();
%jobnnodes = {};
%nodelist = {};
%nodefile = {};
$maxjobs = 0;
while (($maxjobs + 1) * $nnodeperjob <= $nnode) {
    $jobnnodes[$maxjobs++] = $nnodeperjob;
}

if ($maxjobs == 0) {
    die "requested $nnodeperjob nodes but have $nnode nodes";
}

$nodesbegin = 0;
foreach my $i (0..$maxjobs-1) {
    my $nodesend = $nodesbegin + $jobnnodes[$i];
    my @slice = (@nodelist)[$nodesbegin..($nodesend-1)];
    $jobnodes{$i} = \@slice;
    $nodesbegin = $nodesend;
    foreach my $j (@slice) {
        if ($nodelist{$i} eq "") { $nodelist{$i} = $j; }
        else { $nodelist{$i} = sprintf "%s,%s", $nodelist{$i}, $j; }
    }
    $nodefile{$i} = ".tmp.nodefile.$$.$i";
    open(NODEFILE,">" . $nodefile{$i});
    foreach my $proc (1..$nprocpernode) {
      foreach my $j (@slice) {
          printf NODEFILE "%s\n", $j;
      }
    }
    close(NODEFILE);
}

######################################################################

if ($threadgrp eq "none" && $nthreadperproc > 1) {
    $threadgrp = "posix";
}

if ($threadgrp eq "proc") {
  $ENV{"THREADGRP"} = "<ProcThreadGrp>:()";
}
elsif ($threadgrp eq "posix") {
  $ENV{"THREADGRP"} = "<PthreadThreadGrp>:(num_threads=$nthreadperproc)";
}

######################################################################

if ($memorygrp eq "proc") {
  $ENV{"MEMORYGRP"} = "<ProcMemoryGrp>:()";
}
elsif ($memorygrp eq "mtmpi") {
  $ENV{"MEMORYGRP"} = "<MTMPIMemoryGrp>:()";
}
elsif ($memorygrp eq "armci") {
  $ENV{"MEMORYGRP"} = "<ARMCIMemoryGrp>:()";
}

######################################################################

if ($integral eq "cints") {
  $ENV{"INTEGRAL"} = "<IntegralCints>:()";
}
elsif ($integral eq "intv3") {
  $ENV{"INTEGRAL"} = "<IntegralV3>:()";
}

######################################################################

$usingthreads = 0;
if ($maxjobs > 1) {
    require threads;
    require threads::shared;
    $usingthreads = 1;
}

######################################################################

# autoflush output
$| = 1;

@allfiles = reverse(get_file_list());

my @seqfiles : shared = ();
my @files : shared = ();
foreach my $file (@allfiles) {
    if ("$seq" ne "" && ($file =~ /$seq/)) {
        $seqfiles[$#seqfiles+1] = $file;
    }
    else {
        $files[$#files+1] = $file;
    }
}

if ($count) {
    printf "%d\n", $#allfiles + 1;
    exit;
}

if ($printfiles) {
    foreach my $i (@allfiles) {
        printf "%s\n", "$i";
    }
    exit;
}


printf "Running a maximum of %d jobs at a time.\n", $maxjobs;
printf "Running %d processes per job.\n", $nprocperjob;
printf "Running %d threads per process.\n", $nthreadperproc;
foreach my $i (0..$maxjobs-1) {
    print "Nodes in slot $i:";
    foreach my $j (@{$jobnodes{$i}}) {
        printf " \"%s\"", $j;
    }
    print "\n";
}
printenvvar("MESSAGEGRP");
printenvvar("THREADGRP");
printenvvar("MEMORYGRP");
printenvvar("SCLIBDIR");
printenvvar("INTEGRAL");

$thecount = 0;
$n = 0;

if ($usingthreads) {
    @threads = ();

    foreach my $jobnum (0..$maxjobs-1) {
        my $thr = threads->new(\&jobrunner, $jobnum);
        $threads[$#threads+1] = $thr;
    }

    foreach my $thr (@threads) {
        $thr->join();
    }

    @threads = ();
}
else {
    foreach my $jobnum (0..$maxjobs-1) {
        jobrunner($jobnum);
    }
}

foreach $i (values(%nodefile)) {
    unlink "$i";
}

sub get_next_file {
    my $jobnum = shift;
    lock(@seqfiles) if ($usingthreads);
    if ($#seqfiles >= 0 && $jobnum == 0) {
        return pop(@seqfiles);
    }
    lock(@files) if ($usingthreads);
    if ($#files >= 0) {
        return pop(@files);
    }
    return "";
}

sub jobrunner {
    my $jobslot = shift;

    my $file;

    while ( ($file = get_next_file($jobslot)) ne "") {
        my $out = outfile("$file");
        $out = "$outputprefix$out";
        my $in = "$inputprefix$file";
        my $cmd = "$launch";
        $cmd =~ s/%MPQC%/$mpqc/;
        $cmd =~ s/%NPROC%/$nprocperjob/;
        $cmd =~ s/%INPUT%/$in/;

	my $mpqcrunproc = "$scdatadir/mpqcrunproc $mpqc";
        if (exists($ENV{MESSAGEGRP})) {
           $mpqcrunproc = "$mpqcrunproc " . &isoencode("$ENV{MESSAGEGRP}");
        }
        else {
           $mpqcrunproc = "$mpqcrunproc none";
        }
        if (exists($ENV{THREADGRP})) {
           $mpqcrunproc = "$mpqcrunproc " . &isoencode("$ENV{THREADGRP}");
        }
        else {
           $mpqcrunproc = "$mpqcrunproc none";
        }
        if (exists($ENV{MEMORYGRP})) {
           $mpqcrunproc = "$mpqcrunproc " . &isoencode("$ENV{MEMORYGRP}");
        }
        else {
           $mpqcrunproc = "$mpqcrunproc none";
        }
        if (exists($ENV{INTEGRAL})) {
           $mpqcrunproc = "$mpqcrunproc " . &isoencode("$ENV{INTEGRAL}");
        }
        else {
           $mpqcrunproc = "$mpqcrunproc none";
        }
        $cmd =~ s|%MPQCRUNPROC%|$mpqcrunproc|;

        $cmd = substitute_optional_parameter($cmd, "%OUTPUT%", "$out");
        $cmd = substitute_optional_parameter($cmd, "%NODELIST%", $nodelist{$jobslot});
        $cmd = substitute_optional_parameter($cmd, "%NODEFILE%", $nodefile{$jobslot});

        printf "starting in slot %d: %s\n", $jobslot, "$cmd";
        $cmd = "true" if ($debug);
        $pid = fork();
        if ($pid == 0) {
            exec("$cmd");
            die "exec returned";
        }
        waitpid($pid,'');
    }
}

sub get_file_list {
    my @dirfiles;
    my @argfiles;

    if ($readdir ne "") {
        opendir(DIR,"$readdir");
        @tdirfiles = sort(readdir(DIR));
        foreach my $j (@tdirfiles) {
            if ($j =~ /\.in$/) {
                $dirfiles[$#dirfiles+1] = $j;
            }
        }
        closedir(DIR);
    }

    @argfiles = sort(@ARGV);

    my @allfiles = (@dirfiles, @argfiles);

    my @files;

    foreach my $infile (@allfiles) {
        my $out = outfile("$infile");
        $out = "$outputprefix$out";
        $in = "$inputprefix$infile";
        $issmall = (!($in =~ /ccpc?v[dtq5]z/));
        if ($in =~ /^(cl|hsos|u)scf_.*6311gss/) {
            $issmall = 0;
        }
        if ($in =~ /^basis2_/) {
            $issmall = 0;
        }
        if ($in =~ /^orthog_.*(hfs|zapt2|mp2)/) {
            $issmall = 0;
        }
        if ($in =~ /^dft_/) {
            $issmall = 0;
        }
        if ($in =~ /^symm1_cub/) {
            $issmall = 0;
        }
        if ($in =~ /^methods_/) {
            $issmall = 0;
        }
        if ($in =~ /^basis[12]_.*pc[234]/) {
            $issmall = 0;
        }
        if ($in =~ /^mbpt_mp2r12_c6h6_multipass/) {
            $issmall = 0;
        }
        if ($exclude ne "" && $in =~ /$exclude/) { 
            if ($verbose) {
                print "$in: excluded by --exclude option\n";
            }
        }
        if (!$rerun
            && (-f "$out")
            && ($onlynew
                || (-M "$out" < -M "$in" && (! -f "$mpqc" || -M "$out" < -M "$mpqc")))) {
            if ($verbose) {
                print "$in: skipping: $out up-to-date\n";
            }
        }
        elsif ($small && ! $issmall) {
            if ($verbose) {
                print "$in: skipping due to --small option\n";
            }
        }
        else {
            if ($verbose) {
                print "$in: will be run\n";
            }
            $files[$#files+1] = "$infile";
        }
    }

    return @files;
}

sub outfile {
    my $in = shift;

    my $outbase = "$in";
    $outbase =~ s/\.[^.]*$//;
    if ($simpout) {
        $outbase = sprintf "%s.out", "$outbase";
    }
    else {
        $outbase = sprintf "%s.out.%03d.%02d.%02d", "$outbase",
                           $nnodeperjob, $nprocpernode, $nthreadperproc;
    }

    my $out;
    if ($uniqout) {
        $out = "$outbase";
        my $outversion = 1;
        while (-f "$out") {
            $outversion++;
            $out = sprintf "%s.%02d", "$outbase", $outversion;
        }
    }
    elsif ($autoout) {
        $out = "$outbase";
    }
    else {
        $out = "";
    }

    return "$out";
}

sub printenvvar {
    my $envvar = shift;
    if (exists($ENV{$envvar})) {
        printf "Using %s = \"%s\"\n", $envvar, $ENV{$envvar};
    }
}

sub substitute_optional_parameter {
    my $str = shift;
    my $name = shift;
    my $value = shift;
    if ($value ne "") {
        $str =~ s/\[([^[]*$name[^[]*)\]/$1/;
        $str =~ s/$name/$value/;
    }
    else {
        $str =~ s/\[([^[]*$name[^[]*)\]//;
    }
    return $str;
}

sub isoencode {
    my $str = shift;
    $str =~ s/ /%20/g;
    $str =~ s/\</%3c/g;
    $str =~ s/\>/%3e/g;
    $str =~ s/\[/%5b/g;
    $str =~ s/\]/%5d/g;
    $str =~ s/\$/%24/g;
    $str =~ s/:/%38/g;
    $str =~ s/\(/%28/g;
    $str =~ s/\)/%29/g;
    return $str;
}
