#!/usr/bin/env perl
#--------------------------------------------------------------------
#    The MB-system:  mbm_makedatalist.perl  27/1/2006
#
#    Copyright (c) 2006-2023 by
#    David W. Caress (caress@mbari.org)
#      Monterey Bay Aquarium Research Institute
#      Moss Landing, California, USA
#    Dale N. Chayes 
#      Center for Coastal and Ocean Mapping
#      University of New Hampshire
#      Durham, New Hampshire, USA
#    Christian dos Santos Ferreira
#      MARUM
#      University of Bremen
#      Bremen Germany
#      
#    MB-System was created by Caress and Chayes in 1992 at the
#      Lamont-Doherty Earth Observatory
#      Columbia University
#      Palisades, NY 10964
#
#    See README.md file for copying and redistribution conditions.
#--------------------------------------------------------------------
#
# Command:
#   mbm_makedatalist
#
# Purpose:
#   Macro to generate an MB-System datalist file referencing all
#   identifiable swath files in the specified directory. If no directory
#   is specified with the -I option, then the current directory is used.
#   The resulting datalist will be named datalist.mb-1 by default.
#
# Basic Usage:
#   mbm_makedatalist [-Bsize -Fformat -Idirectory -L -Odatalist -P -Ssuffix -T -H -V]
#
# Author:
#   David W. Caress
#   Monterey Bay Aquarium Research Institute
#   Moss Landing, CA 95039
#   January 27, 2006
#
#
#
# Name reported in the help text and in error messages
$program_name = 'mbm_makedatalist';

# Keep a copy of the raw command line, then let MBGetopts strip the options
# out of @ARGV and set the matching $opt_* globals
$command_line = join(' ', @ARGV);
MBGetopts('HhB:b:F:f:I:i:LlO:o:PpS:s:TtVv');

# Every option is accepted in upper or lower case; the upper case form wins
# when both are given.

# informational switches
$help             = $opt_H || $opt_h;
$verbose          = $opt_V || $opt_v;

# where to look and what to write (default output name is datalist.mb-1)
$directory        = $opt_I || $opt_i;
$suffix           = $opt_S || $opt_s;
$datalist         = $opt_O || $opt_o || "datalist.mb-1";

# which files to keep and how to label them; the size threshold defaults
# to 0 and is later compared against the sizes printed by "ls -s"
$size_threshold   = $opt_B || $opt_b || 0;
$format_specified = $opt_F || $opt_f;
$ignoreprocessed  = $opt_P || $opt_p;
$skiplatest       = $opt_L || $opt_l;
$disablesorting   = $opt_T || $opt_t;

# print out help message if required
# The text is written as plain ASCII. The man-page font escapes that used to
# wrap the program and package names were dropped because "\f" inside a
# double-quoted Perl string is a form feed character, not roff markup.
if ($help) {
  print
    "\n$program_name:\n",
    "Macro to generate an MB-System datalist file referencing all\n",
    "identifiable swath files in the specified directory. If no directory\n",
    "is specified with the -I option, then the current directory is used.\n",
    "The resulting datalist will be named datalist.mb-1 by default.\n",
    "mbm_makedatalist is a macro to generate an MB-System datalist file\n",
    "referencing all identifiable swath files in the specified target directory.\n",
    "Datalists are fundamental structures in MB-System workflows because they\n",
    "allow programs to operate on sets of swath data files.\n",
    "Datalist files are text lists of swath data files and their format ids with each\n",
    "file entry taking up a single line. These lists may contain references to other\n",
    "datalists, making them recursive. Datalists may also contain comments and parsing\n",
    "directives that, for example, determine whether parsing returns references to\n",
    "raw or processed data files. See the MB-System  manual page for details\n",
    "on the format and structure of datalists.  \n",
    "\nBasic Usage:\n\tmbm_makedatalist [-Bsize -Fformat -Idirectory -L -Odatalist -P -Ssuffix -T -H -V]\n";
  exit 0;
}

# tell the world we got started
# With -V, echo how each option was interpreted before any work is done.
if ($verbose) {
  # Suffixes treated as Kongsberg multibeam data. This is the same set the
  # time-sorting step further down in this script tests for, including
  # .mb261, so the messages below agree with what is actually done.
  my %kongsberg_suffix = map { ($_ => 1) }
    qw(.all .ALL .kmall .KMALL .mb56 .mb57 .mb58 .mb59 .mb261);
  my $kongsberg = ($suffix && $kongsberg_suffix{$suffix});

  print "\nRunning $program_name...\n\n";

  # where the files are looked for
  if ($directory) {
    print " - Checking for swath files in directory $directory\n";
  } else {
    print " - Checking for swath files in the current directory\n";
  }

  # which files are considered
  if ($suffix) {
    print " - Checking for files with suffix $suffix\n";
    if ($ignoreprocessed) {
      print " - Ignoring files with specified suffix preceded by letter p, e.g. p$suffix\n";
    }
  } else {
    print " - Checking all files for those that meet swath data naming conventions\n";
  }

  # whether the filename-based time sort will run
  if ($disablesorting) {
    if ($kongsberg) {
      print " - Sorting Kongsberg multibeam files into time order is disabled.\n";
    } else {
      print " - Request to disable time sorting of Kongsberg multibeam files ignored because a Kongsberg multibeam file\n   suffix (.all or .ALL) has not been specified with the -S option\n";
    }
  } elsif ($kongsberg) {
    print " - Attempting to sort Kongsberg multibeam files into time order based on filenames.\n";
  }

  # how format ids are chosen; report the id given with -F ($format itself
  # is not assigned until the per-file loop runs)
  if ($format_specified) {
    print " - Assigning format id $format_specified to all files\n";
  } else {
    print " - Using format ids consistent with filenames\n";
  }
}

# get list of files in specified directory, with desired suffix if specified
# Build the argument handed to ls: a glob in the target directory, a glob in
# the current directory, or nothing at all (plain listing of the current
# directory).
if ($directory) {
    $lsarg = $suffix ? "$directory/*$suffix" : "$directory/*";
} elsif ($suffix) {
    $lsarg = "*$suffix";
} else {
    $lsarg = "";
}

# "ls -1 -s" prints one "<size in blocks> <name>" pair per line
@filelistraw = `/bin/ls -1 -s $lsarg`;

# With -P, drop files whose name ends in "p<suffix>". The suffix is matched
# literally and only at the end of the name, so a "." in the suffix is not
# treated as a wildcard and names such as ship_all_01.all are kept.
if ($ignoreprocessed && $suffix) {
    @filelistraw = grep { !/p\Q$suffix\E\s*$/ } @filelistraw;
}

# The -B threshold is doubled before being compared with the block counts
# NOTE(review): this presumably converts kilobytes to 512-byte blocks; an ls
# that reports 1024-byte blocks would make the cut-off twice as large - confirm
$size_threshold = 2 * $size_threshold;
foreach $line (@filelistraw) {
  # skip anything that is not "<number> <name>": the "total N" summary,
  # blank lines and directory headers that ls may emit
  next unless $line =~ /^\s*(\d+)\s+(\S+)/;
  ($size, $file) = ($1, $2);
  if ($size >= $size_threshold) {
    push(@filelist, $file);
  }
}

# loop over the list of files counting identifiable swath files
# Fills @outfilelist / @outformatlist with the files that will be written to
# the datalist and their format ids.
$count = 0;
foreach $file (@filelist) {
  # use the format id forced with -F, otherwise ask mbformat about the file
  # (a result of 0 is treated below as "not a swath file")
  if ($format_specified) {
    $format = $format_specified;
  } else {
    $format = `mbformat -L -I $file`;
    chomp $format;
  }

  # Skip the 9999.all / 9999.ALL file when listing *.all data. The test is
  # made on the file name without any leading directory so that it also
  # works when the listing came from "-I directory".
  if (($suffix eq ".all" || $suffix eq ".ALL")
      && $file =~ m{(?:^|/)9999\.(?:all|ALL)$}) {
    print "File ignored:   file:$file format:$format\n";
  } elsif ($format != 0 && ($file ne $datalist || $directory ne ".")) {
    $count++;
    push(@outfilelist, $file);
    push(@outformatlist, $format);
    if ($verbose) {
      print "Adding to list: file:$file format:$format\n";
    }
  }
}

# Unless -T was given, put Kongsberg multibeam files into time order using
# the timestamps embedded in their file names.

# Build the string used to order one file. Takes a file path and returns
# "yyyymmdd^hhmmss^start^end" built from the file name (any leading directory
# is ignored). Two naming schemes are recognised:
#   0124_20100908_191912_Healy.all    timestamp to seconds
#   0250_20220620_2201_sentrye.kmall  timestamp to minutes
# Fields that cannot be found are left empty, so a name following neither
# scheme yields "^^^".
sub _mbm_time_sort_key {
  my ($path) = @_;
  (my $name = $path) =~ s{^.*/}{};

  my ($start, $yyyymmdd, $hhmmss, $end) = $name =~ /(\d{4})_(\d{8})_(\d{6})(\S+)/;
  # fall back to the looser pattern only when the first one did not match;
  # testing definedness keeps all-zero fields such as 000000 (midnight) valid
  if (!defined($yyyymmdd)) {
    ($start, $yyyymmdd, $hhmmss, $end) = $name =~ /(\d+)_(\d+)_(\d+)_(\S+)/;
  }
  return join("^", map { defined($_) ? $_ : "" } ($yyyymmdd, $hhmmss, $start, $end));
}

if (($suffix eq ".all" || $suffix eq ".ALL" || $suffix eq ".mb56"
     || $suffix eq ".mb57" || $suffix eq ".mb58" || $suffix eq ".mb59"
     || $suffix eq ".kmall" || $suffix eq ".KMALL" || $suffix eq ".mb261")
        && !$disablesorting) {
  # one key per entry of @outfilelist
  my @sortkeys = map { _mbm_time_sort_key($_) } @outfilelist;

  # Sort the indices rather than rebuilding names from the key pieces: every
  # path is written to the datalist exactly as it was listed, including names
  # that match neither scheme. Keys are compared as strings; entries with
  # equal keys keep their original relative order.
  my @order = sort { $sortkeys[$a] cmp $sortkeys[$b] || $a <=> $b } (0 .. $#outfilelist);
  @outfilelist   = @outfilelist[@order];
  @outformatlist = @outformatlist[@order];
}

# open output datalist
# Write the collected "file format" pairs, one per line, to the datalist.
$count = scalar(@outfilelist);
if ($skiplatest && $count > 0) {
  # -L: leave the last listed file out of the datalist
  $count = $count - 1;
}
if ($count > 0) {
  # three-argument open with a lexical handle, so the datalist name is only
  # ever treated as a file name
  open(my $datalist_fh, '>', $datalist)
    or die "\n$program_name:\nUnable to open output datalist file $datalist: $!\nExiting...\n";

  # loop over the list of files
  if ($verbose) {
    print "\nOutputting $count file listings to datalist file $datalist\n";
  }
  for ($i = 0; $i < $count; $i++) {
    print {$datalist_fh} "$outfilelist[$i] $outformatlist[$i]\n";
    if ($verbose) {
      print "$outfilelist[$i] $outformatlist[$i]\n";
    }
  }

  # buffered write errors only show up at close, so check it
  close($datalist_fh)
    or die "\n$program_name:\nUnable to finish writing datalist file $datalist: $!\nExiting...\n";

  if ($verbose) {
    print "\nAll done!\n\n";
  }

} else {
  if ($verbose) {
    print "No swath files identified therefore no datalist created...\n";
  }
}

# done
exit 0;

#-----------------------------------------------------------------------
# MBGetopts: single-letter command line option parser in the style of the
# classic Getopts, extended with concatenating and optional arguments.
#
# The one parameter is a specification string listing the accepted option
# letters. A letter may be followed by one modifier character:
#   :       the option takes an argument, either attached (-Idir) or as the
#           next word (-I dir); repeating the option overwrites the value
#   +       like ":", but the values of repeated options are concatenated,
#           separated by ":"
#   %       the argument is optional; when nothing is attached, the next
#           word is used unless it looks like one of the known options
#   (none)  the option is a flag; flags may be bundled (-Hv)
#
# For each option found, $opt_<letter> is set to its value (1 for a flag) and
# $flg_<letter> is set to 1. Parsed words are removed from @ARGV; parsing
# stops at the first word that is not "-" followed by at least one character.
# Unknown letters are reported on STDERR.
#
# Returns true when no unknown option or missing argument was encountered.
#
# Usage:
#      MBGetopts('a:b+c');  # -a takes arg, -b concatenates args,
#                           # -c does not take arg

sub MBGetopts {
    my ($argumentative) = @_;
    my @spec_chars = split( / */, $argumentative );
    my $errs = 0;

    # $first and $rest keep these exact names because the eval strings
    # below refer to them; $pos and $one are package globals
    while (@ARGV && $ARGV[0] =~ /^-(.)(.*)/) {
        my ($first, $rest) = ($1, $2);
        $pos = index($argumentative, $first);

        # letter is not part of the specification: complain, then move on
        # to the remaining bundled letters or to the next word
        if ($pos < 0) {
            print STDERR "Unknown option: $first\n";
            ++$errs;
            if ($rest ne '') {
                $ARGV[0] = "-$rest";
            }
            else {
                shift(@ARGV);
            }
            next;
        }

        # the character after the letter selects how it is handled
        my $kind = $spec_chars[$pos + 1];

        if ($kind eq ':' || $kind eq '+') {
            # mandatory argument: attached text, otherwise the next word
            shift(@ARGV);
            if ($rest eq '') {
                ++$errs unless @ARGV;
                $rest = shift(@ARGV);
            }
            if ($kind eq '+' && eval "\$opt_$first") {
                # append to the value collected so far
                eval "\$opt_$first = \$opt_$first
        . \":\" . \$rest;";
            }
            else {
                eval "\$opt_$first = \$rest;";
            }
            eval "\$flg_$first = 1;";
        }
        elsif ($kind eq '%') {
            # optional argument
            shift(@ARGV);
            if ($rest ne '') {
                eval "\$opt_$first = \$rest;";
            }
            else {
                # consume the next word only if it is not a known option
                $rest = $ARGV[0];
                ($one) = $rest =~ /^-(.).*/;
                $pos = index($argumentative, $one);
                if (!$one || $pos < 0) {
                    eval "\$opt_$first = \$rest;";
                    shift(@ARGV);
                }
            }
            eval "\$flg_$first = 1;";
        }
        else {
            # plain flag; any remaining letters are re-queued as "-rest"
            eval "\$opt_$first = 1";
            eval "\$flg_$first = 1;";
            if ($rest eq '') {
                shift(@ARGV);
            }
            else {
                $ARGV[0] = "-$rest";
            }
        }
    }

    return $errs == 0;
}
#-----------------------------------------------------------------------
