#!/usr/local/bin/perl -w 

### $Id$
### Author: Glenn Hamell 
### NAME: gen_LET_avg.pl 
###
### This software is Copyright (C) 2006,
### ACE Science Center, SRL, California Institute of Technology.
### All rights reserved. Unauthorized reproduction prohibited.
###
### The software is provided to you without charge,
### and with no warranty. You may give away copies of
### this software, including sources, provided that
### this notice is included in all the files.
###
# usage(): print the command-line synopsis on the currently selected output
# handle (STDOUT unless changed).  Takes no arguments.
# The argument list shown here has to stay in step with the script itself,
# which refuses to run with fewer than 13 arguments and reads
# $ARGV[0] .. $ARGV[12] (the last one being "public").
sub usage { print <<'eod';
###
### USAGE:  gen_LET_avg.pl 10MINAV/HOURLYAV/DAILYAV, year, s_doy, e_doy, species, 
###		     SC, <srcdir>, <hdrsfil>, <nbinsfil>, <destdir>, version, chkdythresh,
###		     public
###
###	<srcdir>   Full path to directory containing the source data.
###			e.g. /home/stereo/LETL1Data/ahead/2006/ 
###
###	<hdrsfil>  Fully qualified file name (path & filename) of file
###			containing the collection of header lines. 
###			e.g. /home/stereo/LETL1Data/LET_1hr_hdrs.txt 
###
###	<nbinsfil> Fully qualified file name (path & filename) of file
###			containing the number of bins for respective species. 
###			e.g. /home/stereo/LETL1Data/LET_1hr_nbins.txt 
###
###	<destdir>  Full path to directory where output file is to be placed.
###			e.g. /home/stereo/LETL1Data/ahead/2006/
###
###	chkdythresh  If true (non-zero), an averaging period containing a record
###			with a non-zero dynamic-threshold state is written as fill.
###
###	public     Set to 1 if public data.
eod
}
### PURPOSE:
###	Write an IDL or PERL routine to compute hourly or daily averaged intensities, 
###	reading in all the files for a given year-doy span(inclusive), and 
###	outputting the results to a yearly output file. 
###
### INPUT:
###	In   /home/stereo/LETL1Data/ahead/2006 (and 
###	also /home/stereo/LETL1Data/behind/2006)
###	are ascii files containing 1-minute averages of STEREO LET data.
###	Currently, only files containing Proton (H) data are present, but 
###	other species will be there in the future.
###	The filenames are in the format 
###		Species_SC_YYYY_DDD_level1_VV.txt
###	where Species is one of H, He3, He4, C, N, O, etc
###	      YYYY_DDD is year and day-of-year
###	      VV is a version number (01, 02, etc)
###	Each file has a header that describes the data, and 
###	a "BEGIN DATA" line that marks the beginning of the data records.
###	For the H data, there are 12 energy bins, but for other species 
###	the number of energy bins will be different.
###
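### OUTPUT:
###	One file, written to <destdir>, named
###		Species_SC_YYYY_1hr_level1_VV.txt    (HOURLYAV)
###		Species_SC_YYYY_1day_level1_VV.txt   (DAILYAV)
###		Species_SC_YYYY_10min_level1_VV.txt  (any other averaging type)
###	It holds the header lines for the species, followed by the
###	averaged data records.
###
### RETURN:
###	On error, a message is written to STDOUT and the program exits
###	with a non-zero status (exit(-1)).
###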
### SPECIFICATION EVOLUTION / MODIFICATION HISTORY: 
### ==============================================
### 2006Dec05-Glenn Hamell Created. 
### 2006Dec14-GRH-Ver0 ready - Only missing production version of averaging.
### 2006Dec15-08:01GRH-Add'l specs fm Dr. Andrew Davis:
###	HI Glenn,
###	Output filename: H_SC_YYYY_1hr_level1_VV.txt
###	         where SC is either "ahead" or "behind".
###	You can add an argument on the commandline to indicate SC 
###	if you like.
###	
###	Remove nbins from the commandline args, and instead read in an
###	external table containing the nbins values for each species.
###	Use this to also build the fill-data string for each species, 
###	rather than hard-wiring it.
###	
###	For fill-record, the value for floating-point field should be 
###	-9999.9, not -999.9
###	The fractional part of DOY should correspond to the start of
###	the hour.
###	
###	I'll send you the averaging algorithm in a separate email.
###	
###	Cheers,
###	Andrew
### 2006Dec-15-10:50-Add'l info fm Dr.Davis:
###	I am trying to get a resolution on the preferred averaging method: 
###	see my email to the LET folks below...
###
###	Note that Ii = Ci / ( E * Li * Gi ) corresponds to the intensity 
###	values present in the 1-minute input files, and Ci corresponds to 
###	the counts values in those files.
###
###	>Date: Fri, 15 Dec 2006 10:56:50 -0800
###	>To: cohen@srl.caltech.edu, ral@srl.caltech.edu, 
###	>rmewaldt@srl.caltech.edu, ace@srl.caltech.edu, labrador@srl.caltech.edu
###	>From: Andrew Davis <ad@srl.caltech.edu>
###	>Subject: Calculating average fluxes for LET
###	>
###	>I still await written instructions from Rick Leske/Christina Cohen 
###	>for computing hourly/daily average intensities (and uncertainties) 
###	>from the 1-minute LET data.
###	>
###	>To spur a resolution, here is a simple intensity-averaging method, 
###	>which is probably not what you want. Please write down your preferred 
###	>method in these terms and deliver it to me.
###	>
###	>For a given minute, i, and a given species/energy bin:
###	>counts = Ni
###	>geom_fac = Gi   (time-dependent due to dynamic-threshold effects)
###	>Livetime = Li
###	>Ebin_width = E
###	>Intensity = Ii = Ci / ( E * Li * Gi )
###	>
###	>Let the number of minutes in the averaging interval  = M
###	>
###	>Therefore, the average intensity = I = Sum(Ii)/M
###	>
###	>Another possibility is I = (1/E) * Sum(Ci /Gi) / Sum(Li)
###	>
###	>Let me know how you really want the averages intensity calculated, 
###	>and its associated uncertainty.
###	>
###	>Andrew
###
### 2006Dec18-GRH-Mods incorporated per Dr.D.
### 2006Dec18-GRH-Added following species to the nbins file:
###		Ne, Mg, Si, S, Ar, Ca, Fe
###
### 2006Dec18-10:17-GRH-New specs fm Dr. Andrew Davis:
###	Do not average the livetime - 
###	report total livetime, do not divide by nlines.
###
### (A)	The average intensity should be computed very simply as Sum(Ii)/M, 
###	where Ii are the 1-minute intensities. 
###	and M is the number of 1-minute records in the averaging interval. 
###	I think you are already doing this...
###
###	The counts should be totalled, but NOT averaged, 
###	i.e. report total counts in each bin, not average counts, 
###	in the output file.
###
###	Add $nbins new "Uncertainty" fields to the output, and, for now, 
###	compute these fields as 1/sqrt(total counts).
###	This formula for calculating the uncertainty fields will be 
###	improved soon...
###
###	So, the output file should now contain
###	6 time fields,
###	1 total livetime field
###	$nbins average intensity fields
###	$nbins total counts fields
###	$nbins uncertainty fields (1/sqrt(total counts))
###
###	Finally, the text header needs modification:
###	         We are not including column 6 (dynamic threshold state)
###	         The counts are counts/hour
###	         There are $nbins new Uncertainty fields
###	If you like, you can create a new static header file 
###	containing the header info (one file for each species), 
###	excepting the File creation timestamp.
###
### 2006Dec18-14:55-GRH-Uncertainty formula fm Dr. Andrew Davis:
###	Hi Glenn.
###	We have a formula now for the uncertainties:
###
###	U(I) = sqrt ( sum ( Ii^2/Ni ) ) / M
###
###	U(I) is the uncertainty in the calculated average intensity
###	Ii is the intensity for minute i in one of the energy bins
###	Ii^2 = Ii * Ii  (in case the notation is confusing...)
###	Ni is the counts for minute i in one of the energy bins
###	M is the number of 1-minute records in the averaging interval.
###
###	Cheers,
###	Andrew
###
### 2006Dec19 14:35-GRH-Add'l spec info fm Dr.Davis:
###	I should have defined M as "the number of GOOD 1-minute records 
###	in the averaging interval".
###
###	Also, unfortunately, there is the possibility that the 1-minute data 
###	can contain records where some of the intensities are good, and some 
###	are bad.  I do not think any such records exist yet, but in the 
###	future we may decide a particular species/energy-range bin is no good 
###	in general, in which case the 1-minute data would be FILL for that bin 
###	(and OK for other bins for that species).
###
###	***BYTHEWAY***
###	In the software that generates the 1-minute data, I have defined the
###	following constants for FILL data:
###	FILL_L = -2147483647 ; 32-bit integer fill-data
###	FILL_D = -1.0E31 ; double fill-data
###
###	These are obviously different from the -9999 and -9999.9 values that
###	I have asked you to use.
###
###	Andrew
###
### 2007Jan02-17:09-GRH-Add'l spec info fm Dr.Davis:
###	Subject: LET hourly files header material
###	Here is an update on how the contents of the header material
###	should look.
###
###	The Timestamp and Version should be inserted by your program.
###
###	         File created: Tue Dec 26 11:57:38 PST 2006
###	         Created by: Caltech Space Radiation Lab
###	         Version: 01
###	         Time Resolution: 1 hour
###	         Column 1: Year
###	         Column 2: Day of year (fractional)
###	         Column 3: Hour of day
###	         Column 4: Minute of hour
###	         Column 5: Second of minute
###	         Column 6: LET Livetime (seconds)
###	         Column 7 - 18: 12 proton intensities (1/(cm^2 s sr MeV/nuc))
###	                 Probably some info inserted here about the energy 
###			 bin boundaries
###	         Column 19 - 30: 12 proton counts (counts/hour)
###	         Column 31 - 42: 12 proton statistical 
###				    uncertainties  (1/(cm^2 s sr MeV/nuc))
###	         BEGIN DATA
###
### 2007Jan02-GRH- per phone con w/Dr.Davis:
###	-Name change of this app.
###	-Single file containing header material (see above)
###	-Path/Name of file containing header material NOT to be hard coded into
###		this app....meaning the fully qualified filename will be 
###		included on the command line.
###
### 2007Jan03 05:02-GRH-Name Change: to "gen_LET_1hr.pl" from "gen_Avg_Inten.pl" 
###
### 2007Jan04-11:26-GRH-Add'l spec (Err Handling info) fm Dr.Davis:
###	Subject: Re: STEREO-LET Error Handling
###	Make the program quiet in general, i.e. no routine progress messages, etc.
###	Send all error messages to stdout, and exit with a non-zero exit code.
###	The calling program will handle it from there.
###	
### 2007Jan08-14:49-GRH-Add'l spec fm Dr.Davis (Zeros in Uncert.calcs):
###	Hi Glenn,
###	An hourly averaged intensity of zero is a valid result, so do not 
###	fill it, leave it as zero.
###	
###	Also, any minute where the Intensity and counts are zero (if one is 
###	zero, the other will be also...)
###	is perfectly valid, and should definitely increment M.
###	
###	If an Uncertainty for a given hour turns out to be zero, then let it be...
###	
###	BTW,
###	If, for a particular minute, the counts value for a particular energy 
###	bin is zero, then make
###	sure the contribution to the Uncertainty is also zero for that 
###	minute. (i.e. let Ii^2/Ni = 0 ).
###	
###	Cheers,
###	Andrew
###	
### 2007Jan18-GRH-Added appropriate handling of Error Condition Messages.
###	Msgs to be directed to STDOUT. The invoking pgm is responsible for
###	text returned to it via STDOUT.
###	
### 2007Jan25-27-GRH-Mod existing cmdline param for species 
###	headers file (LET_1hr_hdrs.txt) to become  <pathdir/filename>.
###	- Add similar cmd line arg <pathdir/filename> for LET_nbins.txt.
###	- Fix pgm output to ensure usage of cmdline param <destdir> .
###	- Fix timestamp values.
###	
### 2007Jan30-GRH-Incorp mods by Dr.Davis into production code
###	-new variable for version ID of THIS code (s/w ID).
###	-add the s/w ID to head of o/p file(s).
###	-fix fmt for livetime in o/p file(s).
###
### 2007Mar09-AJD-Add mod to extract DOY from filenames with slightly
###	different format
###
### 2007Aug13-per AJD - current version of gen_LET_1hr.pl runs with this set of 
###	cmd-line options, producing hourly averages from day 1 of 2007 until
###	end of 2007, or end of input data files
###
###	./gen_LET_1hr.pl HOURLYAV 2007 C ahead input ./LET_1hr_hdrs_ahead.txt ./LET_nbins.txt output 08
###
###
###	New version should accept two new cmd-line options: 
###		start_doy and end_doy, 
###	and should only use input files between year/start_doy and 
###	year/end_doy (inclusive).
###	Glenn should modify the get_filelist() subroutine to implement this 
###	capability, leaving the rest of the application as-is (except for 
###	$parm_check() and the $rp structure).
###
###	Example new run command ( for 2007-010 through 2007-095 )
###	./gen_LET_1hr.pl HOURLYAV 2007 10 95 C ahead input ./LET_1hr_hdrs_ahead.txt ./LET_nbins.txt output 08
###
###	2007Aug20-AJD-Mod to compute daily or hourly averages, depending on command-line param.
###
######################################################

# Version of this software
my $VERSION = 4;

# You MUST remember THIS:  (not the song!!)
#	FALSE: in PERL is 0(zero) or "" or null
#	TRUE : is anything that is not FALSE
use strict;
use Cwd;

#use LEVEL2_SUBS;

### Main script.
### Reads the command line, loads the bin table and header lines for the
### requested species, builds the list of input files, opens the output
### file (handle FOUT, shared with the gen_file_*_avgd routines) and
### produces the averaged records one input file at a time.
### All error messages go to STDOUT and the script exits with exit(-1).

### Initialization
my $dbug = 0;		# 0: Production mode
my $n_cmdline_args=12;	# Highest required @ARGV index (args are numbered from zero)

if($#ARGV < $n_cmdline_args) {	    # Too few args: set up default cmdline parameters
  if( $dbug ) {
    @ARGV=( "HOURLYAV", "2007", "10", "95", "C", "AheaD", "input", 
	"./LET_1hr_hdrs_ahead.txt", 
	"./LET_nbins.txt", 
	"output", "08" );
  } else {
    usage();
    exit(-1);
  }
}

my $rp = init( $dbug );		# Set up & init param hash struct

# Get number of bins for this species
$$rp{nbins} = get_LET_nbins( $$rp{nbinsfil} );
if( $$rp{nbins} == 0 ) {
  # print (not printf): the text contains $0 and user data, not a format
  print STDOUT "\nErr: $0: get_LET_nbins():\n";
  print STDOUT "\tSpecies \"$$rp{species}\" not found in bin table file!\n\n";
  exit(-1);
}

# Get header lines
my @hdr_lines = get_hdr_lines( $rp );
if( $#hdr_lines < 0 ) {
  print STDOUT "\nErr: $0: get_hdr_lines(): \n";
  print STDOUT "\tNo header lines returned from get_hdr_lines()\n";
  exit(-1);
}

# filldata - Does NOT contain the time (1st 5) fields.
$$rp{filldata} = gen_filldata( $$rp{nbins} );

# Check parameters on the cmdline
if( ! parm_check() ) {
  print STDOUT "\nErr: $0: parm_check() \n";
  print STDOUT "\tError detected on cmdline.\n";
  usage();
  exit(-1);
}

# Make a list of all avail input files (per year, species, version)
my @listofallfiles = get_filelist();

print "listofallfiles: \n @listofallfiles \n" if( $dbug );

# A list holding a single file is valid (e.g. s_doy == e_doy); only a
# truly empty list is an error.
if( $#listofallfiles < 0 ) {
  # No files in list!!!
  print STDOUT "\nErr: $0: get_filelist(): \n";
  print STDOUT "\tEmpty filename list\n";
  exit(-1);
}


# list of latest versions only
my @listoffiles=CullFilenames( @listofallfiles );

print "\nlistoffiles: \n @listoffiles \n" if( $dbug );

if( $#listoffiles < 0 ) {
  # No files in culled list!!!
  print STDOUT "\nErr: $0: CullFilenames(): \n";
  print STDOUT "\tEmpty filename list\n";
  exit(-1);
}

# check for old versions and issue warnings...
# chk_ver( @listoffiles );


# GENERATE OUTPUT FILENAME
#   OPEN   OUTPUT FILENAME

#	Output filename: H_SC_YYYY_1hr_level1_VV.txt
#	         Path to destination directory is given on cmd line. 

my $op_filename = '';
if ($$rp{avgtype} eq "HOURLYAV") {
   $op_filename = "$$rp{species}_$$rp{SC}_$$rp{year}_1hr_level1_$$rp{ver}.txt";
} elsif ($$rp{avgtype} eq "DAILYAV") {
   $op_filename = "$$rp{species}_$$rp{SC}_$$rp{year}_1day_level1_$$rp{ver}.txt";
} else {
   $op_filename = "$$rp{species}_$$rp{SC}_$$rp{year}_10min_level1_$$rp{ver}.txt";
}

$$rp{destdir} =~ s/\/$// ;		# Remove trailing / if any
$$rp{outfilename} = "$$rp{destdir}/$op_filename";

# Three-argument open: the file name is taken literally, whatever
# characters it contains.  FOUT stays a bareword handle because the
# gen_file_*_avgd routines print to it by name.
if( ! open( FOUT, '>', $$rp{outfilename} ) ) {
  print STDOUT "\nErr: $0: Can't create: $$rp{outfilename} \n\t $!\n";
  exit(-1);
}
	 
print FOUT @hdr_lines;		# Write out HEADER lines

# PRODUCE Avgs for Each Day

foreach my $file ( @listoffiles ) {
  chomp $file;
  $$rp{infilename} = $file;

  # get DOY fm file name (third field from the end when split on "_")
  my @flds = split "_", $file;
  $$rp{DOY} = sprintf "%03d", $flds[ $#flds -2];

  # For year 2011, unset chkdyth flag after LET patch upload
  if ( ($$rp{year} == 2011) && ($$rp{DOY} > 288) ) {
     $$rp{chkdyth} = 0;
  }
  # Set chkdy1521C flag, to kill the public 15-21 MeV/nuc C bin
  # during non-zero dynamic-threshold periods.
  # Note: Summed and Sectored data do not have a species="C"
  if ( (($$rp{year} == 2006)||($$rp{year} >= 2010)) && ($$rp{public}) && ($$rp{species} eq "C") ) {
     $$rp{chkdy1521C} = 1;
  }  

  # Dispatch on averaging type; the routines' return values are not used.
  my $srcfile = "$$rp{srcdir}/$file";
  if ($$rp{avgtype} eq "HOURLYAV") {  
     gen_file_hrly_avgd( $srcfile );
  } elsif ($$rp{avgtype} eq "DAILYAV") {
     gen_file_daily_avgd( $srcfile );
  } else {
     gen_file_10min_avgd( $srcfile );
  }
}

# Buffered write errors only show up at close time, so check it.
if( ! close( FOUT ) ) {
  print STDOUT "\nErr: $0: Error closing: $$rp{outfilename} \n\t $!\n";
  exit(-1);
}
print STDOUT "$0: Done.\n\n" if( $dbug );
###------  end  main  -------------------------


###------  start  get_hdr_lines  -----------------------
sub get_hdr_lines {
  # Return the header lines for one species, ready to be written at the
  # top of the output file.
  #
  # Argument:
  #   $_[0]  reference to the parameter hash; the keys read here are
  #          species, hdrsfil, avgtype and ver_this_sware.
  # Returns:
  #   The lines found between the "BEGIN HEADER" and "END HEADER" lines
  #   that follow the "SPECIES: <species>" line, with element [2] replaced
  #   by a fresh "File Created:" line and element [4] by the software
  #   version line.  An EMPTY list is returned when no header lines were
  #   found for the species, so the caller can detect the problem.
  # Errors:
  #   If the header file cannot be opened, a message is written to STDOUT
  #   and the program exits with exit(-1).
  #
  # Expected format of the Header File:
  #	SPECIES: H
  #	BEGIN HEADER
  #	File created: Thu Dec 21 20:26:03 PST 2006
  #	Created by: Caltech Space Radiation Lab
  #	Version: 01
  #	Time Resolution: 1 hour
  #	Column 1: Year
  #	Column 2: Day of year (fractional)
  #	Column 3: Hour of day
  #	Column 4: Minute of hour
  #	Column 5: Second of minute
  #	Column 6: LET Livetime (seconds)
  #	Column 7 - 18: 12 proton intensities (1/(cm^2 s sr MeV/nuc))
  #	Column 19 - 30: 12 proton counts (counts/minute)
  #	Column 31 - 42: 12 proton statistical 
  #	                   uncertainties  (1/(cm^2 s sr MeV/nuc))
  #	BEGIN DATA
  #	END HEADER
  #
  # NOTE(review): the code below rewrites elements [2] and [4], which does
  # not line up with the sample layout above (File created would be [0]);
  # presumably the real header files carry extra leading lines - confirm.
  #
  my $rp = $_[0];	# retrieve reference to param hash-struct
  my $species = $$rp{species};	# get species of interest
  my @lines = ();

  my $hdrs;
  if( ! open( $hdrs, '<', $$rp{hdrsfil} ) ) {
    print STDOUT "\nErr: $0: get_hdr_lines:\n\tCannot open $$rp{hdrsfil}\n\n";
    exit(-1);
  }

  # Three scanning phases, entered in this order:
  #   0 - looking for the "SPECIES: <species>" line
  #   1 - looking for the next line containing "BEGIN HEADER"
  #   2 - capturing lines until one contains "END HEADER"
  my $phase = 0;
  while( my $linein = <$hdrs> ) {
    if( $phase == 0 ) {
      my @flds = split ' ', $linein;
      if( ($#flds == 1) && ($flds[0] eq "SPECIES:") && ($flds[1] eq $species) ) {
	$phase = 1;
      }
    } elsif( $phase == 1 ) {
      $phase = 2 if( $linein =~ "BEGIN HEADER" );
    } else {
      last if( $linein =~ "END HEADER" );
      push @lines, $linein;
    }
  }
  close $hdrs;

  # Nothing captured: species (or its header block) is not in the file.
  # Return before the assignments below, which would otherwise turn the
  # empty list into a 5-element list full of undefined entries.
  return() if( ! @lines );

  # Make date and version lines current
  $lines[2]="File Created: " . `date`;
  if ($$rp{avgtype} eq "HOURLYAV") {
     $lines[4]="Hourly-averaging Software Version: $$rp{ver_this_sware}\n";
  } elsif ($$rp{avgtype} eq "DAILYAV") {
     $lines[4]="Daily-averaging Software Version: $$rp{ver_this_sware}\n";
  } else {
     $lines[4]="10Min-averaging Software Version: $$rp{ver_this_sware}\n";
  }

  return( @lines );
}
###------  end  get_hdr_lines  -----------------------


###------  start  init  -----------------------
sub init {
  # Build the run-parameter hash from the command line and return a
  # reference to it.  The argument passed by the caller is not used.
  #
  # Positional command-line arguments, in order:
  #   averaging type, year, start DOY, end DOY, species, spacecraft,
  #   source dir, headers file, nbins file, destination dir,
  #   level-1 version, chkdyth flag, public flag.
  my( $avgtype, $year, $s_doy, $e_doy, $species, $sc, $srcdir,
      $hdrsfil, $nbinsfil, $destdir, $l1_ver, $chkdyth, $public ) = @ARGV;

  my $params = {
    # --- taken from the command line ---
    avgtype        => $avgtype,
    year           => $year,
    s_doy          => $s_doy,
    e_doy          => $e_doy,
    species        => $species,
    SC             => lc( $sc ),		# spacecraft name, lower-cased
    srcdir         => $srcdir,
    hdrsfil        => $hdrsfil,
    nbinsfil       => $nbinsfil,
    destdir        => $destdir,
    chkdyth        => $chkdyth,
    public         => $public,		# Set to 1 if public data
    ver            => sprintf( "%02d", $l1_ver ),	# Level 1 processing software version
    ver_this_sware => sprintf( "%02d", $VERSION ),	# version of this software

    # --- run state, filled in later by the main script ---
    dbug           => $dbug,		# 0 = production
    threshold      => 0,
    rtnval         => 0,
    nbins          => 0,		# set from get_LET_nbins()
    chkdy1521C     => 0,		# check dyth state for the 15-21 MeV C bin
    fullpath       => "",		# pathbase/<SC>/<year>
    infilename     => "",		# basename (ie, w/o fullpath)
    outfilename    => "",
    DOY            => "",
    sum_livetime   => 0.0,		# livetime accumulated over a period
    filldata       => "",
    dir            => "",
    DOY_tbl        => [],
    periods        => [ '12m/', '1d/', '1h/', '2h/' ],

    # --- fill values ---
    fillflt        => -9.9999E+03,
    fillint        => -9999,
    onemin_fillflt => -1.0E31,		# double fill-data
    onemin_fillint => -2147483647,	# 32-bit integer fill
  };

  return( $params );		# Rtn ref to hash struct
}
###------  end  init  -----------------------


###------  start  get_LET_nbins  -----------------------
sub get_LET_nbins {
  # Look up the number of energy bins for a species in the bin table file.
  #
  # The table holds one "species,nbins" entry per line.
  #
  # Arguments:
  #   $_[0]  name of the bin table file
  #   $_[1]  (optional) species to look up; defaults to $$rp{species}
  # Returns:
  #   the bin count from the first line whose species field matches
  #   (surrounding whitespace and the line terminator removed), or 0 when
  #   the species is not listed or its line carries no count.
  # Errors:
  #   If the file cannot be opened, a message is written to STDOUT and the
  #   program exits with exit(-1), like the other routines in this file.
  my( $filein, $species ) = @_;
  $species = $$rp{species} if( ! defined $species );

  my $fin;
  if( ! open( $fin, '<', $filein ) ) {
    print STDOUT "\nErr: $0: get_LET_nbins:\n\tCan't open $filein: \n\t$!\n\n";
    exit(-1);
  }

  my $nbins = 0;
  while( my $linein = <$fin> ) {
    my( $name, $count ) = split ",", $linein;
    next if( (! defined $name) || (! defined $count) );

    # Strip blanks and the trailing newline so that "C, 10\n" yields
    # species "C" and a clean numeric count "10".
    foreach my $fld ( $name, $count ) {
      $fld =~ s/^\s+//;
      $fld =~ s/\s+$//;
    }
    next if( $name ne $species );

    # Found match; an entry without a count is treated as "not found"
    $nbins = $count if( $count ne "" );
    last;
  }
  close $fin;
  return( $nbins );
}
###------  end  get_LET_nbins  -----------------------


###------  start  gen_filldata  -----------------------
sub gen_filldata {
  # Build the data part of a fill record (everything after the time
  # fields) for a species with $_[0] energy bins:
  #   one livetime field, then nbins intensity fields, nbins count fields
  #   and nbins uncertainty fields, all set to the fill values held in
  #   the parameter hash (fillflt for floats, fillint for integers).
  # Returns the assembled string.
  my( $nbins ) = @_;

  my $livetime_fld = sprintf( "%7.2f",   $$rp{fillflt} );	# Livetime
  my $flt_fld      = sprintf( " % 10.4E", $$rp{fillflt} );	# one float field
  my $int_fld      = sprintf( " % 7d",    $$rp{fillint} );	# one integer field

  return( join( "",
		$livetime_fld,
		$flt_fld x $nbins,	# Intensities
		$int_fld x $nbins,	# Counts
		$flt_fld x $nbins ) );	# Uncertainties
}
###------  end  gen_filldata  -----------------------


####------  start  write_hdr_lines  -----------------------
#sub write_hdr_lines {
#  my $filein = $_[0];
#  chomp $filein;
#  my @hdrs   = ();
#  my $line = "";

#  if( ! open FIN, "<$$rp{fullpath}/$filein" ) {
#    printf STDOUT "\nErr: $0: write_hdr_lines(): \n";
#    printf STDOUT "\tCan't open \"$$rp{fullpath}/$filein\": \n\t$!\n";
#    exit(-1);
#  }

#  # read through the "BEGIN DATA" line, saving header lines
#  while( $line = <FIN> ) {
#    push @hdrs, $line;
#    last if( $line =~ "BEGIN DATA" );
#  }
#  close FIN;

#  $hdrs[0] =~ s/-\d+// ;
#  $line = "File created: " . `date` ;	# Date line
#  $hdrs[1] = $line;
#  $hdrs[3] =~ s/minute.*/hour/ ;

#  # WRITE HEADER LINES
#  for( my $i=0; $i<=$#hdrs; $i++) {
#    printf FOUT $hdrs[$i];
#  }
#  return;
#}
####------  end  write_hdr_lines  -----------------------


###------  start  gen_file_daily_avgd  -----------------------
sub gen_file_daily_avgd {
  # Generate the daily (averaged over the day) record for one input file
  # and write it to the output handle FOUT (opened by the main script).
  #
  # Argument:
  #   $_[0]  path of the input file for the day held in $$rp{DOY}
  # Uses the file-level parameter hash $rp:
  #   reads  dbug, DOY, year, chkdyth, chkdy1521C, filldata
  #   writes sum_livetime (reset, then accumulated over the kept records)
  # Returns 1.
  # Errors:
  #   If the input file cannot be opened, a message is written to STDOUT
  #   and the program exits with exit(-1).
  #
  # For the Day:
  #    Make a list of the records (after the "BEGIN DATA" line) whose
  #    integer day-of-year equals $$rp{DOY}; any other line is skipped.
  #       If the list is empty, OR chkdyth is set and some listed record
  #       has a non-zero dynamic-threshold state
  #          write a fill-record to the output file
  #       else
  #          hand the list to avg_lines() and write the line it returns
  #
  # Sum(livetimes):
  #     is the sum of the livetime data (Column 7, counting 
  #     from 1) for each record in the list for the current day.
  #     It is accumulated in $$rp{sum_livetime}; this routine does not
  #     compare it with $$rp{threshold}.
  #

  my $dbug = $$rp{dbug};
  my $filein  = $_[0] ;

  my $fin;
  if( ! open( $fin, '<', $filein ) ) {
    print STDOUT "\nErr: $0: gen_file_daily_avgd:\n";
    print STDOUT "\tCan't open $filein: \n\t$!\n";
    exit(-1);
  }

  my @all_lines = <$fin>;	# read in all lines
  close $fin;

  print STDERR "@all_lines \n" if( $dbug );

  # read past the "BEGIN DATA" line
  my $idx = 0;
  while( ($idx <= $#all_lines) && ($all_lines[$idx] !~ "BEGIN DATA") ) {
    $idx++;
  }
  $idx++;		      # move to 1st data line

  my $DOY_fract = sprintf "%010.6f", $$rp{DOY} ;
  my $time_flds_filler = "$$rp{year} $DOY_fract" . " 00 00 00";
  my @lns4avg = ();		# array of lines for averaging
  $$rp{sum_livetime} = 0.0;	# reset accum livetime

  my $dyOK = 1;			# cleared when chkdyth is set and dyth state is non-zero
  my $dy1521C_OK = 1;		# same, for the chkdy1521C flag

  # The index advances on EVERY line.  Lines that do not belong to this
  # day (or are blank) are skipped; if the index only moved on matching
  # lines, the first non-matching line would stall the loop forever.
  for( ; $idx <= $#all_lines; $idx++ ) {

    my( $YEAR, $DOY, $HOD ) = split " ", $all_lines[$idx], 4 ;
    next if( ! defined $DOY );			# blank or truncated line
    print STDERR "$YEAR, $DOY, $HOD\n" if( $dbug && defined $HOD );

    next if( $$rp{DOY} != (int $DOY) );		# record of another day

    # INCLUDE THIS LINE for averaging
    push @lns4avg, $all_lines[$idx] ;
    my @flds = split " ", $all_lines[$idx], 8;
    $$rp{sum_livetime} += $flds[6];	# Accum livetime value

    if( $flds[5] ) {			# dynamic-threshold state is non-zero
      $dyOK = 0        if( $$rp{chkdyth} );
      $dy1521C_OK = 0  if( $$rp{chkdy1521C} );
    }
  }

  # All data lines have been read.
  # Average lines collected for this day...rtn single string for o/p
  if( ($#lns4avg >= 0) && ($dyOK) ) {	# Any data lines accumulated ?
    my $lineout = avg_lines( \@lns4avg, $time_flds_filler, $dy1521C_OK );
    # print, not printf: the record is data, never a format string
    print FOUT "$lineout\n";
    print STDERR "$lineout\n" if( $dbug );
  } else {
    # make up & print out a fill rcrd here
    my $fill_rcrd = "$time_flds_filler $$rp{filldata}";
    print STDOUT "|$fill_rcrd|\n" if( $dbug );
    print FOUT "$fill_rcrd\n";
  }

  return(1);
}
###------  end  gen_file_daily_avgd  -----------------------


###------  start  gen_file_hrly_avgd  -----------------------
sub gen_file_hrly_avgd {
  # Generate hourly (averaged over the hour) records for one daily input
  # file and write exactly 24 records, one per hour, to FOUT.
  #
  # For each hour of the day:
  #    Collect the data records whose hour field (3rd column) equals the hour.
  #    If no records were collected, OR Sum(livetimes) < Threshold, OR a
  #    collected record has a non-zero dynamic-threshold field while
  #    $$rp{chkdyth} is set:
  #       write a fill record
  #    else
  #       write the averaged record returned by avg_lines()
  #
  # Sum(livetimes): sum of the livetime data (column 7, counting from 1)
  #     over the records collected for the current hour.  It is also left
  #     in $$rp{sum_livetime}.
  # Threshold:      $$rp{threshold}.
  #
  # Args:    $_[0]  name of the daily input file to process
  # Globals: $rp    reference to the run-parameter hash (defined elsewhere
  #                 in this file)
  #          FOUT   output filehandle; presumably opened by the caller
  # Returns: 1.  Exits with status -1 if the input file cannot be opened.
  #
  # Records are expected in ascending time order.  Blank or malformed lines,
  # and records whose hour lies before the hour currently being collected,
  # are skipped; without that the line index would never advance and the
  # routine would loop forever.

  my $dbug   = $$rp{dbug};
  my $filein = $_[0] ;
  my( $YEAR,$DOY,$HOD, $rest ) ;

  # 3-arg open with a lexical handle; print (not printf) so that a '%' in
  # the path or error text is never taken as a format directive.
  open( my $fin, '<', $filein ) or do {
    print STDOUT "\nErr: $0: gen_file_hrly_avgd:\n";
    print STDOUT "\tCan't open $filein: \n\t$!\n";
    exit(-1);
  };

  my @all_lines = <$fin>;	# read in all lines
  close $fin;

print STDERR "@all_lines \n" if( $dbug );

  # read past the "BEGIN DATA" line (bounded, so a file without the marker
  # yields 24 fill records instead of an endless loop)
  my $idx = 0;
  while( $idx <= $#all_lines and $all_lines[$idx] !~ /BEGIN DATA/ ) {
    $idx++;
  }
  if( $idx > $#all_lines ) {
    print STDERR "Warn: $0: gen_file_hrly_avgd: no BEGIN DATA line in $filein\n";
  }
  $idx++;		      # move to 1st data line

HOUR:
  for( my $hr=0; $hr<24; $hr++ ) {
    # NOTE(review): 0.041666666 approximates 1/24; the exact form
    # ($hr/24.0) had been commented out by the author, so it is kept as is.
    my $DOY_fract = sprintf "%010.6f", ($$rp{DOY} + ($hr*0.041666666)) ;
    my $time_flds_filler = "$$rp{year} $DOY_fract" . sprintf(" %02d",$hr) . " 00 00";
    my @lns4avg = ();		# array of lines for averaging
    $$rp{sum_livetime} = 0.0;	# reset accum livetime

    my $dyOK = 1;		# cleared when dyth>0 and chkdyth is set
    my $dy1521C_OK = 1;		# cleared when dyth>0 and chkdy1521C is set

LINE:
    while( $idx <= $#all_lines ) {

      # Get hour value from next data line
      ( $YEAR,$DOY,$HOD, $rest ) = split " ", $all_lines[$idx], 4 ;

      # Skip lines that cannot belong to this or any later hour
      if( !defined($HOD)
	  or $HOD !~ /^\d+(?:\.\d*)?$/
	  or $HOD < $hr ) {
print STDERR "Skipping line ", $idx+1, " of $filein\n" if( $dbug );
	$idx++;
	next LINE;
      }
print STDERR "$YEAR, $DOY, $HOD\n" if($dbug);

      # A record of a later hour ends collection; it stays unread so the
      # next hour's pass starts with it.
      last LINE if( $HOD > $hr );

      # INCLUDE THIS LINE for averaging
      push @lns4avg, $all_lines[$idx] ;
      my @flds = split " ", $all_lines[$idx], 8;
      $$rp{sum_livetime} += $flds[6];	# Accum livetime value
      $dyOK       = 0 if( $flds[5] and $$rp{chkdyth} );
      $dy1521C_OK = 0 if( $flds[5] and $$rp{chkdy1521C} );
      $idx++;
    } # bottom of next line loop

    # One decision point for both "next hour reached" and "EOF reached",
    # so the livetime threshold is applied to the final hour(s) as well.
    if( @lns4avg
	and ( $$rp{sum_livetime} >= $$rp{threshold} )
	and $dyOK ) {
      # Average all lines collected for this hour...rtn single string for o/p
      my $lineout = avg_lines( \@lns4avg, $time_flds_filler, $dy1521C_OK );
      print FOUT "$lineout\n";
print STDERR "$lineout\n" if( $dbug );
    } else {
      # make up & print out a fill rcrd here
      my $fill_rcrd = "$time_flds_filler $$rp{filldata}";
print STDOUT "|$fill_rcrd|\n" if( $dbug );
      print FOUT "$fill_rcrd\n";
    }
  } # bottom of hour loop ( $hr=0; $hr<24; $hr++ )

  return(1);
}
###------  end  gen_file_hrly_avgd  -----------------------

###------  start  gen_file_10min_avgd  -----------------------
sub gen_file_10min_avgd {
  # Generate 10-minute averaged records for one daily input file and write
  # exactly 144 records, one per 10-minute interval, to FOUT.
  #
  # For each interval the data records whose hour/minute fields (3rd and
  # 4th columns) fall inside the interval are collected.  A fill record is
  # written when no records were collected, when the summed livetime
  # (column 7, counting from 1) is below $$rp{threshold}, or when a
  # collected record has a non-zero dynamic-threshold field while
  # $$rp{chkdyth} is set.  Otherwise the averaged record returned by
  # avg_lines() is written.
  #
  # Args:    $_[0]  name of the daily input file to process
  # Globals: $rp    reference to the run-parameter hash (defined elsewhere
  #                 in this file); $$rp{sum_livetime} is overwritten
  #          FOUT   output filehandle; presumably opened by the caller
  # Returns: 1.  Exits with status -1 if the input file cannot be opened.
  #
  # Records are expected in ascending time order.  Blank or malformed lines,
  # and records that lie before the interval currently being collected, are
  # skipped; without that the line index would never advance and the
  # routine would loop forever.

  my $dbug   = $$rp{dbug};
  my $filein = $_[0] ;
  my( $YEAR,$DOY,$HOD,$MOD, $rest ) ;
  my $hr = 0;

  # 3-arg open with a lexical handle; print (not printf) so that a '%' in
  # the path or error text is never taken as a format directive.
  open( my $fin, '<', $filein ) or do {
    print STDOUT "\nErr: $0: gen_file_10min_avgd:\n";
    print STDOUT "\tCan't open $filein: \n\t$!\n";
    exit(-1);
  };

  my @all_lines = <$fin>;	# read in all lines
  close $fin;

print STDERR "@all_lines \n" if( $dbug );

  # read past the "BEGIN DATA" line (bounded, so a file without the marker
  # yields 144 fill records instead of an endless loop)
  my $idx = 0;
  while( $idx <= $#all_lines and $all_lines[$idx] !~ /BEGIN DATA/ ) {
    $idx++;
  }
  if( $idx > $#all_lines ) {
    print STDERR "Warn: $0: gen_file_10min_avgd: no BEGIN DATA line in $filein\n";
  }
  $idx++;		      # move to 1st data line

MIN10:
  for( my $tenmin=0; $tenmin<144; $tenmin++ ) {
    # NOTE(review): 0.00694444 is a truncated 1/144; for some intervals the
    # 6th decimal of the DOY fraction differs from the exact $tenmin/144.
    # Left unchanged because it alters output timestamps -- confirm.
    my $DOY_fract = sprintf "%010.6f", ($$rp{DOY} + ($tenmin*0.00694444)) ;
    $hr = int($tenmin/6);
    my $time_flds_filler = "$$rp{year} $DOY_fract" . sprintf(" %02d",$hr) . sprintf(" %02d",10*($tenmin - ($hr*6))) . " 00";
    my @lns4avg = ();		# array of lines for averaging
    $$rp{sum_livetime} = 0.0;	# reset accum livetime

    my $dyOK = 1;		# cleared when dyth>0 and chkdyth is set
    my $dy1521C_OK = 1;		# cleared when dyth>0 and chkdy1521C is set

LINE:
    while( $idx <= $#all_lines ) {

      # Get hour, min values from next data line
      ( $YEAR,$DOY,$HOD,$MOD, $rest ) = split " ", $all_lines[$idx], 5 ;

      # Skip blank/malformed lines
      if( !defined($MOD)
	  or $HOD !~ /^\d+(?:\.\d*)?$/
	  or $MOD !~ /^\d+(?:\.\d*)?$/ ) {
print STDERR "Skipping line ", $idx+1, " of $filein\n" if( $dbug );
	$idx++;
	next LINE;
      }
print STDERR "$YEAR, $DOY, $HOD, $MOD\n" if($dbug);

      # Index (0..143) of the 10-minute interval this record belongs to
      my $ctenmin = ($HOD*6) + int($MOD/10);

      # Record of an interval already written (out-of-order input): skip
      if( $ctenmin < $tenmin ) {
	$idx++;
	next LINE;
      }

      # A record of a later interval ends collection; it stays unread so
      # the next interval's pass starts with it.
      last LINE if( $ctenmin > $tenmin );

      # INCLUDE THIS LINE for averaging
      push @lns4avg, $all_lines[$idx] ;
      my @flds = split " ", $all_lines[$idx], 8;
      $$rp{sum_livetime} += $flds[6];	# Accum livetime value
      $dyOK       = 0 if( $flds[5] and $$rp{chkdyth} );
      $dy1521C_OK = 0 if( $flds[5] and $$rp{chkdy1521C} );
      $idx++;
    } # bottom of next line loop

    # One decision point for both "next interval reached" and "EOF
    # reached", so the livetime threshold is applied to the final
    # interval(s) as well.
    if( @lns4avg
	and ( $$rp{sum_livetime} >= $$rp{threshold} )
	and $dyOK ) {
      # Average all lines collected for this interval...
      #     rtn single string for o/p
      my $lineout = avg_lines( \@lns4avg, $time_flds_filler, $dy1521C_OK );
      print FOUT "$lineout\n";
print STDERR "$lineout\n" if( $dbug );
    } else {
      # make up & print out a fill rcrd here
      my $fill_rcrd = "$time_flds_filler $$rp{filldata}";
print STDOUT "|$fill_rcrd|\n" if( $dbug );
      print FOUT "$fill_rcrd\n";
    }
  } # bottom of 10min loop ( $tenmin=0; $tenmin<144; $tenmin++ )

  return(1);
}
###------  end  gen_file_10min_avgd  -----------------------



###------  start  gen_tm_flds  -----------------------
sub gen_tm_flds {
  # Returns the time-fields string for a fill record, assembled from the
  # year, DOY and hour entries of the parameter hash referenced by $rp.
  # Layout: "<year> <DOY>, <hour>" (the comma follows the DOY value).
  my @parts = ( $$rp{year}, $$rp{DOY} . ",", $$rp{hour} );
  return join( " ", @parts );
}
###------  end  gen_tm_flds  -----------------------


###------  start  avg_lines  -----------------------
sub avg_lines {
  # Combine the data records of one averaging period into a single output
  # record string.
  #
  # Args:
  #   $_[0]  ref to array of input data lines for the period
  #   $_[1]  string of time fields that starts the output record
  #          (the caller's $time_flds_filler)
  #   $_[2]  flag; when false, bin index 9 is written as fill in the
  #          Intensity, Counts and Uncertainty groups
  #          (presumably the 15-21 MeV C bin, going by the flag name --
  #          TODO confirm)
  # Globals read through $rp: dbug, nbins, fillflt, fillint
  #
  # Input line layout (whitespace separated, 0-based field index):
  #   0..4 time fields, 5 threshold, 6 livetime,
  #   7 .. 7+nbins-1 intensities, then nbins counts.
  #
  # Returns: "<time flds> <livetime sum> <nbins avg intensities>
  #           <nbins summed counts> <nbins uncertainties>"
  #   avg intensity  = sum(Ii) / M      (M = number of non-negative Ii)
  #   uncertainty    = sqrt( sum(Ii^2/Ni) ) / M
  #   A bin with no contributing value gets the float/int fill value.
  my( $rdata, $tmflds, $dy1521C_OK ) = @_;

  my $nlines = scalar @$rdata;
printf STDERR "Num of lines to avg for this hour = $nlines \n" if($$rp{dbug});

  my $nbins    = $$rp{nbins};
  my $fillflt  = $$rp{fillflt};
  my $fillint  = $$rp{fillint};
  my $first_I  = 7;		# 5 time fields + Thresh. + Livetime
  my $last_bin = $nbins - 1;

  my $livtim  = 0.0;			# accumulated livetime
  my @sum_I   = (0.0) x $nbins;	# Intensity sums
  my @sum_N   = (0)   x $nbins;	# Counts sums
  my @cnt_I   = (0)   x $nbins;	# num of rcrds in each Intensity sum (M)
  my @cnt_N   = (0)   x $nbins;	# num of rcrds in each Counts sum
  my @sum_IIN = (0.0) x $nbins;	# sum( Ii^2/Ni ), for the Uncert. calc

  # Accumulate over every record of the period, bin by bin
  foreach my $rcrd ( @$rdata ) {
    my @flds = split " ", $rcrd;
    $livtim += $flds[6];

    foreach my $bin ( 0 .. $last_bin ) {
      my $Ii = $flds[ $first_I + $bin ];		# Intensity
      my $Ni = $flds[ $first_I + $bin + $nbins ];	# Counts

      # If one of the pair is zero, the other is treated as zero too
      if( $Ii == 0 || $Ni == 0 ) {
	$Ii = 0;
	$Ni = 0;
      }

      # Negative values are not accumulated
      if( $Ii >= 0 ) {
	$sum_I[$bin] += $Ii;
	$cnt_I[$bin]++;
	$sum_IIN[$bin] += ($Ii * $Ii) / $Ni   if( $Ni > 0 );
      }
      if( $Ni >= 0 ) {
	$sum_N[$bin] += $Ni;
	$cnt_N[$bin]++;
      }
    }
  }

  # Assemble the output record piece by piece
  my @out = ( sprintf( "%s % 8.2f", $tmflds, $livtim ) );

  # INTENSITY fields: mean of the contributing values
  foreach my $bin ( 0 .. $last_bin ) {
    my $val = ( $cnt_I[$bin] > 0 ) ? $sum_I[$bin] / $cnt_I[$bin] : $fillflt;
    $val = $fillflt   if( $bin == 9 && ! $dy1521C_OK );
    push @out, sprintf( " % 10.4E", $val );
  }

  # COUNTS fields: plain sum of the contributing values
  foreach my $bin ( 0 .. $last_bin ) {
    my $val = ( $cnt_N[$bin] > 0 ) ? $sum_N[$bin] : $fillint;
    $val = $fillint   if( $bin == 9 && ! $dy1521C_OK );
    push @out, sprintf( " % 7d", $val );
  }

  # UNCERTAINTY fields:   U(I) = sqrt( sum(Ii^2/Ni) )/M
  foreach my $bin ( 0 .. $last_bin ) {
    my $val = ( $cnt_I[$bin] > 0 )
		? sqrt( $sum_IIN[$bin] ) / $cnt_I[$bin]
		: $fillflt;
    $val = $fillflt   if( $bin == 9 && ! $dy1521C_OK );
    push @out, sprintf( " % 10.4E", $val );
  }

  return( join( "", @out ) );
}
###------  end  avg_lines  -----------------------


###------  start  get_filelist  -----------------------
sub get_filelist {
  # Return the names (without directory part) of the data files found in
  # $$rp{srcdir} whose name matches
  #     <species>_<SC>_<year>_*_level1_*.txt
  # and whose day-of-year lies in the range $$rp{s_doy} .. $$rp{e_doy}
  # (inclusive).  The list is sorted by name.
  #
  # The DOY of a file is the first "_ddd_" (three digits between
  # underscores) found in its name; names without one are ignored.
  #
  # Returns an empty list when nothing qualifies.  "No files at or after
  # <s_doy>" is printed when no file at or after the start DOY exists.
  #
  # The directory is read with opendir/readdir rather than by chdir + `ls`,
  # so the current working directory is never changed (previously it was
  # left pointing at srcdir on the early-return path) and no shell is run
  # on strings built from the parameters.

  my @Filenames = ();           # return list

  my $srcdir = $$rp{srcdir};

  # Regex equivalent of the former shell mask; parameter text is quoted
  # so it only ever matches literally.
  my $prefix = quotemeta( $$rp{species} . "_" . $$rp{SC} . "_" . $$rp{year} );
  my $mask   = qr/^${prefix}_.*_level1_.*\.txt\z/s;

  my $dh;
  if( ! opendir( $dh, $srcdir ) ) {
    print STDOUT "\nErr: $0: get_filelist:\n";
    print STDOUT "\tCan't read directory $srcdir: \n\t$!\n";
    return @Filenames ;         # return an empty filename list
  }
  my @t = sort grep { $_ =~ $mask } readdir( $dh );   # Ensure the order
  closedir $dh;

  # Capture filenames in DOY range (inclusive)
  my $n_at_or_after = 0;        # files with DOY >= s_doy
  foreach my $fn ( @t ) {
    next unless( $fn =~ /_(\d\d\d)_/ );   # no DOY field: not a data file
    my $doy = $1;
    next if( $doy < $$rp{s_doy} );
    $n_at_or_after++;
    next if( $doy > $$rp{e_doy} );        # beyond the end of the range
    push @Filenames, $fn ;
  }

  if( ! $n_at_or_after ) {      # none found?
    print "No files at or after $$rp{s_doy}\n";
  }

  return @Filenames ;
}
###------  end  get_filelist  -----------------------


sub CullFilenames {
  # From a list of data filenames keep, for each day, only the file with
  # the highest version number.
  #
  # Args:    list of filenames
  # Returns: list of the retained filenames, in sorted order;
  #          (-1) when called with an empty list.
  #
  # Assumptions:
  #    format of the 3 right most "_" delimited fields is constant
  #    i.e.   "_ddd_<anytext>_dd.txt"
  #
  # The names are sorted first, which places the highest version of each
  # day last within that day's run of names; the last name of every run of
  # equal DOY values is therefore the one to keep.  Only adjacent names are
  # compared, so equal DOY values in different years stay distinct as long
  # as the year precedes the DOY in the name.
  #
  # A single-name list is returned as is (previously the loop was never
  # entered and the sub fell off its end without returning the file).
  my @old = sort @_;
  return(-1) if( $#old == -1 );

  my @new = ();
  my $prev_doy;

  for my $i ( 0 .. $#old ) {
    my @flds = split "_", $old[$i] ;
    my $doy  = $flds[$#flds - 2] ;	# 3rd field from the right
    if( $i > 0 and $doy != $prev_doy ) {	# New DOY: close previous run
      push @new, $old[$i-1] ;
    }
    $prev_doy = $doy ;
  }
  push @new, $old[$#old] ;		# last name closes the final run

  return(@new);
}


sub chk_ver {
  # Report every filename whose version field differs from $$rp{ver}.
  #
  # Args:    list of filenames; the version is taken as the first two
  #          characters of the last "_" delimited field ("..._dd.txt")
  #          and compared numerically.
  # Output:  one "VERSION Check on file: '<name>'" line per mismatch on
  #          STDOUT, followed by a blank line if there was any mismatch.
  # Returns: 1 if at least one mismatch was reported, else 0.
  #
  # The caller's list is left untouched: each name is copied before the
  # trailing newline is removed (chomp on the loop alias used to modify the
  # caller's elements).  print is used instead of printf so that a '%' in
  # a filename is not taken as a format directive.
  my $chkflg = 0;
  foreach my $arg ( @_ ) {
    my $fn   = $arg ;			# private copy
    my @flds = split "_", $fn ;
    if( substr($flds[$#flds],0,2) != $$rp{ver} ) {
      chomp $fn ;
      print "VERSION Check on file: '$fn' \n";
      $chkflg=1;
    }
  }
  print "\n" if $chkflg;
  return $chkflg;
}


###------  start  parm_check  -----------------------
sub parm_check {
  # Command-line sanity check: only the NUMBER of parameters is verified
  # (no format or expected-value checking).
  # Returns 1 when the index of the last @ARGV element equals
  # $n_cmdline_args, otherwise 0 (cmdline problem).
  my $last_arg_idx = scalar(@ARGV) - 1;
  return ( $last_arg_idx == $n_cmdline_args ) ? 1 : 0;
}
###------  end  parm_check  -----------------------


###------  start  newsubname  -----------------------
sub newsubname {
  # Empty template for a new subroutine; does nothing and returns 1.
  my $status = 1;
  return $status;
}
###------  end  newsubname  -----------------------
