#!/usr/bin/perl

##############################################################################
#
# Tool to ensure that a DSR or CDM message that has been created with a
# spreadsheet application (which may add or remove empy cells to the right
# of a DSR/CDM message file).
#
# Usage: dsr_cdm_length.pl <input> [<output>]
#        with <input> being a DSR or CDM message file created by a spreadsheet
#        and  <output> being the filename for a corrected message file
#        (the output parameter is optional)
#
# Note: this script was written and tested on a UNIX computer
# Note: this script can be extended to also handle CDM
#
# (c) 2022 Digital Data Exchnage, LLC
#
##############################################################################

# definition of all the record type lengths in DSR
# this only lists the length for DSR Part 8 in version 1.4

my %recordtypes = (
		   "HEAD", 16,
		   "FOOT", 6,
		   "SRFO", 3,
		   
		   "SY01.02", 15,
		   "SY02.03", 22,
		   "SY03.01", 23,
		   "SY04.01", 20,
		   "SY05.03", 26,
		   "SY06.01", 18,
		   "SY07.01", 21,
		   "SY08.02", 25,
		   "SY09.01", 22,
		   "SY10.01", 44,
		   "SY11.01",  9,
		   "SY12",    17,

		   "RE01.02", 16,
		   "RE02",     6,
		   "RE03.02", 17,		   
		   "RE04",    22,
		   
		   "AS01.02", 14,
		   "AS02.03", 24,
		   "AS03.04", 32,
		   "AS04",     4,
		   "AS05",    25,
		   "AS06",    39,
		   "LC01",    23,

		   "MW01.02", 19,
		   "MW02",    21,
		   "CU01.01", 17,
		   "CU01",    19,
		   "RC01",     7,


		   "SU01.02", 13,
		   "SU02.02", 11,
		   "SU03.02", 11,
		   "SU04.02", 24,
		   "SU05.01", 10,
		   "SU06",     7,
		   "RU01.02",  7,
		   "RU02.02",  9,
		   "LI01.03", 13,
		   "ST01",     5,
		   
		   "ML01.01",  8,

		   "SR01.01", 43,
		   "SR02.01", 39,
		   "SR03.01", 46,
		   "SR04.01", 54,
		   "SR05.01", 52,
		   "SR06.01", 25,
		   "SR07.01", 41,
		   "SR08.01", 49,

# definition of all the record type lengths in CDM 
# this only lists the length for CDM Part 2 in version 1.4

		   "CDMH",    16,
		   #"SRFO" is already defined above
		   
		   "CS01",    23,
		   "CS02",    20,
		   "CS03",    15,
		   "CDS1",    10,
		   
		   "CD01",    28,
		   "CD02",    45,
		   "CD03.01", 19,
		   "CD04",    25,

		   "CX01",    11,		   

		   "CDD1",    14,
		   "CDD2",    16,
		   "CDD3",    14
		  );

# set $debug to 0 if the script should be comparatively silent
# set $debug to 1 if the script should be comparatively vocal
my $debug = 1;

# This may need to be commented out for some systems to correctly handle CR/LF issues
# $/ = "\r";     

# check if we have one or two parameters
my $error = "\nTool to enforce the correct numbers of tab characters in a DSR or CDM message.\nSyntax: dsr_cdm_length.pl <input> [<output>]\n(c) 2022 DDEX\n\n";
die $error if ($#ARGV < 0);
die $error if ($#ARGV > 1);

# set the filenames for the input and output files
my $input = $ARGV[0];
my $output;

if ($#ARGV == 1) {
  $output = $ARGV[1];
} else {
  $output = $ARGV[0] . "." . out;  
}

print "Processing $input into $output\n";
print "---------------------------------------------------------------------\n" if ($debug);

open (IN, $input) or die "  Error: input file '$input' not found.\n" . $error;
open (OUT, ">", $output) or die "  Error: cannot open output file '$output'.\n" . $error;

foreach (<IN>) {

  # just copy empty lines or lines that start with a hash symbol
  if ( ((length)< 1) || (/^\#/) ) {
    print OUT $_;
    next;
  }
  
  # split the line into an array
  my @cells = split /\t/;

  # one special case DExx (we don't know the length)
  if ( $cells[0] =~ /^DE/ ) {
    print OUT $_;
    next;
  }

  # if we find an unknown record type in the first cell: die
  die " Error: Unknown record type $cells[0] found.\n$error" unless (exists $recordtypes{$cells[0]});

  # how many cells are in the read line?
  my $row_length = @cells;

  # how many cells should there have been in the read line?
  my $std_length = $recordtypes{$cells[0]};

  # if we have the right nunber of tabs, just copy things over
  if ($row_length == $std_length) {
    print " Notice:  Found record type $cells[0] which was as long as required.\n" if ($debug);
    print OUT $_,
  }
  else {

    # if there is a discrepancy
    print " Warning: Found record type $cells[0] with $row_length cells (should be $std_length).\n" if ($debug);    
    my $output = "";
    
    if ($row_length > $std_length) {      
      # there are more cells in the file than in the standard
      for my $i (0 .. $std_length-1) {
	$output = $output . $cells[$i] . "\t";	
      }
      chop $output;  # we added one too many...
      $output = $output . "\n";
    } else {
      # there are more cells in the standard than in the file
      my $output = $_;
      for my $i (1 ..  $std_length - $row_length) {
	$output = $output . "\t";      
      }
      chop $output;  # remove last tab
      $output = $output . "\n";
    }
    print OUT $output;
  }
}

print "All done.\n" if ($debug);
