#!/free/bin/perl 

# Created by Chris Paver on 20120725
# Last used on 20120821
# This program converts the originator data files to NetCDF.

use Fcntl;
use PDL;
use PDL::NetCDF;
use PDL::Char;
use Date::Manip;
use File::Basename;

sub resetvars();
sub createNetCDF();

# Main driver.
#
# Every command-line argument is treated as the path of one originator ASCII
# cast file.  Each file is read line by line, recognised header lines fill the
# attribute hashes, data rows are appended to the PDL arrays, an output
# directory is derived from the input path, and createNetCDF() writes the
# result before resetvars() clears everything for the next file.
#
# The parser flags ($found, $header1), the *_data arrays, the attribute hashes
# and $outdir are package globals on purpose: they are shared with
# resetvars() and createNetCDF(), so they must not be turned into "my"
# variables here.
my $incomingfile;
resetvars();

while($incomingfile = shift(@ARGV)) {
#	Remove every ".." sequence from the argument before it is used as a path.
	$incomingfile =~ s/\.\.//g;
	if($incomingfile =~ /^([\w\-\.\/]{1,20})$/) {
		$incomingfile = $1;
		die("Can't read: $incomingfile") unless(-r $incomingfile);
	}
	else {
#		NOTE(review): $incomingfile is always defined at this point, so this
#		die can never fire and names that fail the pattern above (for example
#		anything longer than 20 characters) are accepted unchanged.  Kept as
#		is; confirm the intended policy before tightening it.
		die("Invalid filename") unless defined($incomingfile);
	}

#	Three-argument open: the name is always opened for reading and can never
#	be interpreted as a mode prefix or a pipe.
	open(my $fh, '<', $incomingfile) or die ("couldn't open: $incomingfile: $!");
	while(my $line = <$fh>) {
		chomp $line;

#		The order of the branches below matters: the first one that matches
#		consumes the line.

#		Column header of the cast summary; the values are on the next line.
		if ($line =~ /^\s*Cast\s*lat\s*long\s*jday\s*mo\/da\/yr\s*gmt/i && $found == 0 && $header1 ==0) {
			print "reading $incomingfile\n";
			$header1 = 1;
		}
#		Get the temporal and spatial information.
		elsif ($header1 == 1) {
			my @head1 = _split_columns($line);

#			Format the date and time.  Field 4 is mo/da/yr with a two-digit
#			year that is prefixed with "20"; field 5 is read as hhmm.
			my @date = split(/\//,$head1[4]);
			my $yyyy = '20'.$date[2];
			my $mm = sprintf("%02d",$date[0]);
			my $dd = sprintf("%02d",$date[1]);
			my $hh = substr($head1[5],0,2);
			my $mn = substr($head1[5],2,2);
			my $sc = 0;
			my $isoDateTime = $yyyy."-".$mm."-".$dd."T".$hh.":".$mn."Z";
			my $secs = Date_SecsSince1970GMT($mm,$dd,$yyyy,$hh,$mn,$sc);

			$prof_data = $prof_data->append($head1[0]);
			$lat_data = $lat_data->append($head1[1]);
			$lon_data = $lon_data->append($head1[2]);
			$time_data = $time_data->append($secs);
			$global{'time_coverage_start'} = $isoDateTime;
			$global{'time_coverage_end'} = $isoDateTime;

			$header1 = 0;
		}

		elsif ($line =~ /^seas_version:\s*([0-9]+\.[0-9]+)\s*seas\s*id:\s*([0-9a-zA-Z]+)/i && $header1 == 0) {
			$global{'seas_version'} = $1;
			$global{'seas_id'} = $2;
		}

		elsif ($line =~ /^recorder_type:\s*(.+)\s*recorder_code:\s*([0-9]+)/i && $header1 == 0) {
			my $recorder = $1;
			my $code = $2;
#			The greedy (.+) keeps the blanks before "recorder_code:".
			$recorder =~ s/\s+$//;
			$inst2{'long_name'} = "xbt recorder";
			$inst2{'make_model'} = $recorder;
			$inst2{'wmo_code'} = sprintf("%02d",$code);
			$inst2{'wmo_code_table'} = '4770';
		}

		elsif ($line =~ /^probe_type:\s*(.+)\s*probe_code:\s*([0-9]+)\s*probe_serial_num:\s*(.+)/i && $header1 == 0) {
			my $make = $1;
			my $code = sprintf("%03d",$2);
			my $sn = $3;
			$make =~ s/\s+$//;
			$inst1{'long_name'} = 'Expendable Bathythermograph (XBT)';
			$inst1{'make_model'} = $make;
			$inst1{'wmo_code'} = $code;
			$inst1{'wmo_code_table'} = '1770';
			$inst1{'serial_number'} = $sn;
			$inst1{'fall_rate_equation'} = 'z = at + (10^-3)bt^2';

#			Fall-rate coefficients [a, b] keyed by the zero-padded probe code.
			my %coefficients = (
				'021' => [ 6.390, -1.82 ],
				'052' => [ 6.691, -2.25 ],
			);

#			An unlisted probe code leaves both coefficients undefined.
			my ($coeff_a, $coeff_b) = @{ $coefficients{$code} || [] };
			$inst1{'coefficient_a'} = $coeff_a;
			$inst1{'coefficient_b'} = $coeff_b;
		}

		elsif ($line =~ /^ship_name:\s*(.+)\s*call_sign:\s([0-9a-zA-Z]+)\s*lloyd_number:\s*([0-9]+)/i && $header1 == 0) {
			my $plat = $1;
			my $callSign = $2;
			my $platImo = $3;

			$plat =~ s/\s+$//;

#			Ship name as written in the file => [ ICES code, platform name ].
			my %vessels = (
				'Almirante Saboia'    => [ 'not available', 'Almirante Saboia' ],
				'Excellent'           => [ '48EL', 'EXCELLENT' ],
				'Fragata Constituica' => [ 'not available', 'Fragata Constituicao' ],
				'Horizon Navigator'   => [ '3280', 'HORIZON NAVIGATOR' ],
				'La Superba'          => [ '48LB', 'LA SUPERBA' ],
				'Madrid Express'      => [ '74HK', 'Madrid Express' ],
				'M/V ROME EXPRESS'    => [ '74EP', 'ROME EXPRESS' ],
				'Rome Express'        => [ '74EP', 'ROME EXPRESS' ],
			);

			die "Vessel not identified for $plat" unless (exists $vessels{$plat});
			my ($icesCode, $longName) = @{ $vessels{$plat} };

			$platform{'ices_code'} = $icesCode;
			$platform{'long_name'} = $longName;
			$platform{'call_sign'} = $callSign;
			$platform{'imo_code'} = $platImo;

			$global{'platform'} = $platform{'long_name'};
		}

		elsif ($line =~ /^xbt\sdata\sfrom\s([0-9a-zA-Z]+)\strack\.\s*bottom_depth:\s*([0-9]+[.]*[0-9]*)\s*([a-zA-Z]+)/i
			   && $found == 0) {
			$global{'woce_line'} = $1;
			$bottom{'long_name'} = "bottom depth";
			$bottom{'units'} = $3;

			$bottom_data = $bottom_data->append($2);
		}

		elsif ($line =~ /^(Salinity obtained from Levitus 01)/ && $found == 0) {
			$sa{'comment'} = $1;
		}

#		Column format of the data.  The parentheses spell out the precedence
#		the original condition already had: only the blank-line test is
#		limited to $found == 0.
#		NOTE(review): a blank line after the units row therefore reaches the
#		data branch below and is stored as a row of zeros - confirm whether
#		that is intended.
		elsif ($line =~ /^columns represent\s*:/i
			   || $line =~ /^\s*pr\s*te\s*th\s*sa\s*ht\s*de\s*ox/i
			   || ($line =~ /^\s*$/ && $found == 0)) {
			next;
		}

#		Units of the data.  Everything after this line is a data row.
		elsif ($line =~ /^\s*dbars\s*deg\sc\s*deg\sc\s*psu\s*dyn\.\scm\s*meters/i && $found == 0) {
			$found = 1;
		}

#		Extract the data from the file.
		elsif ($found == 1) {
			my @values = _split_columns($line);

#			Format the data values; a missing column formats as 0.0000.
			my ($pres, $temp, $th, $sa, $ht, $z, $ox) =
				map { sprintf("%.4f", $_) } @values[0 .. 6];

			$pres_data = $pres_data->append($pres);
			$temp_data = $temp_data->append($temp);
			$th_data = $th_data->append($th);
			$sa_data = $sa_data->append($sa);
			$ht_data = $ht_data->append($ht);
			$z_data = $z_data->append($z);
			$ox_data = $ox_data->append($ox);
		}

		else {
			print "Does not match known values: $line\n";
		}
	}

	close($fh);

#	Executes when the entire file has been processed.
#	Creates the corresponding directory structure for the NetCDF files to go.
#	The substitutions match an absolute directory that is exactly eight
#	components deep (/c1/c2/c3/c4/c5/c6/c7/c8) and produce
#	/c1/c2/c3/c4/c5/1-data/c7/c8.  A directory of any other shape does not
#	match and leaves the three variables equal to $dirname.
	my $filename = basename($incomingfile);
	my $dirname = dirname($incomingfile);
	my $basedir = $dirname;
	my $spec1dir = $dirname;
	my $spec2dir = $dirname;
	$basedir =~ s/^(\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/)[^\/]*\/[^\/]*\/[^\/]*$/$1/;
	$spec1dir =~ s/^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*(\/[^\/]*)\/[^\/]*$/$1/;
	$spec2dir =~ s/^\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*\/[^\/]*(\/[^\/]*)$/$1/;
	my $gendir = $basedir."1-data";
	my $newdir = $gendir.$spec1dir;
	$outdir = $newdir.$spec2dir;

#	mkdir creates one level at a time, so go from the outermost directory in.
	foreach my $dir ($gendir, $newdir, $outdir) {
		unless ( -e $dir or mkdir($dir)) {
			die "Unable to create directory $dir: $!\n";
		}
	}
	print "writing netCDF for $filename\n";

	$global{'title'} = "Profile temperature data from an XBT cast aboard the $platform{'long_name'} on the WOCE line $global{'woce_line'} on $global{'time_coverage_start'} (NODC Accession 0092527)";
	$global{'id'} = "0092527".'_'.$filename;

#	Pass the metadata and data to the subroutine that creates the netCDF file.
	createNetCDF();

#	Reset all of the metadata and data variables for the next input file.
	resetvars();
}

# Splits one line of the input file into its columns.  Leading whitespace is
# dropped, every run of whitespace becomes a comma, and the text is split on
# commas, so a comma inside the line also separates two columns.
sub _split_columns {
	my ($text) = @_;
	$text =~ s/^\s+//;
	$text =~ s/\s+/,/g;
	return split(/,/, $text);
}

# Puts every piece of per-file state back to its initial value: the PDL data
# arrays, the attribute hashes for each NetCDF variable, the global attribute
# hash, the lists that fix the order in which attributes are written, the
# parser flags and the output directory.  Takes no arguments.
#
# Everything assigned here is a package global on purpose.  createNetCDF()
# finds the attribute hashes through the names stored in @variables, which
# only works for package variables, so none of them may be declared with "my".
sub resetvars() {
#	List of all the variables and attributes

#	Data arrays, emptied so the parser can append to them.
	$prof_data = long([]);
	$time_data = long([]);
	$lat_data = double([]);
	$lon_data = double([]);
	$z_data = double([]);
	$pres_data = double([]);
	$bottom_data = double([]);
	$temp_data = double([[]]);
	$th_data = double([[]]);
	$sa_data = double([[]]);
	$ht_data = double([[]]);
	$ox_data = double([[]]);

#	Single-element placeholders for the variables that only carry attributes.
	$plat_data = byte([0]);
	$inst1_data = byte([0]);
	$inst2_data = byte([0]);
	$crs_data = byte([0]);

	$latValidMin = double([-90]);
	$latValidMax = double([90]);
	$lonValidMin = double([-180]);
	$lonValidMax = double([180]);
	$fillValue = double([-99999]);

	%profile = (
		long_name => 'originator station number',
		cf_role => 'profile_id',
		comment => undef,
	);

	%time = (
		long_name => 'time',
		standard_name => 'time',
		units => 'seconds since 1970-01-01 00:00:00 0:00',
		calendar => 'julian',
		axis => 'T',
		ancillary_variables => undef,
		comment => undef,
	);

	%lat = (
		long_name => 'latitude',
		standard_name => 'latitude',
		units => 'degrees_north',
		axis => 'Y',
		valid_min => $latValidMin,
		valid_max => $latValidMax,
		ancillary_variables => undef,
		comment => undef,
	);

	%lon = (
		long_name => 'longitude',
		standard_name => 'longitude',
		units => 'degrees_east',
		axis => 'X',
		valid_min => $lonValidMin,
		valid_max => $lonValidMax,
		ancillary_variables => undef,
		comment => undef,
	);

	%z = (
		long_name => 'instrument depth',
		standard_name => undef,
		units => 'meters',
		axis => 'Z',
		positive => 'down',
		valid_min => undef,
		valid_max => undef,
		ancillary_variables => undef,
		comment => undef,
	);

#	Attribute set shared by every measured variable.  Each hash below starts
#	from this template and overrides only the entries that are not undef.
	my %measured = (
		(map { $_ => undef } qw(
			long_name standard_name nodc_name units scale_factor add_offset
			valid_min valid_max source references cell_methods
			ancillary_variables platform instrument comment
		)),
		grid_mapping => 'crs',
	);
	my $notCollected = 'This data was not collected by the creator.';

	%bottom = (
		%measured,
		coordinates => 'time lat lon',
	);

	%pres = (
		%measured,
		long_name => 'water pressure',
		units => 'dbars',
		coordinates => 'time lat lon z',
		comment => $notCollected,
	);

	%temp = (
		%measured,
		long_name => 'water temperature from XBT',
		units => 'Celsius',
		coordinates => 'time lat lon z',
		platform => 'platform',
		instrument => 'inst1, inst2',
	);

	%th = (
		%measured,
		units => 'Celsius',
		coordinates => 'time lat lon z',
		comment => $notCollected,
	);

	%sa = (
		%measured,
		long_name => 'sea water salinity',
		units => 'psu',
		coordinates => 'time lat lon z',
	);

	%ht = (
		%measured,
		long_name => 'dynamic height',
		units => 'dyn. cm',
		coordinates => 'time lat lon z',
		comment => $notCollected,
	);

	%ox = (
		%measured,
		long_name => 'dissolved oxygen',
		coordinates => 'time lat lon z',
		comment => $notCollected,
	);

#	Both instrument descriptions start out completely empty.
	my @instrumentKeys = qw(
		long_name make_model serial_number calibration_date
		factory_calibrated user_calibrated calibration_report
		accuracy valid_range precision comment
	);
	%inst1 = map { $_ => undef } @instrumentKeys;
	%inst2 = map { $_ => undef } @instrumentKeys;

	%platform = (
		long_name => undef,
		call_sign => undef,
		ices_code => undef,
		wmo_code => undef,
		imo_code => undef,
		comment => undef,
	);

	%crs = (
		grid_mapping_name => 'latitude_longitude',
		epsg_code => 'EPSG:4326',
		semi_major_axis => 6378137.0,
		inverse_flattening => 298.257223563,
		comment => 'This data is supplied by NODC, and not by the creator.',
	);

#	Global attributes: the ones without a fixed value first, then the
#	constants.
	%global = (
		(map { $_ => undef } qw(
			title summary platform instrument
			time_coverage_start time_coverage_end time_coverage_resolution
			sea_name
			geospatial_lat_min geospatial_lat_max geospatial_lat_resolution
			geospatial_lon_min geospatial_lon_max geospatial_lon_resolution
			geospatial_vertical_min geospatial_vertical_max
			geospatial_vertical_units geospatial_vertical_resolution
			source institution keywords_vocabulary keywords acknowledgment
			comment contributor_name contributor_role
			date_created date_modified history license id uuid
		)),
		geospatial_lat_units => 'degrees_north',
		geospatial_lon_units => 'degrees_east',
		geospatial_vertical_positive => 'down',
		creator_name => 'Dr. Molly Baringer',
		creator_url => 'http://www.aoml.noaa.gov/phod/people/baringer.html',
		creator_email => 'Molly.Baringer@noaa.gov',
		project => 'High Density XBT Transects',
		processing_level => 'processed',
		references => 'http://www.aoml.noaa.gov/phod/hdenxbt/index.php',
		publisher_name => 'US National Oceanographic Data Center',
		publisher_email => 'NODC.Services@noaa.gov',
		publisher_url => 'www.nodc.noaa.gov',
		metadata_link => 'http://accession.nodc.noaa.gov/0092527',
		naming_authority => 'gov.noaa.nodc',
		Conventions => 'CF-1.6',
		Metadata_Conventions => 'Unidata Dataset Discovery v1.0',
		featureType => 'profile',
		cdm_data_type => 'Profile',
		nodc_template_version => 'NODC_NetCDF_Profile_Orthogonal_Template_v1.0',
		standard_name_vocabulary => 'CF-1.6',
	);

#	Order in which the global attributes are written.
	@globarray = qw(
		title summary platform instrument woce_line seas_version seas_id
		time_coverage_start time_coverage_end time_coverage_resolution
		sea_name
		geospatial_lat_min geospatial_lat_max geospatial_lat_units
		geospatial_lat_resolution
		geospatial_lon_min geospatial_lon_max geospatial_lon_units
		geospatial_lon_resolution
		geospatial_vertical_min geospatial_vertical_max
		geospatial_vertical_units geospatial_vertical_resolution
		geospatial_vertical_positive
		source institution creator_name creator_url creator_email project
		processing_level references keywords_vocabulary keywords
		acknowledgment comment contributor_name contributor_role
		date_created date_modified publisher_name publisher_email
		publisher_url history license metadata_link id naming_authority uuid
		Conventions Metadata_Conventions featureType cdm_data_type
		nodc_template_version standard_name_vocabulary
	);

#	Names of the NetCDF variables.  Each one is also the name of the package
#	hash above that holds its attributes.
	@variables = qw(
		profile time lat lon z bottom pres temp th sa ht ox
		inst1 inst2 platform crs
	);

#	Order in which the attributes of a variable are written.
	@varatts = qw(
		long_name standard_name units scale_factor add_offset calendar axis
		positive valid_min valid_max _FillValue coordinates grid_mapping
		source references cell_methods ancillary_variables platform
		instrument cf_role call_sign ices_code wmo_code wmo_code_table
		imo_code grid_mapping_name epsg_code semi_major_axis
		inverse_flattening make_model serial_number calibration_date
		factory_calibrated user_calibrated calibration_report accuracy
		valid_range precision fall_rate_equation coefficient_a coefficient_b
		comment
	);

#	Parser state: no units row seen yet, not waiting for the cast summary row.
	$found = 0;
	$header1 = 0;

#	Forget the previous file's output directory.  (This used to be the bare
#	statement "$outdir;", which does nothing.)
	$outdir = undef;
}

# Writes the cast currently held in the package globals to
# "$outdir/$global{'id'}.nc".  Takes no arguments.
#
# Before writing it fills in the generated global attributes (uuid,
# date_created, history and the geospatial extents), then stores the global
# attributes, the data arrays and finally the attributes of each variable.
# Attributes whose value is undef are not written.
#
# Reads the package globals $outdir, %global, @globarray, @variables,
# @varatts, the *_data arrays and one hash per name in @variables.
sub createNetCDF() {
# Generate the uuid and id for the netCDF file
#	chomp only removes a trailing newline; the chop used previously removed
#	the last character whatever it was.
	my $uuid = `uuidgen`;
	chomp($uuid) if defined($uuid);
	warn "uuidgen returned no UUID; the uuid attribute will be missing or empty\n"
		unless (defined($uuid) && length($uuid));
	$global{'uuid'} = $uuid;

# Generate today's date in ISO8601 convention
	my ($second, $minute, $hour, $day, $month, $year) = gmtime(time);
	$global{'date_created'} = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
		$year + 1900, $month + 1, $day, $hour, $minute, $second);

# Set dynamic attributes
	$global{'history'} = "$global{'date_created'} - This file generated from submitter's ascii file format.";

# Extents computed from the data that was read.
	$global{'geospatial_lat_min'} = min($lat_data);
	$global{'geospatial_lat_max'} = max($lat_data);
	$global{'geospatial_lon_min'} = min($lon_data);
	$global{'geospatial_lon_max'} = max($lon_data);
	$global{'geospatial_vertical_min'} = min($z_data);
	$global{'geospatial_vertical_max'} = max($z_data);
	$global{'geospatial_vertical_units'} = $z{'units'};

# Create the netCDF file
	my $filename = $outdir.'/'.$global{'id'}.'.nc';

	my $nc = PDL::NetCDF->new($filename, {
		MODE => O_CREAT,
		NC_FORMAT => PDL::NetCDF::NC_FORMAT_NETCDF4,
		REVERSE_DIMS => 1});

# Write Global attributes to NetCDF, in the order given by @globarray
	foreach my $attribute (@globarray) {
		next unless defined $global{$attribute};
		$nc->putatt($global{$attribute}, $attribute);
	}

# Write data to NetCDF: [ variable name, dimension names, data ], in the
# order the variables are created in the file.
	my @layout = (
		[ 'profile',  ['profile'],     $prof_data ],
		[ 'time',     ['profile'],     $time_data ],
		[ 'lat',      ['profile'],     $lat_data ],
		[ 'lon',      ['profile'],     $lon_data ],
		[ 'bottom',   ['profile'],     $bottom_data ],
		[ 'z',        ['z'],           $z_data ],
		[ 'pres',     ['z'],           $pres_data ],
		[ 'temp',     ['z','profile'], $temp_data ],
		[ 'th',       ['z','profile'], $th_data ],
		[ 'sa',       ['z','profile'], $sa_data ],
		[ 'ht',       ['z','profile'], $ht_data ],
		[ 'ox',       ['z','profile'], $ox_data ],
		[ 'inst1',    ['profile'],     $inst1_data ],
		[ 'inst2',    ['profile'],     $inst2_data ],
		[ 'platform', ['profile'],     $plat_data ],
		[ 'crs',      ['profile'],     $crs_data ],
	);
	foreach my $entry (@layout) {
		my ($name, $dims, $data) = @$entry;
		$nc->put($name, $dims, $data);
	}

#	Write attributes to NetCDF
	foreach my $variable (@variables) {
		foreach my $varatt (@varatts) {
#			$$variable{...} is a symbolic reference: it reads the package hash
#			whose name is stored in $variable (%profile, %time, ...).  This
#			needs "strict refs" to stay off.  A key that is absent reads as
#			undef, so one defined test covers both cases.
			next unless defined $$variable{$varatt};
			$nc->putatt($$variable{$varatt}, $varatt, $variable);
		}
	}

	$nc->close;
}
