#!/usr/bin/mawk -We
# *********************************************************************
# gregorian: translate selected date columns from Julian to calendar
#	     format.
# Copyright (c) 2001,2006 Carlo Strozzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# *********************************************************************
# $Id: gregorian,v 1.5 2006/03/10 11:26:13 carlo Exp $

BEGIN {
  # Start-up: read the environment, parse the command line, collect the
  # names of the columns to convert into target_cols[], and leave at most
  # the input file name in ARGV so that awk reads it (or standard input).
  NULL = ""; FS = OFS = "\t"; fmt = "c"

  # Get local settings.
  nosql_install = ENVIRON["NOSQL_INSTALL"]
  stdout = ENVIRON["NOSQL_STDOUT"]
  stderr = ENVIRON["NOSQL_STDERR"]

  # Set default values if necessary.
  if (nosql_install == NULL) nosql_install = "/usr/local/nosql"
  if (stdout == NULL) stdout = "/dev/stdout"
  if (stderr == NULL) stderr = "/dev/stderr"

  # Offsets of year, month and day inside the "yyyymmdd" string that
  # jd_to_cal() returns.
  y_start = 1; y_len = 4
  m_start = 5
  d_start = 7

  # Walk every argument up to ARGC, so that an empty argument does not
  # cut option parsing short.
  for (i = 1; i < ARGC; i++) {
    if (ARGV[i] == "-l" || ARGV[i] == "--last") pick_last = 1
    else if (ARGV[i] == "-N" || ARGV[i] == "--no-header") no_hdr = 1
    else if (ARGV[i] == "-c" || ARGV[i] == "--computer") fmt = "c"
    else if (ARGV[i] == "-I" || ARGV[i] == "--iso") fmt = "i"
    else if (ARGV[i] == "-e" || ARGV[i] == "--europe") fmt = "e"
    else if (ARGV[i] == "-u" || ARGV[i] == "--us") fmt = "u"
    else if (ARGV[i] == "-i" || ARGV[i] == "--input") i_file = ARGV[++i]
    else if (ARGV[i] == "-o" || ARGV[i] == "--output") o_file = ARGV[++i]
    else if (ARGV[i] == "-h" || ARGV[i] == "--help") {
       system("grep -v '^#' " nosql_install "/help/gregorian.txt")
       exit(rc=1)
    }
    else if (ARGV[i] == "-S" || ARGV[i] == "--short") {
       # Two-digit year. Only the year slice changes: jd_to_cal() always
       # returns yyyymmdd, so month and day stay at offsets 5 and 7.
       y_start = 3; y_len = 2
    }
    else if (ARGV[i] == "-s" || ARGV[i] == "--separator") {
       sep = ARGV[++i]
       if (sep ~ /(\n|\t)/) sep = " "		# prevent broken table.
    }
    else if (ARGV[i] == "--show-copying") {
       system("cat " nosql_install "/doc/COPYING")
       exit(rc=1)
    }
    else if (ARGV[i] == "--show-warranty") {
       system("cat " nosql_install "/doc/WARRANTY")
       exit(rc=1)
    }
    # Anything else is the name of a column to convert.
    else target_cols[ARGV[i]] = ARGV[i]
  }

  # Hide the options from awk, which would otherwise try to open them
  # as input files.
  ARGC = 1

  if (o_file == NULL) o_file = stdout
  if (i_file != NULL) { ARGV[1] = i_file; ARGC = 2 }
}

#
# Main loop
#

NR == 1 {
  # Header row: record which position each column name occupies, then
  # (unless -N/--no-header was given) re-emit the header to o_file.

  gsub(/[\001 ]+/, "")			# Remove SOH and blanks.

  # Load the column position arrays: P[name] = position and
  # N[position] = name. The table-body rule only converts positions
  # that are present in N, so a duplicated column name (it may happen
  # after a join) must end up in N exactly once: at its first
  # occurrence by default, at its last one if '-l' was specified.
  for (p = 1; p <= NF; p++) {
    if (!($p in P)) { P[$p] = p; N[p] = $p }
    else if (pick_last) {
      delete N[P[$p]]			# Forget the earlier occurrence.
      P[$p] = p; N[p] = $p
    }
  }

  if (!no_hdr) {
     # Put the SOH marker back in front of every column name. The whole
     # header, leading SOH included, must go to o_file; an unredirected
     # printf would send the first SOH to standard output instead.
     gsub(/\t/,"\t\001")
     print "\001" $0 > o_file
  }
  next
}

# Table body: copy every row to o_file, replacing the Julian day number
# in each selected column with a calendar date in the requested format.
{
  for (i=1; i<=NF; i++) {
      # OFS is data, not a format string.
      if (i > 1) printf("%s", OFS) > o_file

      # Columns that were not named on the command line pass through.
      if (target_cols[N[i]] == NULL) {
         printf("%s", $i) > o_file
         continue
      }

      $i += 0				# Cast to number.
      if (!$i) {			# Zero, or not a number.
         # NOTE(review): the cast above has already overwritten the
         # field, so an empty or non-numeric cell is written as 0.
         printf("%s", $i) > o_file
         continue			# Not a julian date.
      }

      out = jd_to_cal($i)

      # The 'c' format is written exactly as returned by jd_to_cal().
      if (fmt == "c") printf("%s", out) > o_file
      else if (fmt == "i") {
         printf("%s-%s-%s",substr(out,y_start,y_len), \
                substr(out,m_start,2),substr(out,d_start,2)) > o_file
      }  else {
         # European (dd.mm.yyyy) and US (mm/dd/yyyy) styles share a
         # one-character separator. It is taken with substr() and
         # printed with %s, because %c applied to a numeric-looking
         # argument such as "-s 0" prints the character with that code
         # rather than the digit itself.
         if (sep=="") sep = (fmt == "e") ? "." : "/"
         sep_ch = substr(sep, 1, 1)
         if (fmt == "e") {
            printf("%s%s%s%s%s",substr(out,d_start,2),sep_ch, \
                   substr(out,m_start,2),sep_ch, \
                   substr(out,y_start,y_len)) > o_file
         }  else {
            printf("%s%s%s%s%s",substr(out,m_start,2),sep_ch, \
                   substr(out,d_start,2),sep_ch, \
                   substr(out,y_start,y_len)) > o_file
         }
      }
  }
  printf("\n") > o_file
}

# ---------------------------------------------------------------------
# Convert Julian date to calendar date
# (algorithm adopted from Press et al.)
#
# jd      Julian day number; a fractional part is allowed.
# Returns the calendar date as a "yyyymmdd" string (year zero-padded to
# at least four digits). Day numbers before 2299161 are not given the
# Gregorian correction. The sign of a negative year is dropped.
#
# Only the date is returned, so the time of day is not computed; the
# fractional part of jd is used solely to decide whether the instant
# falls on the next calendar day.
# ---------------------------------------------------------------------
function jd_to_cal(jd,		j1,j2,j3,j4,j5,intgr,gregjd,tmp, \
				frac,y,m,d) {

   # Make sure args cast to numbers, or rounding errors may occur.
   jd += 0

   #  get the date from the Julian day number

   intgr   = int(jd)
   frac    = jd - intgr
   gregjd  = 2299161			# First day number that gets the
					# Gregorian correction.

   if (intgr >= gregjd) {		# Gregorian calendar correction
      tmp = int(((intgr - 1867216) - 0.25) / 36524.25)
      j1 = intgr + 1 + tmp - int(0.25*tmp)
   }  else j1 = intgr

   # correction for half day offset: a fraction of 0.5 or more moves
   # the date on to the following calendar day.
   if (frac + 0.5 >= 1.0) ++j1

   j2 = j1 + 1524
   j3 = int(6680.0 + ((j2 - 2439870) - 122.1)/365.25)
   j4 = int(j3*365.25)
   j5 = int((j2 - j4)/30.6001)

   d = int(j2 - j4 - int(j5*30.6001))
   m = int(j5 - 1)
   if (m > 12) m -= 12
   y = int(j3 - 4715)
   if (m > 2)   --y
   if (y <= 0)  --y			# There is no year zero.

   if (y < 0) y = -y

   return sprintf("%04d%02d%02d", y, m, d)
}

# End of program.
