#!/usr/bin/php4 -q
<?php
/* ******************************************************************** */
/* CATALYST PHP Source Code                                             */
/* -------------------------------------------------------------------- */
/* This program is free software; you can redistribute it and/or modify */
/* it under the terms of the GNU General Public License as published by */
/* the Free Software Foundation; either version 2 of the License, or    */
/* (at your option) any later version.                                  */
/*                                                                      */
/* This program is distributed in the hope that it will be useful,      */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of       */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        */
/* GNU General Public License for more details.                         */
/*                                                                      */
/* You should have received a copy of the GNU General Public License    */
/* along with this program; if not, write to:                           */
/*   The Free Software Foundation, Inc., 59 Temple Place, Suite 330,    */
/*   Boston, MA  02111-1307  USA                                        */
/* -------------------------------------------------------------------- */
/*                                                                      */
/* Filename:    iso639-import.php                                       */
/* Author:      Paul Waite                                              */
/* Description: Importer for language codes defined by ISO639 in two    */
/*              different versions: ISO639-1 and ISO639-2. The former   */
/*              is a two-character set of codes, and the latter is a    */
/*              three-character set of codes.                           */
/*                                                                      */
/*              The source file is expected to be an ASCII content file */
/*              with data delimited by pipes "|" as per the following   */
/*              example:   ger|deu|de|German|allemand                   */
/*                                                                      */
/*              Field 1: 3-char code                                    */
/*              Field 2: 3-char terminologic code (when given)          */
/*              Field 3: 2-char code (when given)                       */
/*              Field 4: Language name in english                       */
/*              Field 5: Language name in french                        */
/*                                                                      */
/*       usage: ./iso639-import.php [--verbose] [--file=datafile]       */
/*                                                                      */
/*              --verbose  Shows detailed messages on import process    */
/*              --file     Over-ride import data filename. Normally     */
/*                         this is taken from the ISO639-1 data source  */
/*                         URL in the database.                         */
/*                                                                      */
/* ******************************************************************** */
// In older versions of the PHP CLI the program's working dir is always
// its own directory, so we need to get into the website root directory..
// The version test uses version_compare() rather than comparing the
// PHP_VERSION string with a float, which only looks at the leading
// "major.minor" as a decimal number and so mis-orders versions such as
// "4.10". version_compare() itself only exists from PHP 4.1.0 onwards,
// so its absence also means we are running something older than 4.2.
if (!function_exists("version_compare") || version_compare(PHP_VERSION, "4.2.0", "<")) {
  chdir("..");
}
include("application.php");
include_once("optlist-defs.php");

// Start-up defaults. The data filename and download URL begin empty and
// are filled in from the command line and the database respectively.
// NOTE(review): $verbose is assigned here but not read again in this script.
$verbose = false;
$ISO639_DATA_FILE = "";
$ISO639_DATA_URL = "";

// Command-line options are parsed by the optlist class..
$opts = new optlist();
$PROGNAME = $opts->progname;

// --verbose: switch on debug-level messages
if ($opts->optcount > 0 && $opts->opt_exists("verbose")) {
  debug_on(DBG_DEBUG);
}

// --file: over-ride the name of the import data file
if ($opts->optcount > 0 && $opts->opt_exists("file")) {
  $ISO639_DATA_FILE = $opts->opt_value("file");
}

// Ascertain ISO639-1 Record ID. Exactly one matching row must exist in
// 'ax_enc_scheme'; that row also supplies the data source URL.
$q = "SELECT * FROM ax_enc_scheme WHERE enc_scheme_name='ISO639-1'";
$sch = dbrecordset($q);
if ($sch->rowcount != 1) {
  echo "Error: failed to find unique ISO639-1 record in 'ax_enc_scheme'\n";
  // Non-zero exit status, so a calling shell or cron job sees the failure
  exit(1);
}
$iso639_1_id = $sch->field("enc_scheme_id");
$ISO639_DATA_URL = $sch->field("datasrc_uri");
if ($iso639_1_id != "") {
  debugbr("ISO639-1 encoding scheme ID: $iso639_1_id", DBG_DEBUG);
}

// Ascertain ISO639-2 Record ID, under the same "exactly one row" rule.
$q = "SELECT * FROM ax_enc_scheme WHERE enc_scheme_name='ISO639-2'";
$sch = dbrecordset($q);
if ($sch->rowcount != 1) {
  echo "Error: failed to find unique ISO639-2 record in 'ax_enc_scheme'\n";
  exit(1);
}
$iso639_2_id = $sch->field("enc_scheme_id");
if ($iso639_2_id != "") {
  debugbr("ISO639-2 encoding scheme ID: $iso639_2_id", DBG_DEBUG);
}

// Try to get the latest file. When a data source URL is known, the data
// file is (re-)downloaded with wget; otherwise whatever file was named on
// the command line is used as it stands.
if ($ISO639_DATA_URL != "") {
  // Default the local filename to the last path component of the URL,
  // unless one was already given via --file.
  $urlbits = parse_url($ISO639_DATA_URL);
  if ($ISO639_DATA_FILE == "" && is_array($urlbits) && isset($urlbits["path"])) {
    $ISO639_DATA_FILE = basename($urlbits["path"]);
  }
  // A URL without a usable path leaves us with nowhere to download to
  if ($ISO639_DATA_FILE == "") {
    echo "Error: cannot determine a data filename from $ISO639_DATA_URL\n";
    exit(1);
  }
  if (file_exists($ISO639_DATA_FILE)) {
    unlink($ISO639_DATA_FILE);
  }
  debugbr("data will be downloaded from $ISO639_DATA_URL", DBG_DEBUG);
  debugbr("destination data file is $ISO639_DATA_FILE", DBG_DEBUG);

  // Both values come from outside this script (database, command line),
  // so quote them for the shell rather than interpolating them raw.
  $wget_cmd  = "wget " . escapeshellarg($ISO639_DATA_URL);
  $wget_cmd .= " -O " . escapeshellarg($ISO639_DATA_FILE);
  $wget_output = array();
  $wget_status = 0;
  exec($wget_cmd, $wget_output, $wget_status);

  // Stop here on a failed download: carrying on would import from a
  // missing or incomplete file.
  if ($wget_status != 0) {
    echo "Error: download of $ISO639_DATA_URL failed (wget exit status $wget_status)\n";
    exit(1);
  }
}
else {
  debugbr("Warning: no data source URL, so no download.", DBG_DEBUG);
}


/**
 * Store one language code under the given encoding scheme in the
 * 'ax_enc_value' table. If a row for the code already exists and its
 * label does not yet contain $label, the new label is appended to the
 * existing one with a "/" separator; if no row exists one is inserted.
 *
 * The code is placed directly into SQL text, so callers must pass only
 * codes they have already validated.
 *
 * @param mixed  $scheme_id Encoding scheme ID the code belongs to
 * @param string $code      Language code (already validated by the caller)
 * @param string $label     Language name to store for the code
 * @return string The label written to the table, or $label unchanged when
 *                the existing row needed no update
 */
function iso639_store_code($scheme_id, $code, $label) {
  $q  = "SELECT * FROM ax_enc_value";
  $q .= " WHERE enc_scheme_id=$scheme_id";
  $q .= "   AND enc_value='$code'";
  $existing = dbrecordset($q);
  if ($existing->hasdata) {
    $existing_label = $existing->field("enc_label");
    // Only touch the row when the new label adds something. The strict
    // comparison matters: strstr() can return a string which is "falsy".
    if ($label != "" && strstr($existing_label, $label) === false) {
      $label = $existing_label . "/" . $label;
      $upd = new dbupdate("ax_enc_value");
      $upd->set("enc_label", $label);
      $upd->where("enc_scheme_id=$scheme_id");
      $upd->where("AND enc_value='$code'");
      $upd->execute();
    }
  }
  else {
    $ins = new dbinsert("ax_enc_value");
    $ins->set("enc_scheme_id", $scheme_id);
    $ins->set("enc_value", $code);
    $ins->set("enc_label", $label);
    $ins->execute();
  }
  return $label;
}

// Open the data file BEFORE touching the database, so that a missing or
// unreadable file cannot leave us with the old codes deleted and nothing
// loaded in their place.
$langF = new csv_inputfile($ISO639_DATA_FILE);
if (!$langF->opened) {
  echo "failed to open $ISO639_DATA_FILE\n";
  exit(1);
}

debugbr("deleting current set of language codes..", DBG_DEBUG);
$del = new dbdelete("ax_enc_value");
$del->where("enc_scheme_id in ($iso639_1_id,$iso639_2_id)");
$del->execute();

debugbr("loading new language codes..", DBG_DEBUG);

// Codes end up inside SQL text, and the file comes from a remote source,
// so accept only letters, digits and hyphens (hyphens occur in ranges
// such as "qaa-qtz").
$code_pattern = '/^[A-Za-z0-9-]+$/D';

$lineno = 0;
while (!$langF->eof()) {
  $fields = $langF->readln("|");
  $lineno += 1;

  // We read fields 1, 3 and 4, so anything shorter (blank lines included)
  // cannot be a valid record.
  if (!is_array($fields) || count($fields) < 4) {
    debugbr("line $lineno: skipped, fewer than 4 fields", DBG_DEBUG);
    continue;
  }

  $lang_code_2 = trim($fields[0]);
  $lang_code_1 = trim($fields[2]);
  $lang_label  = trim($fields[3]);

  // A UTF-8 encoded data file may begin with a byte-order-mark, which
  // would otherwise become part of the very first code.
  if ($lineno == 1 && substr($lang_code_2, 0, 3) == "\xEF\xBB\xBF") {
    $lang_code_2 = substr($lang_code_2, 3);
  }

  // ISO639-1 (two-character code, not present for every language)
  if ($lang_code_1 != "") {
    if (preg_match($code_pattern, $lang_code_1)) {
      $stored_label = iso639_store_code($iso639_1_id, $lang_code_1, $lang_label);
      debugbr("ISO639-1: $stored_label ($lang_code_1)", DBG_DEBUG);
    }
    else {
      debugbr("line $lineno: skipped invalid ISO639-1 code", DBG_DEBUG);
    }
  }

  // ISO639-2 (three-character code). This uses the label as read from
  // the file, not a label merged for the ISO639-1 record above.
  if (preg_match($code_pattern, $lang_code_2)) {
    $stored_label = iso639_store_code($iso639_2_id, $lang_code_2, $lang_label);
    debugbr("ISO639-2: $stored_label ($lang_code_2)", DBG_DEBUG);
  }
  else {
    debugbr("line $lineno: skipped invalid ISO639-2 code", DBG_DEBUG);
  }
} // while

// Close the file..
$langF->closefile();
debugbr("finished.", DBG_DEBUG);
?>