#!/bin/sh
# Copyright 1999-2002 Carnegie Mellon University.  
# Portions Copyright 2002 Sun Microsystems, Inc.  
# Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
# All Rights Reserved.  Use is subject to license terms.
# 
# See the file "license.terms" for information on usage and
# redistribution of this file, and for a DISCLAIMER OF ALL 
# WARRANTIES.
#
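# This script parses the regression log and writes a summary
# regression report (an HTML table) to standard output.
#
# usage: ./makeTestSummaryReport regression.log listoftests.list
#     machine typeOfResults  title 
#
#   regression.log    the "|"-separated regression log (fields listed below)
#   listoftests.list  file with one test name per line; lines starting with
#                     "#" are comments, an empty line produces an empty
#                     table row; "any" reports every test found in the log
#   machine           machine whose results are reported; "this" means the
#                     name printed by ./getHostname, "any" means all machines
#   typeOfResults     "latest", "fastest" or "best" (lowest word error rate)
#   title             text shown in the table heading; also used to build
#                     the links to the detailed reports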
#
# Field definitions: 
#   1) "test"
#   2) date
#   3) time
#   4) machine
#   5) System
#   6) testName 
#   7) who 
#   8) status
#   9) audioTime 
#  10) procTime 
#  11) words 
#  12) insertions 
#  13) deletions 
#  14) substitutions 
#  15) sentences 
#  16) correctSentences 
#  17) heapSize 
#  18) loadAverage 

gawk '

#####
# Set-up, run once before the log is read.
#
# Defines the field layout of the "|"-separated log, picks up the script
# arguments from ARGV, describes the systems that get a column in the
# report and loads the list of tests.
#
#   ARGV[1]  regression log (left in place, so gawk reads it as input)
#   ARGV[2]  test list file, or "any" for every test found in the log
#   ARGV[3]  machine name; "this" is replaced by the output of ./getHostname
#   ARGV[4]  report mode: "latest", "fastest" or "best"
#   ARGV[5]  title text
#
# Every file and pipe opened here is closed again, and each getline is
# parenthesized so that the comparison applies to its return value.
#
BEGIN {
    FS = "|"

    # Positions of the fields within one log record.
    F_TEST = 1;
    F_DATE = 2;
    F_TIME = 3;
    F_MACHINE = 4;
    F_SYSTEM = 5;
    F_TEST_NAME = 6;
    F_WHO = 7;
    F_STATUS = 8;
    F_AUDIO_TIME = 9;
    F_PROC_TIME = 10;
    F_WORDS = 11;
    F_INSERTIONS = 12;
    F_DELETIONS = 13;
    F_SUBSTITUTIONS = 14;
    F_SENTENCES = 15;
    F_CORRECT_SENTENCES = 16;
    F_HEAP_SIZE = 17;
    F_LOAD_AVERAGE = 18;

    NOT_AVAILABLE = "n/a";

    testList = ARGV[2];
    machine = ARGV[3];
    mode = ARGV[4];
    title = ARGV[5];

    if (machine == "this") {
        "./getHostname" | getline hostname;
        close("./getHostname");
        machine = hostname;
    }

    # Blank out the non-file arguments so that gawk does not try to read
    # them as input files; only the log in ARGV[1] remains.
    ARGV[5] = "";
    ARGV[4] = "";
    ARGV[3] = "";
    ARGV[2] = "";

    # Systems in column order, with their display and link names.
    systemName[0] = "s3";
    systemName[1] = "s3.3";
    systemName[2] = "s4";
    systemCount = 3;

    systemTitle["s3"] = "Sphinx 3";
    systemTitle["s3.3"] = "Sphinx 3.3";
    systemTitle["s4"] = "Sphinx 4";

    systemLinkName["s3"] = "SphinxThree";
    systemLinkName["s3.3"] = "SphinxThreeThree";
    systemLinkName["s4"] = "SphinxFour";

    modeTitle["latest"] =  "Latest results for ";
    modeTitle["fastest"] = "Fastest results for ";
    modeTitle["best"] =    "Most accurate results for ";

    # read in the date and time when the test was started
    getline date < ".startDate";
    close(".startDate");
    getline time < ".startTimeOfDay";
    close(".startTimeOfDay");

    # Read in the test list.  "any" is a keyword, not a file name, so
    # nothing is opened for it; the END rule builds the list from the log.
    totalTestCount = 0;
    if (testList != "any") {
        while ((readStatus = (getline x < testList)) > 0) {
            # skip comment lines (those that start with "#")
            if (x !~ /^#/) {
                tests[x] = 1;
                testOrder[totalTestCount++] = x;
            }
        }
        if (readStatus < 0) {
            # getline returns -1 when the file cannot be opened or read
            printf("makeTestSummaryReport: cannot read test list \"%s\"\n", \
                testList) > "/dev/stderr";
        }
        close(testList);
    }
}

#####
# Per-record rule.  A log record is considered when it is a "test" record,
# comes from the requested machine (or machine is "any") and names a test
# from the test list (or the test list is "any").
#
# For each test/system pair one record is kept in testData, chosen by mode:
#   fastest - the record with the smallest speed value
#   best    - the record with the smallest value from getAccuracyFromLine
#   latest  - the last record in the log whose status is "OK"
# On a tie the later record wins.
#
$F_TEST == "test" \
        && (machine == "any" || $F_MACHINE == machine) \
        && (testList == "any" || ($F_TEST_NAME in tests)) {

    # With the "any" list, remember every test name that shows up.
    if (testList == "any") {
        tests[$F_TEST_NAME] = 1;
    }

    key = $F_TEST_NAME "-" $F_SYSTEM;

    if (mode == "fastest") {
        ratio = getSpeedFromLine($0);
        if (ratio != NOT_AVAILABLE \
                && (!(key in bestSpeed) || ratio <= bestSpeed[key])) {
            bestSpeed[key] = ratio;
            testData[key] = $0;
        }
    } else if (mode == "best") {
        errorRate = getAccuracyFromLine($0);
        if (errorRate != NOT_AVAILABLE \
                && (!(key in bestAccuracy) || errorRate <= bestAccuracy[key])) {
            bestAccuracy[key] = errorRate;
            testData[key] = $0;
        }
    } else if (mode == "latest" && $F_STATUS == "OK") {
        testData[key] = $0;
    }
}


#####
# Splits the record stored in testData under the key formed by joining
# testName and testCount (with nothing in between) into the global
# array "data", one element per FS-separated field.
#
# NOTE(review): the per-record rule stores its records under
# testName "-" system, so the key built here has no "-" separator, and
# nothing in this file calls this function - confirm whether it is
# still needed.
#
# _key is a local variable, not an argument.
#
function updateData(testName, testCount,    _key) {
    _key = testName testCount;
    split(testData[_key], data);
}


####
# Returns the speed of one log record as processing time divided by
# audio time (smaller means faster).
#
#   line - one FS-separated log record; may be empty for a missing record
#
# Returns NOT_AVAILABLE when the processing time is empty or when the
# audio time is empty, zero or not a number.  The audio time is forced to
# a number before the zero test: a non-numeric field would otherwise be
# compared as a string, pass the test, and then stop gawk with a
# division by zero.
#
# The names after "line" are local variables, not arguments.
#
function getSpeedFromLine(line,    _fields, _audioTime, _procTime) {
    split(line, _fields);
    _audioTime = _fields[F_AUDIO_TIME];
    _procTime = _fields[F_PROC_TIME];

    if (_procTime == "" || _audioTime + 0 == 0) {
        return NOT_AVAILABLE;
    }
    return _procTime / _audioTime;
}

# Returns the speed for the record kept for the given test and system,
# as computed by getSpeedFromLine.  _record is a local variable.
function getSpeed(testName, systemName,    _record) {
    _record = testData[testName "-" systemName];
    return getSpeedFromLine(_record);
}

####
# Returns the error rate of one log record in percent:
# (insertions + deletions + substitutions) / words * 100.
# Despite the name, a smaller value is the more accurate result.
#
#   line - one FS-separated log record; may be empty for a missing record
#
# Returns NOT_AVAILABLE when the word count is empty, zero or not a
# number.  The word count is forced to a number before the zero test: a
# non-numeric field would otherwise be compared as a string, pass the
# test, and then stop gawk with a division by zero.
#
# The names after "line" are local variables, not arguments.
#
function getAccuracyFromLine(line,    _fields, _errors, _words) {
    split(line, _fields);
    _words = _fields[F_WORDS];

    if (_words + 0 == 0) {
        return NOT_AVAILABLE;
    }

    _errors = _fields[F_INSERTIONS] + _fields[F_DELETIONS] \
        + _fields[F_SUBSTITUTIONS];
    return _errors / _words * 100;
}

# Returns the error rate for the record kept for the given test and
# system, as computed by getAccuracyFromLine.  _record is a local variable.
function getAccuracy(testName, systemName,    _record) {
    _record = testData[testName "-" systemName];
    return getAccuracyFromLine(_record);
}

####
# Returns the date field of one log record.
#
#   line - one FS-separated log record; may be empty for a missing record
#
# The date is only reported for a record whose status is "OK" and whose
# date field is not empty; in every other case NOT_AVAILABLE is returned.
# The record is split into the global array "data".
#
function getDateFromLine(line) {
    split(line, data);

    if (data[F_STATUS] == "OK" && data[F_DATE] != "") {
        return data[F_DATE];
    }
    return NOT_AVAILABLE;
}

# Returns the date for the record kept for the given test and system,
# as computed by getDateFromLine.  _record is a local variable.
function getDate(testName, systemName,    _record) {
    _record = testData[testName "-" systemName];
    return getDateFromLine(_record);
}



####
# Prints one table cell holding value; the cell is shown in green when
# value equals best.
#
function dumpMetricCell(value, best) {
    if (value == best) {
        printf("<td> <font color=green> %s </font></td>", value);
    } else {
        printf("<td> %s </td>", value);
    }
}

####
# Prints the table row for a single test: the test name, one speed cell
# per system, one error-rate cell per system and the date of the "s4"
# result.  Within each group the smallest value is shown in green; values
# of 10000 or more are never highlighted, and neither is NOT_AVAILABLE.
#
# An empty test name produces an empty row spanning all eight columns.
#
# Every name after testName is a local variable, not an argument.
#
function dumpTest(testName,  _j, _rt, _wer, _lowRt, _lowWer) {
    if (testName == "") {
        printf("<tr> <td colspan=8> </td> </tr>\n");
        return;
    }

    # Collect both metrics for every system and track the smallest of
    # each; 10000 is the starting value that real results must beat.
    _lowRt = 10000;
    _lowWer = 10000;
    for (_j = 0; _j < systemCount; _j++) {
        _rt[_j] = getSpeed(testName, systemName[_j]);
        if (_rt[_j] != NOT_AVAILABLE && _rt[_j] < _lowRt) {
            _lowRt = _rt[_j];
        }

        _wer[_j] = getAccuracy(testName, systemName[_j]);
        if (_wer[_j] != NOT_AVAILABLE && _wer[_j] < _lowWer) {
            _lowWer = _wer[_j];
        }
    }

    printf("<tr> <td> <font color=#018888> %s </font></td>", testName);
    for (_j = 0; _j < systemCount; _j++) {
        dumpMetricCell(_rt[_j], _lowRt);
    }
    for (_j = 0; _j < systemCount; _j++) {
        dumpMetricCell(_wer[_j], _lowWer);
    }
    printf("<td> %s </td></tr>\n", getDate(testName, "s4"));
}


#######
# Prints the three heading rows of the report table:
#   1) the heading text built from the mode, the title and the machine
#      ("all systems" when the machine is "any"),
#   2) the column groups Speed (RT), WER (%) and Date,
#   3) the per-system column names; the names in the WER group link to
#      the detailed report of that system, and the Date column is
#      labelled with the title of system "s4".
#
# The percent sign in the WER heading is written as %% because the text
# is a printf format, where a single % starts a conversion.
#
# Every name in the parameter list is a local variable; the function
# takes no arguments.
#
function dumpTitle(  i, _machineName, _detailLink, _system) {

    _machineName = (machine == "any") ? "all systems" : machine;

    printf("<tr> <th bgcolor=lightblue colspan=8>%s %s on %s\n</th></tr>\n", \
        modeTitle[mode], title, _machineName);
    printf("<tr bgcolor=lightblue> <th> &nbsp; </th> <th colspan=3>Speed (RT) <th colspan=3>WER (%%) <th> Date</tr>\n");

    printf("<tr bgcolor=lightblue><th> %s ", "Test");

    # Speed group: plain system titles.
    for (i = 0; i < systemCount; i++) {
        printf("<th> %s ", systemTitle[systemName[i]]);
    }

    # WER group: system titles linked to the detailed reports.
    for (i = 0; i < systemCount; i++) {
        _system = systemName[i];
        _detailLink = "regressionReports/AutoGen" mode title \
            systemLinkName[_system] "_" machine ".html";
        printf("<th> <a href=\"%s\">%s</a>", _detailLink, systemTitle[_system]);
    }

    printf("<th> %s ", systemTitle["s4"]); 
    printf("</tr>\n");
}

#####
# Runs after the whole log has been read: prints the report table (the
# heading rows followed by one row per test) and the closing note with
# the start date and time of the test run.
#
# The percent sign in the table width is written as %% because the text
# is a printf format.  A single % followed by " a" can be read as a
# conversion (space flag, letter a) that has no argument to print, which
# stops gawk versions that know that conversion.
#
# The script arguments are handed to gawk as "$@" so that each one stays
# a single word; an unquoted $* splits a title that contains spaces, and
# the extra words are then taken for input files.
#
END {
    printf("<table cellpadding=0 cellspacing=0 border=1 width=90%% align=center>\n");
    printf("<tbody>\n");

    # With the "any" test list no order was read from a file, so the
    # order is made up from the test names seen in the log.  The order
    # of a for-in scan is not specified.
    if (testList == "any") {
        totalTestCount = 0;
        for (i in tests) {
            testOrder[totalTestCount++] = i;
        }
    }

    dumpTitle();
    for (i = 0; i < totalTestCount; i++) {
        dumpTest(testOrder[i]);
    }
    printf("</tbody>\n");
    printf("</table>\n");

    print "<small>"
    print "<center>"
    printf("<i>This table is automatically generated. ");
    printf("Do not edit this by hand. ");
    printf("Last updated on %s at %s.</i>\n", date, time);
    print "</center>"
    print "</small>"
}

'  "$@"

