#!/usr/bin/env perl
# -*- mode: cperl; indent-tabs-mode: nil; tab-width: 3; cperl-indent-level: 3; -*-
use warnings;
use strict;
use utf8;
BEGIN {
   # Autoflush the currently selected output handle so the script behaves
   # well in the middle of a pipeline.
   $| = 1;
   # Read and write UTF-8 on the standard streams as early as possible.
   for my $handle (\*STDIN, \*STDOUT) {
      binmode($handle, ':encoding(UTF-8)');
   }
}
use open qw( :encoding(UTF-8) :std );

use Getopt::Long;
# Single-letter options may be bundled (e.g. -r1) and option names are
# matched case-sensitively.
Getopt::Long::Configure(qw(bundling no_ignore_case));

# Parsed command-line options, keyed by their long name. --weight takes an
# optional string value; the others are plain flags.
my %opts;
GetOptions(\%opts, 'weight|w:s', 'reverse|r', 'first|1', 'help|?');

# Print the usage text to the currently selected output handle.
sub print_help {
   my $usage = <<'XOUT';
Usage: cg-sort [OPTIONS]

Pipe a CG stream through this to sort and unique the readings of each cohort.

Options:
 -?, --help       outputs this help
 -w, --weight     sorts by a numeric tag; defaults to W
 -r, --reverse    reverses the sort order
 -1, --first      only keep the first reading

XOUT
   print $usage;
}

# --help short-circuits everything else: show the usage text and stop.
if (defined $opts{help}) {
   print_help();
   exit(0);
}

# Name of the numeric tag used for weight sorting; a non-empty --weight value
# overrides the default of 'W'.
my $W = (exists($opts{weight}) && length($opts{weight})) ? $opts{weight} : 'W';

# State of the cohort currently being collected:
my $in_cohort = 0;   # true once a cohort header line has been seen
my %readings = ();   # unique reading lines (hash keys) of the cohort
my %deleted = ();    # unique ';'-prefixed reading lines of the cohort
my $trail = '';      # any other lines seen inside the cohort, in input order

# Sort comparator (uses the package variables $a and $b set by sort).
#
# Orders two reading lines by the numeric value of their <$W:NUMBER> tag,
# ascending. A line without such a tag counts as weight 0. Lines of equal
# weight are ordered by plain string comparison so the result is stable and
# deterministic.
#
# Returns -1, 0 or 1, as sort expects.
sub sort_weight {
   # Capture in list context: a failed match then yields undef. Reading $1
   # after a failed match would instead return the capture of the previous
   # successful match, i.e. the other line's weight.
   my ($wa) = $a =~ m/<\Q$W\E:([\d.]+)>/;
   my ($wb) = $b =~ m/<\Q$W\E:([\d.]+)>/;
   $wa ||= 0.0;
   $wb ||= 0.0;

   if ($wa == $wb) {
      return $a cmp $b;
   }
   return $wa <=> $wb;
}

# Print a list of reading lines in sorted order.
#
# Arguments: the reading lines (each expected to carry its own line ending).
# Does nothing for an empty list. The order is by weight tag when --weight
# was given, plain string order otherwise; --reverse flips it, and --first
# restricts the output to the first line of the final order.
sub print_sorted_readings {
   my @lines = @_;
   return unless @lines;

   my @ordered = exists($opts{weight})
      ? (sort sort_weight @lines)
      : (sort @lines);
   @ordered = reverse(@ordered) if exists($opts{reverse});

   my $output = exists($opts{first}) ? $ordered[0] : join('', @ordered);
   print $output;
}

# Flush the cohort collected so far, if any: print its unique readings, then
# its unique deleted readings, then the trailing lines, and reset the
# per-cohort state so a new cohort can be collected.
sub print_sorted {
   return unless $in_cohort;

   # Each group is sorted and printed independently, readings first.
   for my $group (\%readings, \%deleted) {
      print_sorted_readings(keys(%{$group}));
   }
   print $trail;

   # Start over with empty state for the next cohort.
   %readings = ();
   %deleted = ();
   $trail = '';
   $in_cohort = 0;
}

# Input comes from the file named by the first non-option argument, or from
# STDIN when no file is given.
my $fh = \*STDIN;
if (defined $ARGV[0]) {
   # Open a fresh lexical handle instead of re-opening the STDIN glob, and
   # name the offending file in the error message.
   open(my $file, '<', $ARGV[0])
      or die "cg-sort: cannot open '$ARGV[0]': $!\n";
   $fh = $file;
}

while (my $line = <$fh>) {
   if ($line =~ m/^"<.+?>"/) {
      # A cohort header: flush the previous cohort, then start a new one.
      # The header line itself is passed through below.
      print_sorted();
      $in_cohort = 1;
   }
   elsif ($in_cohort) {
      my $is_reading = ($line =~ m/^\s+".+?"/);
      my $is_deleted = !$is_reading && ($line =~ m/^;\s+".+?"/);

      if ($is_reading || $is_deleted) {
         # Readings are re-ordered and de-duplicated by their full text, so
         # a final line lacking its newline must be terminated first;
         # otherwise it could be glued to the reading printed after it, or
         # fail to match its newline-terminated duplicate.
         $line .= "\n" if substr($line, -1) ne "\n";
         if ($is_reading) {
            $readings{$line} = 1;
         }
         else {
            $deleted{$line} = 1;
         }
      }
      else {
         # Anything else inside a cohort is kept in input order and printed
         # after the cohort's readings.
         $trail .= $line;
      }
      next;
   }
   # Cohort headers and everything before the first cohort pass through.
   print $line;
}

# Flush the last cohort of the stream.
print_sorted();
