#!/usr/bin/perl
#
# $Id: fix-groff-xhtml,v 1.3 2003/03/31 20:37:40 cmdjb Exp $
#
# Format XHTML generated by groff -Thtml (via tidy) for websites
#
# Usage: groff -Thtml -P-l something.man | tidy -asxml ... | fix-groff-xhtml OUTPUT-FILE
#
# (C) Copyright 2003 Dave Beckett <Dave.Beckett@bristol.ac.uk>
# University of Bristol
#

use strict;
use File::Basename;

use warnings;

# Script name used as the prefix of usage and error messages.
my $progname = basename $0;

# Site title substituted into the document <title> and top-level <h1>.
my $main_title = "Raptor RDF Parser Toolkit";

die "USAGE: $progname OUTPUT-FILE\n" if @ARGV < 1;

# Only the first argument is used; it names the file to (over)write.
my ($file) = @ARGV;

# The copyright year does not change between lines, so compute it once
# rather than on every iteration of the read loop.
my $year = 1900 + (localtime)[5];

# Three-argument open keeps characters in the file name from being
# interpreted as part of the open mode.
open(my $out, '>', $file)
  or die "$progname: Cannot create $file - $!\n";

# Filter standard input line by line into the output file.
while (my $line = <STDIN>) {

  # Replace the bare "libraptor" title and heading with the site title.
  $line =~ s%<title>libraptor</title>%<title>$main_title - Raptor API</title>%;
  $line =~ s%<h1 align="center">libraptor</h1>%<h1 align="center">$main_title - Raptor API</h1>%;

  # Drop lines that start with a <link or <meta tag.  The alternation is
  # grouped so that the anchor applies to both names; ungrouped, the
  # pattern would also discard any line merely containing "meta".
  next if $line =~ /^<(?:link|meta)/i;

  # Give the page body explicit background and text colours.
  $line =~ s%^<body>%<body bgcolor="#ffffff" text="#000085">%;

  # This is not xhtml
  $line =~ s% cols="\d+" % %;

  # Replace spaces with underscores inside name="..." and id="..." values.
  $line =~ s%(name|id)="([^"]+)"%my ($attr, $val) = ($1, $2); $val =~ tr/ /_/; qq{$attr="$val"};%eg;

  # Turn the first "<owner> [- ]http://..." occurrence on the line into a
  # hyperlink whose text is the owner name.  The unescaped dots around
  # ILRT match any single character, which covers the parentheses.
  $line =~ s%(Dave Beckett|Institute for Learning and Research Technology .ILRT.|University of Bristol) (?:- |)(http://[^<]+)%<a href="$2">$1</a>%;

  # Emit the copyright footer immediately before the closing body tag.
  if ($line =~ m%^</body>%) {
    print {$out} <<"EOT";

<p>Copyright 2002-$year <a href="http://purl.org/net/dajobe/">Dave Beckett</a>, <a href="http://www.ilrt.bristol.ac.uk/">Institute for Learning and Research Technology</a>, <a href="http://www.bristol.ac.uk/">University of Bristol</a></p>

EOT
  }

  print {$out} $line;
}

# Buffered write failures only surface when the handle is closed, so the
# result of close must be checked.
close($out)
  or die "$progname: Cannot write $file - $!\n";
