#set -x
############################## -*- Mode: sh -*- #############################
## wwwoffle-chunks -- chop a wwwoffle index into smaller chunks
## Author          : Dan Jacobson http://www.geocities.com/jidanni/
## Copyright       : GNU General Public License
## Created On      : Aug 2001
## Last Modified By: Dan Jacobson
## Last Modified On: Wed Oct 24 08:19:51 2001
## Update Count    : 88
## Status          : seems to work it seems
###############################################################################

# The newest copy of this program is on
# http://www.geocities.com/jidanni/wwwoffle-jidanni.html

# Configuration.
#   DIR        directory that receives the chunk files (created if missing)
#   size       number of index lines per chunk
#   index      which WWWOFFLE index to fetch: $1 if given, else $defaultindex
#   sortstyle  kept from the environment if already set, otherwise
#              $defaultsortstyle.  A value such as mtime&monitor puts raw
#              &'s into the generated HTML... no big deal apparently.
DIR=$HOME/wwwoffle-chunks
size=40
defaultindex=lasttime
index=${1-$defaultindex}
defaultsortstyle=domain
: "${sortstyle=$defaultsortstyle}"

# More than one argument, or --help: print the usage on stderr and exit 1.
if [[ $# -gt 1 || ${1-} == "--help" ]]; then
  cat 1>&2 <<EOF
$0: usage:
After going offline run this program to break a too-big
WWWOFFLE index into chunks to save wear and tear and waiting each time
one hits the BACK key in their favorite browser.  Then one points their
browser to $DIR/...  optional argument is what index to get,
default is "$defaultindex".  One needn't be root to run this program.
Sorting style can be adjusted thru \$sortstyle, default=$defaultsortstyle.
EOF
  exit 1
fi

# $DIR is quoted everywhere so that a $HOME containing whitespace or glob
# characters still names exactly one directory.
[[ -d $DIR ]] || mkdir -p -- "$DIR"
cd -- "$DIR" || exit 22
set -e #over paranoid: from here on stop at the first failing command

# I'm looking for a better date to put on my indexes, ideally one
# reflecting when that index was made.
# A nice date to use might be obtained from
#   ls -dl /var/spool/wwwoffle/lasttime   (etc.)
# but one must be root to do that...
#   wwwoffle -O http://localhost:8080/index/lasttime?sort=none| awk '/Date/{print;exit}'
# looked promising, but it is just the current date.
# Build the file name prefix "<date>-<index>-" used for this run's chunks.
# The date is the index directory's modification time as printed by
# "ls -ld" (e.g. Oct24_0819).  When the directory is not readable, or the
# ls output has fewer fields than expected, the current time is used.
#
# The C locale is forced with LC_ALL rather than LC_TIME: a LC_ALL set in
# the caller's environment takes precedence over LC_TIME and would bring
# the localized date layout back.  (C just looks better to me... sorry)
indexdir=/var/spool/wwwoffle/$index
datestr=
if [[ -r $indexdir ]]; then
  # Hope it's in the default recent-file format:
  #   perms links owner group size Mon DD HH:MM name
  # Read into an array so the positional parameters are left alone.
  read -r -a ls_fields <<<"$(LC_ALL=C ls -ld -- "$indexdir")"
  if (( ${#ls_fields[@]} >= 8 )); then
    datestr=${ls_fields[5]}${ls_fields[6]}_${ls_fields[7]/:/} #no :'s
  fi
fi
if [[ -z $datestr ]]; then
  datestr=$(LC_ALL=C date '+%b%d_%H%M') #no :'s
fi
# Leaving $index out of the prefix would read less confusingly, but it is
# needed to avoid collisions between different indexes.
prefix=$datestr-$index-

# Fetch the index page, drop everything up to and including the first
# <ul> line and everything from the </ul> line on, then cut what is left
# into files of $size lines named ${prefix}aa, ${prefix}ab, ...
# The URL is quoted: it contains "?" (a glob character) and $sortstyle may
# contain "&".
wwwoffle -o "http://localhost:8080/index/$index?sort=$sortstyle" |
  sed '1,/<ul>/d;/<\/ul>/,$d' |
  split -l "$size" - "$prefix"

tmp=("$prefix"??) #??: better than *
# An unmatched glob stays behind as the literal pattern, which is no file.
if [[ ! -e ${tmp[0]} ]]; then
  echo "$0: no index entries received for \"$index\", nothing to chunk" 1>&2
  exit 1
fi
shortrefs=("${tmp[@]##*-}") # just the aa, ab, ... suffixes, for link texts
for i in "${tmp[@]}"; do
  mv -- "$i" "$i.html" #Then lynx doesn't need -force-html
done
# Derive the .html names from the files just split instead of globbing
# again: an earlier run with the same prefix may have left more chunks
# behind, and those must not be picked up and wrapped a second time.
refs=("${tmp[@]/%/.html}")
printf '%s\n' making "${refs[@]}" #one name per line: neater
# Turn every chunk into a small HTML page, editing the file in place with
# ed: a DOCTYPE/title/heading and an opening <ol> go in front of the first
# line, and after the last line come the closing </ol>, a "next" link to
# the following chunk and a link to the last chunk.
# Reads:  refs (chunk file names), shortrefs (their link texts), sortstyle.
m=0    # started explicitly so a stray $m from the environment cannot interfere
sorry= # becomes ", i.e. this." on the last chunk
last=$(( ${#refs[@]} - 1 ))
for i in "${refs[@]}"; do
  m=$((m + 1)) # now the array index of the chunk AFTER $i
  if [[ -n ${refs[m]-} ]]; then
    next_link="next: <a href=\"${refs[m]}\">${shortrefs[m]}</a>"
  else
    # Last chunk: there is no next one, so emit no (empty) link at all.
    next_link=
    sorry=", i.e. this."
  fi
  title_and_header="Chunk $i of WWWOFFLE indexs, sorted by $sortstyle"
  # The here-document is the ed script; \$a reaches ed as "$a" (append
  # after the last line).  Its lines must stay unindented.
  ed -s "$i" <<!
1i
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"><HTML><HEAD>
<TITLE>$title_and_header</TITLE></HEAD><BODY><H2>$title_and_header</H2><ol>
.
\$a
</ol><p align=center>$next_link
&nbsp;&nbsp;&nbsp;(last is <a href="${refs[last]}">
${shortrefs[last]}</a>$sorry)</p></BODY></HTML>
.
w
q
!
  #Dan Mercer, comp.unix.shell: You can also use:  : "${a[@]}"; echo $_
  #I should add the standard footer WWWOFFLE links, but wait, I'm trying
  #to correct a size & slowness problem, so i should make this as clean
  #as possible.
done
# Tell the user where to start reading: the first chunk.  Quoted so that
# the path is printed verbatim, without word splitting or globbing.
echo
echo "OK, now point your browser at $DIR/${refs[0]}"
#and after a while clean up the old chunk files ($DIR/*-*-??.html) by hand
