#! /local/bin/perl -s
#
# ll2html	--- convert (standardized) language list to html
#
# Assumes that The Language List has first been converted by stdll.
#
# Oscar Nierstrasz 27/5/93
#
# Program name and version; embedded in the signature of generated pages.
$v = "ll2html v1.0"; # Rewritten in Perl 28/6/93

# Usage text, handed to die() when no mode switch was given.
$u = 'Usage: ll2html [-(option)] <standard language list>
	-i	introduction
	-b	body (split by language name)
	-a	appendix
	-idx	index
';

# Signature block printed at the end of the introduction, appendix and
# index pages: generator version, today's date (dd.mm.yy, as reported
# by date(1)) and a link to the OMN page.
chop($date = `date +%d.%m.%y`);	# drop the newline date(1) appends
$omn = '<A HREF="http://cui_www.unige.ch/OSG/omn.html"><I>OMN</I></A><P>';
$sig = "<I>This file was generated by " . $v . " on " . $date . ".</I>\n"
	. $omn . "<P>\n";

# The -s switch on the #! line turns the command-line switches -i, -b,
# -a and -idx into the variables $i, $b, $a and $idx.  Exactly one
# converter runs; when several switches are given the first in the
# order below wins.
die($u) unless ($i || $b || $a || $idx);
if ($i) { &intro; }
elsif ($b) { &body; }
elsif ($a) { &app; }
else { &idx; }

# Process the introduction.
# Reads the introduction text from <> line by line and prints HTML.
# Try to guess which lines are supposed to be broken, and
# which can be justified.  Mode 0 means the previous line was blank.
# Mode 1 means we are inside a justified paragraph.  Mode 2 means we
# are inside a list (every line becomes its own <DD>).
# Use <DL> instead of <P> to cut down on white space.
# $dl_open records whether a <DL> is actually open, so that </DL> is
# only ever printed to close one (the title paragraph is in mode 1
# without having opened a <DL>).
sub intro {
	local($dl_open) = 0;
	print "<TITLE>Introduction to The Language List</TITLE>\n\n";
	$mode = 0;
	while (<>) {
		# strip record separator and trailing white space; unlike
		# chop this leaves an unterminated last line intact
		s/\s+$//;
		&htmlescape;
		s/\s+/ /g; # compress every remaining run of white space
		if (/^$/) {
			# terminate lists & paragraphs:
			if ($dl_open) {
				print "</DL>\n";
				$dl_open = 0;
			}
			$mode = 0;
		}
		# non-blank lines:
		elsif ($mode == 0) {
			if (/^The Language List/) {
				# document title; following lines are printed
				# as a plain paragraph
				print "\n<H2>$_</H2>\n";
				$mode = 1;
			}
			# e.g., Version #:
			elsif (/^[A-Z].*: /) {
				print "\n<DL>\n<DD>$_\n";
				$mode = 2;
				$dl_open = 1;
			}
			# section heading (with or without a trailing colon);
			# the mode stays 0:
			elsif (/^\S/) {
				print "\n<H3>$_</H3>\n\n";
			}
			# indented line: starts an indented paragraph
			else {
				print "\n<DL>\n<DD>$_\n";
				$mode = 1;
				$dl_open = 1;
			}
		}
		elsif ($mode == 1) {
			# an indented line inside a paragraph forces a break
			if (/^ /) { print "<DD>"; }
			print "$_\n";
		}
		else {	print "<DD>$_\n"; }
	}
	# close a list or paragraph that runs up to the end of the input
	if ($dl_open) { print "</DL>\n"; }
	print "$sig";
}

# process the body (for use with parscan)
# Reads <> one blank-line-separated record at a time and prints each
# record as its own <DL> ... </DL>.  Lines are recognised by a leading
# tag followed by a space:
#	L  language name   -> <DT> with a named anchor
#	|  continuation    -> tag removed, text kept
#	P  paragraph       -> new <DD>
#	I  information     -> <DD> labelled "Info:"
#	F  ftp location    -> <DD> labelled "Ftp:" with a file:// link
sub body {
	# paragraph mode, restored to the previous value on return
	local($/) = "";
	while (<>) {
		&htmlescape;
		# Leave exactly one trailing newline.  A plain chop would eat
		# the last character of a final record that has no newline.
		s/\n*$/\n/;
		s!^L (.*)!<DT><A NAME="$1"><B>$1</B></A>\n<DD>!;
		s!\n\| (.*)!\n$1!g;
		s!\nP (.*)!\n<DD>$1!g;
		s!\nI (.*)!\n<DD><I>Info:</I> $1!g;
		s!\nF (.*)!\n<DD><I>Ftp:</I>\n<A HREF="file://$1">\n$1</A>!g;
		s!(HREF=".*)[,.;]"!$1"!g; # remove garbage punctuation
		# NOTE(review): because .* is greedy, [^/]* always matches the
		# empty string here, so only the ".Z" / "*" suffix itself is
		# removed, not the file name in front of it -- confirm that
		# this is what is wanted.
		s!(HREF=".*)[^/]*\.Z"!$1"!g; # delete trailing .Z and *
		s!(HREF=".*)[^/]*\*"!$1"!g;
		# Remove the colon in ftp addresses (host:/path).  The search
		# stops at the closing quote or end of line, so an address
		# without a colon cannot cause a later, unrelated colon
		# (e.g. the one in "Info:") to be deleted.
		s!(file://[^:"\n]*):!$1!g;
		print "<DL>\n$_</DL>\n\n";
	}
}


# Escape the HTML special characters &, < and > in $_ (in place).
# The ampersand has to be handled first: the entities produced for
# < and > contain an ampersand themselves and must not be escaped
# a second time.
sub htmlescape {
	s!&!&amp;!g;
	s!<!&lt;!g;
	s!>!&gt;!g;
}

# Process the appendices.
# Reads the appendix text from <> line by line and prints HTML.
# Again, try to guess where lines are supposed to be broken.
# Mode 0 means the previous line was blank (or a ===== rule).
# Mode 1 means we are inside a paragraph; only indented lines force
# a break.  Mode 2 means we are inside a list started by a line
# beginning with a year (19xx); every line becomes its own <DD>.
# Mode 3 means we are building up the subtitle: an APPENDIX line has
# been seen and the next non-blank line is printed as a second heading.
sub app {
	print "<TITLE>Appendices to The Language List</TITLE>\n\n";
	$mode = 0;
	while (<>) {
		# strip record separator; unlike chop this leaves an
		# unterminated last line intact
		s/\n$//;
		&htmlescape;
		if (/^$/) {
			# terminate lists & paragraphs (blank lines between an
			# APPENDIX line and its subtitle are skipped):
			if ($mode != 3) {
				if ($mode > 0) { print "</DL>\n"; }
				$mode = 0;
			}
		}
		elsif (/^APPENDIX/) {
			# close a list that was not ended by a blank line
			if ($mode == 1 || $mode == 2) { print "</DL>\n"; }
			print "<H2>$_</H2>\n\n";
			$mode = 3 ;
		}
		elsif (/^=+$/) {
			# a rule of equals signs is dropped from the output
			if ($mode == 1 || $mode == 2) { print "</DL>\n"; }
			$mode = 0 ;
		}
		# non-blank lines:
		elsif ($mode == 0) {
			if (/^19\d\d\s\s/) {
				print "<DL>\n<DD>$_\n";
				$mode = 2 ;
			}
			else {
				print "<DL>\n<DD>$_\n";
				$mode = 1 ;
			}
		}
		elsif ($mode == 1) {
			if (/^\s/) { print "<DD>$_\n"; }
			# keep a line break between consecutive text lines so
			# that the words at the line boundary are not joined
			else { print "$_\n"; }
		}
		elsif ($mode == 2) { print "<DD>$_\n"; }
		elsif ($mode == 3) {
			print "<H2>$_</H2>\n\n";
			$mode = 0;
		}
	}
	# close a list that runs up to the end of the input; mode 3 has
	# no open <DL>
	if ($mode == 1 || $mode == 2) { print "</DL>\n"; }
	print "<P>\n$sig";
}

# Build the index page.
# Reads records from <>; a record is ended by a blank line (or by the
# end of the input).  Within a record an "L name" line gives the
# language name and an optional "N number" line gives a number that
# is shown in brackets after the link.  Each record that named a
# language produces one link of the form ../Lists/X.html#name, where
# X is the first ASCII letter of the name.
# NOTE(review): the name is written into the page without HTML
# escaping, unlike the anchors produced by body -- confirm whether
# names can contain &, < or >.
sub idx {
	print "<TITLE>Index to The Language List</TITLE>\n\n";
	print "<H1>Index to The Language List</H1>\n\n";
	$lang = $num = "";
	while (<>) {
		# no chop: /^$/ below also matches a line holding only "\n"
		if (/^L (.*)/) {
			$lang = $1;
			# $char keeps its previous value when the name
			# contains no letter at all
			if ($lang =~ /[^a-zA-Z]*([a-zA-Z])/) { $char = $1; }
		}
		if (/^N (\d+)/) { $num = $1; }
		if (/^$/) { &idx_entry; }
	}
	# the last record need not be followed by a blank line
	&idx_entry;
	print "<P>\n$sig";
}

# Print the index entry for the record collected in $lang, $char and
# $num, then forget the record.  Nothing is printed when no language
# name was collected, so repeated blank lines or text without an
# "L" line yield neither empty nor duplicate entries.
sub idx_entry {
	if ($lang ne "") {
		print "<A HREF=\"../Lists/$char.html#$lang\">$lang</A>";
		if ($num =~ /\d/) { print "[$num]"; }
		print ";\n";
	}
	$lang = $num = "";
}

