#! /local/bin/perl -s # # ms2html --- convert (pseudo) troff -ms text to HTML # # Converts an annotated text file into HTML # The annotations are based on troff -ms macros, # but may be generated in a variety of ways. # For example, a Framemaker template (doc2ms.fm) # can be applied to an existing document to # insert the annotations for every paragraph type. # # A Framemaker template that implements the annotations is in: # cui.unige.ch:PUBLIC/oscar/doc2ms.fm.Z # # A Table of Contents and links to references and numbered sections # are automatically generated. # # Each of the following commands should appear on a line by itself # (although arbitrary white space is tolerated, to facilitate # translation from Framemaker files): # # Standard troff -ms: # .TL Title # .ST Subtitle # .AU Author # .AI Author's Institution # .AB Abstract # .NH1 Numbered Section # .NH2 Numbered Subsection # .NH3 Numbered Subsubsection # .NH4 Numbered Subsubsubsection # .SH1 Unnumbered Section # .SH2 Unnumbered Subsection # .SH3 Unnumbered Subsubsection # .SH4 Unnumbered Subsubsubsection # .LP Left Paragraph # .PP Indented Paragraph # .IP Indented Paragraph # .QP Quotation # .FS Footnote # .DS Display Start # .\" Comment # Non-standard: # .BC Block - Centred # .BH Block - Hang Indented # .BU1 Bullet Item (level 1) # .BU2 Bullet Item (level 2) # .BU3 Bullet Item (level 3) # .BU4 Bullet Item (level 4) # .LL Left Label (Bold .LP) # .NS Start Numbered Paragraph # .NN Next Numbered Paragraph # .MD Math Definition # .ML Math Lemma # .MP Math Proposition # .MT Math Theorem # .PR Proof # .UR Unnumbered Reference # .RF[1] Reference # # In addition, the following are understood: # [RF:1] A cross reference: # \fB start bold text # \fI start italic text # \fC start typewriter text # \fR return to Roman text (also \fP) # \. dot (at beginning of line) # # NB: note that .B and .I are not valid commands since they would # start a new "paragraph" (this script splits text at command lines). 
# Just convert ".B text" to "\fBtext\fR". # # Author: Oscar Nierstrasz -- oscar@cui.unige.ch -- June 1993 # #v = "ms2html v1.1"; # 6.93 #v = "ms2html v1.2"; # 1.7.93 -- changed .DA to .BU3 # -- tweaked file URL recognition #v = "ms2html v1.3"; # 2.7.93 -- fixed separate numbering for NH and SH # -- added named TOC anchor #v = "ms2html v1.4"; # 8.7.93 -- added .LL #v = "ms2html v1.5"; #v = "ms2html v1.6"; # 4.8.93 -- made url'href into a library $v = "ms2html v1.7"; # 25.8.93 -- added \. escape at beginning of line # -- changed TOC to use
'; $sig = "This document was translated by $v on $date.\n$omn
\n"; foreach $FILE (@ARGV) { $TL = 0; # TOC level (starts at 0) $toc = $refs = ""; open(FILE,$FILE) || die "Can't open $FILE\n"; ($BASE = $FILE) =~ s/\.ms$//; # drop the .ms suffix $TOC = $BASE . "-toc.html"; # the title page $CURR = $PREV = $TOC; # current and previous pages &newpage($TOC); $inbody = 0; if ($b) { $TOTOC = ""; } else { $TOTOC = $TOC; } # some useful strings: $REFS = $BASE . "-refs.html"; $totoc = "To Table of Contents
\n\n"; $torefs = "To References
\n\n";
# translate:
while(
\n";
print $sig;
close(STDOUT);
}
}
# accent2html --- make the current record ($_) safe to emit as HTML.
#
# Works in place on $_; takes no arguments and has no useful return value.
#   1. The three HTML metacharacters & < > are replaced by their entities.
#      "&" must be handled first, otherwise the ampersands introduced by
#      the other substitutions would themselves be escaped.
#   2. Backslash "dead-key" accent sequences (\'e, \`a, \^o, \:u, \~n,
#      \,c, \/o, \oa, \AE, \ss) are rewritten as character entities.
sub accent2html {
    # escape & < and > (ampersand first):
    s/&/&amp;/g;
    s/</&lt;/g;
    s/>/&gt;/g;

    # convert dead-key accents to HTML entities:
    s/\\AE/&AElig;/g;
    s/\\'([AEIOUYaeiouy])/&$1acute;/g;
    # The circumflex may be written \^ or \<.  By this point a literal "<"
    # has already become "&lt;", so that is the spelling to look for.
    s/\\(&lt;|\^)([AEIOUaeiou])/&$2circ;/g;
    s/\\`([AEIOUaeiou])/&$1grave;/g;
    s/\\o([Aa])/&$1ring;/g;
    s/\\~([ANOano])/&$1tilde;/g;
    s/\\[:"]([AEIOUYaeiouy])/&$1uml;/g;
    s/\\,([Cc])/&$1cedil;/g;
    s/\\\/([Oo])/&$1slash;/g;
    s/\\ss/&szlig;/g;
}
# ms2html --- translate the record held in $_ into HTML.
#
# The visible steps are:
#   - strip the leading "." and the record separator, and trim trailing
#     white space before each newline;
#   - turn an escaped dot ("\.") at the start of a line back into ".";
#   - expand accents and escape HTML metacharacters via &accent2html;
#   - rewrite \fI...\fR and \fB...\fR font changes (also ended by \fP)
#     as <I>...</I> and <B>...</B>;
#   - match $_ against command names (AI, AU, AB, PP, BH) and print $text
#     with the output for that command, returning after the first match.
#
# NOTE(review): the statement beginning s/\\fC.../ and the final BH case look
# truncated in this copy (unterminated substitution and do-block) -- confirm
# against a pristine copy of the script before relying on this sub.
sub ms2html {
s/^\.//; # delete initial "." (only needed for first record)
s/\n+\.//; # delete the record separator
s/\s+\n/\n/g; # delete trailing white space
s/\n\\\./\n./g; # unescape leading dots
&accent2html; # expand accents
s/\\f([IB])([^\\]*)\\f[RP]/<$1>$2<\/$1>/g; # italics & bold
s/\\fC([^\\]*)\\f[RP]/ \n\n"; return; };
/^AI$/ && do { print "$text \n\n"; return; };
/^AU$/ && do { print "$text \n\n"; return; };
/^AB$/ && do { print "Abstract \n\n$text \n"; return; };
/^PP$/ && do { print "$text \n\n"; return; };
/^BH$/ && do { print " \n\n";
}
# newbody --- finish the page being written and start writing a new one.
#
#   $NEXT   file name of the page to open next.
#
# Open lists are closed first.  When a body page is already in progress
# ($inbody is true) its navigation elements and the signature are written
# before STDOUT is closed.  Afterwards the old current page becomes $PREV,
# $NEXT becomes $CURR, and output is redirected to it via &newpage.
#
# $NEXT is deliberately a dynamically scoped "local": &right is called
# without arguments and reads $NEXT itself.
sub newbody {
    local($NEXT) = @_;

    &popall;

    if ($inbody) {
        &left;
        &up;
        &right;
        print " \n";
        print $sig;
    }

    close(STDOUT);

    # shift the page pointers along by one
    ($PREV, $CURR) = ($CURR, $NEXT);
    &newpage($CURR);
}
# lastbody --- finish the final body page.
#
# Closes any open lists, writes the "previous" and "up" navigation
# elements, and adds a "next" element only when $refs contains at least
# one non-newline character; in that case the target is $REFS.  The
# signature is then written and STDOUT is closed.
sub lastbody {
    local($NEXT);       # dynamically scoped because &right reads it

    &popall;
    &left;
    &up;

    # pointer to next only if references exist:
    if ($refs =~ /./) {
        $NEXT = $REFS;
        &right;
    }

    print " \n";

    # clean up:
    print $sig;
    close(STDOUT);
}
# newpage --- redirect STDOUT to a newly created (or truncated) file.
#
#   $PAGE   name of the file to write.
#
# Dies, with the operating-system error, if the file cannot be opened.
# On success the name of the new page is reported on STDERR.
sub newpage {
    local($PAGE) = @_;

    # Three-argument open: $PAGE is used literally as a file name, so
    # characters such as a leading ">" or "&" in the name can no longer
    # be interpreted as part of the open mode.
    open(STDOUT, '>', $PAGE) || die "Can't create $PAGE: $!";
    print STDERR "Created $PAGE\n";
}
# listitem --- adjust the list nesting before an item of type $ltype.
#
#   $ltype   the type of the list item that is about to be printed.
#
# @lstack holds the types of the currently open lists, innermost last:
#   - no list is open            -> open a list of this type;
#   - innermost list is $ltype   -> nothing to do;
#   - the list enclosing the innermost one is $ltype
#                                -> close the innermost list (one level);
#   - anything else              -> open a new, nested list of this type.
sub listitem {
    local($ltype) = @_;

    unless (@lstack) {
        # no current list, so start new list:
        &newlist($ltype);
    }
    elsif ($lstack[-1] ne $ltype) {
        if (@lstack > 1 && $lstack[-2] eq $ltype) {
            # we are returning to the enclosing list
            &poplist;
        }
        else {
            &newlist($ltype);
        }
    }
}
# newlist --- start a new list for an item of type $ltype.
#
#   $ltype   the list item type passed by the caller (see &listitem).
#
# As written here, an "NS" item prints a short string and every other type
# calls &button("left", $PREV).
#
# NOTE(review): this body looks damaged.  The else branch emits the "left"
# navigation button, which has nothing to do with lists, and nothing in
# this sub pushes $ltype onto @lstack even though &listitem and &popall
# depend on that stack.  Confirm against a pristine copy of the script.
sub newlist {
local($ltype) = @_; # this list item type
if ($ltype eq "NS") { print " \n\n"; }
else { &button("left","$PREV"); }
}
# right --- write the "next page" navigation element.
#
# Takes no arguments; the target page is read from the caller's
# dynamically scoped $NEXT.  When $p is false a "right" button pointing at
# $NEXT is produced through &button; when $p is true a plain text label is
# printed instead.
# NOTE(review): $p is presumably the -p command-line switch (the script is
# run with "perl -s") -- confirm.
sub right {
    if (!$p) {
        &button("right", "$NEXT");
    }
    else {
        print "To Next Page \n\n";
    }
}
__END__
$1<\/CODE>/g; # code
&url'href;
# expand references into HTML links:
s/\[RF:(\d*)\]/[$1]<\/A>/g;
if (/^$/) { return; } # blank record!?
# separate the text from the command:
$text = "";
s/^(\S+)[ \t]+/$1\n/;
s/^(\S+)\n// && do { $text = $_; $_ = $1; };
# NB: s/^(\S+)\s+(.*)// doesn't work since the text may contain newlines.
&popall unless
/^[LI][PL]/ || /^N[SN]$/ || /^BU[1234]/;
/^TL$/ && do { $title = $text; &printtitle("Title Page"); return; };
/^ST$/ && do { print "$text
\n\n"; return; };
/^BC$/ && do { print "
\n\n"; return; };
/^FS$/ && do { print "
\n\n"; return; };
/^QP$/ && do { print "
\n\n"; return; };
/^DS$/ && do { print "\n$text\n
\n\n"; return; };
# don't distinguish LP, LL & IP for nesting purposes:
/^LP$/ && do { &listitem("LP"); print "
\n\n";
return; };
/^MT$/ && do {
$mt++;
print "
\n\n";
return; };
/^ML$/ && do {
$ml++;
print "
\n\n";
return; };
/^MP$/ && do {
$mp++;
print "
\n\n";
return; };
/^PR$/ && do {
print "
\n\n";
return; };
if (/^([NS])H(\d)$/) {
# skip if this is the reference section:
if (($text eq "References") || ($text eq "Bibliography"))
{ return; };
$stype = $1; # numbered or unnumbered sections
$H = $2; # the header level
if ($H == 1) {
if ($stype =~ /N/) { $n1++; $n2 = $n3 = $n4 = 0; $id = "$n1"; }
else { $s1++; $s2 = $s3 = $s4 = 0; $id = "$s1"; }
}
elsif ($H == 2) {
if ($stype =~ /N/) { $n2++; $n3 = $n4 = 0; $id = "$n1.$n2"; }
else { $s2++; $s3 = $s4 = 0; $id = "$s1.$s2"; }
}
elsif ($H == 3) {
if ($stype =~ /N/) { $n3++; $n4 = 0; $id = "$n1.$n2.$n3"; }
else { $s3++; $s4 = 0; $id = "$s1.$s2.$s3"; }
}
elsif ($H == 4) {
if ($stype =~ /N/) { $n4++; $id = "$n1.$n2.$n3.$n4"; }
else { $s4++; $id = "$s1.$s2.$s3.$s4"; }
}
while ($TL < $H) { $toc .= "\n"; $TL++; }
while ($TL > $H) { $toc .= "
\n"; $TL--; }
$name = "${stype}-$id"; # unique anchor name
if ($stype =~ /N/) { $num = "$id "; }
else { $num = ""; }
# start a new page unless -b option was selected:
if (!$b) {
$NEXT = "$BASE-$name.html" ;
&popall;
&newbody($NEXT);
&printtitle("${num}$text");
}
$inbody = 1;
print "\n"; $ltype = "NN"; }
elsif ($ltype eq "NN") { print "
\n"; }
elsif ($ltype =~ /BU[1234]/) { print "
\n\n"; }
elsif ($ltype =~ /BU[1234]/) { print "\n\n"; }
elsif ($ltype =~ /LP/) { print "\n"; }
else { print STDERR "poplist error: unknown list type \"$ltype\"\n"; }
# should never happen!
}
# popall --- close every list that is still open.
#
# Calls &poplist repeatedly until @lstack is empty.
sub popall {
    &poplist while (@lstack);
}
# yep, you guessed it!
sub printtitle {
local($name) = @_;
print "\n"; }
elsif ($ltype =~ /LP/) { print "
\n"; }
push(@lstack,$ltype);
}
# pop the current list:
sub poplist {
local($ltype);
$ltype = pop(@lstack);
if ($ltype eq "NN") { print "
$title
\n\n";
}
# standard buttons:
# up --- write the "up" navigation element.
#
# Takes no arguments.  When $p is false an "up" button is produced through
# &button, targeting the anchor named TOC inside the page $TOC; when $p is
# true the prepared string $totoc is printed instead.
sub up {
    if (!$p) {
        &button("up", "$TOC#TOC");
    }
    else {
        print $totoc;
    }
}
sub left {
if ($p) { print "To Previous Page