#! /local/bin/perl -s
#
# stdll		--- generate a standard record format from the Language List
#
# Picks apart The Language List into records with standard fields
# to facilitate further transformation into various formats.
#
# Oscar Nierstrasz 27/05/93
#
# Translated to perl and fixed various bugs -- 27/6/93

$u = 'Usage: stdll [-<option>] <The Language List>
	-i -- intro only (to stdout)
	-b -- body only (to stdout)
	-a -- appendix only (to stdout)
	default: create ll-intro, ll-body and ll-app
' ;

if ($i) { &intro ; }
elsif ($b) { &skipintro; &body ; }
elsif ($a) { &skipintro; &skipbody; &app ; }
else {
	open(STDOUT,">ll-intro") || die("Can't write intro\n");
	&intro;
	print STDERR "Created ll-intro\n" ;
	close(STDOUT) ;

	open(STDOUT,">ll-body") || die("Can't write body\n");
	&body;
	print STDERR "Created ll-body\n" ;
	close(STDOUT) ;

	open(STDOUT,">ll-app") || die("Can't write appendix\n");
	&app;
	print STDERR "Created ll-app\n" ;
	close(STDOUT) ;
};

sub skipintro {
    while(<>) {
	eof() && die("Couldn't find body\n");
	return if /\* \* \* \*/ ;
    }
}

sub intro {
    while(<>) {
	eof() && die("Couldn't find body\n");
	return if /\* \* \* \*/ ;
	print ;
    }
}

sub skipbody {
    while(<>) {
	eof() && die("Couldn't find appendix\n");
	return if /APPENDIX/;
    }
}

# mode 0 -- new record (preceding line blank)
# mode 1 -- within record
# mode 2 -- within record, after ftp
# mode 3 -- within record, after info
sub body {
    $mode = 0 ;
    $l = "UNKNOWN" ; # language name -- should never print this!
    while(<>) {
	eof() && die("Couldn't find appendix\n");
	chop;
	return if /APPENDIX/ ;
	s/^( +\d)\)/$1./; # fix (two) languages numbered with right paren
	s/^\s+$//g; # zero blank lines
	s/\s+/ /g; # compact white space

	if (/^$/) { $mode = 0; }
	elsif ($mode == 0) {
		# A numbered language (check this first!):
		if (/^ +(\d)\. +(.*)/) {
			$dot = index($_, '. ');
			printf "\nL %s\n", $l;
			printf "N %s\n", $1;
			printf "| %s\n", $2;
			$mode = 1;
		}
		# new language:
		elsif (/ - /) {
			$hyphen = index($_,' - '); # want *first* occurrence!
			printf "\nL %s\n", substr($_,0,$hyphen);
			printf "| %s\n", substr($_,$hyphen+3);
			$l = 'UNKNOWN';
			$mode = 1;
		}
		# save the name of a numbered language:
		elsif (/^(.*) -$/) { $l = $1; $mode = 1; }
		# There are a couple of orphaned infos:
		elsif (/^ *info: *(.*)$/) {
			printf "I %s\n", $1;
			$mode = 3;
		}
		# Language name terminated with ": " instead of " - "
		elsif (/: /) {
			$colon = index($_,': '); # want *first* occurrence!
			printf "\nL %s\n", substr($_,0,$colon);
			printf "| %s\n", substr($_,$colon+2);
			$l = 'UNKNOWN';
			$mode = 1;
		}
		# name terminated with ". " instead of " - "
		elsif (/\. /) {
			$dot = index($_,'. '); # want *first* occurrence!
			printf "\nL %s\n", substr($_,0,$dot);
			printf "| %s\n", substr($_,$dot+2);
			$l = 'UNKNOWN';
			$mode = 1;
		}
		# and at least one numbered language missing " -$":
		elsif (/^(\S*)$/) { $l = $1; $mode = 1; }
		# unrecognized record:
		elsif (/\S/) { printf "? %s\n", $_; }
	}
	elsif (/^ *ftp: *([^ ,]*)/) {
		printf "F %s\n", $1;
		if ($' =~ /,* *(.+)/) { printf "| %s\n", $1; }
		$mode = 2;
	}
	elsif (/^ *info: *(.*)$/) {
		printf "I %s\n", $1;
		$mode = 3;
	}
	# new paragraph:
	elsif (/^ +(.+)/) {
		if ($mode == 1) { printf "P %s\n", $1; }
		elsif ($mode == 3) { printf "I %s\n", $1; }
		else { printf "| %s\n", $1; $mode = 1; }
	}
	else { printf "| %s\n", $_; }
    }
    print "\n" ;
}

sub app {
    print ;
    while(<>) {
	return if eof() ;
	print ;
    }
}

__END__

