#!/usr/local/bin/perl
#
# an2html - convert nroff man page to HTML
#
#  DATE: May 12 2000
#  AUTHOR: Hironori Sakamoto <hsaka@mth.biglobe.ne.jp>
#  LICENSE: GPL
#

# HTML element names used to render the roff bold and italic fonts.
($BOLD, $ITALIC) = ('B', 'U');

# Translation table for single-character roff escapes (see Escape).
%ESC = Escape();

# Markup emitted when a block opened by the given macro begins (%BTAG)
# and when it ends (%ETAG).
%BTAG = (
	(map { ($_ => "\n<BLOCKQUOTE>\n") } qw(SH SS)),
	(map { ($_ => "\n<DL COMPACT>\n") } qw(TP IP HP RS)),
);
%ETAG = (
	(map { ($_ => "</BLOCKQUOTE>\n\n") } qw(SH SS)),
	(map { ($_ => "</DL>\n\n") } qw(TP IP HP RS)),
);

# Accumulated output: section links for the index line and the page body.
@links = ();
$body  = '';

# Converter state shared by the main loop.
($Dd, $dl, $nf, $ig) = (0, 0, 0, 0);	# .Dd seen, open <DL> depth, no-fill mode, skipping .ig/.de
($pd, $pd2)          = (1, 1);		# current and saved paragraph-distance flags
($comm, $comm2)      = ('', '');	# pending block macro and pending font macro
@font  = ('R', 'R');			# fonts handed to Paragraph
@block = ();				# stack of currently open block macros

# Main conversion loop.
#
# Reads the man page from the files named on the command line (or STDIN)
# one line at a time.  Request lines (those starting with '.') update the
# converter state ($comm, $comm2, $nf, $pd, @block, ...) and usually skip
# to the next line; text lines are converted by Paragraph and appended to
# $body according to the pending macro in $comm / $comm2.
while(<>) {
	# chomp (not chop): a final line lacking a trailing newline must keep
	# its last character.
	chomp;

	# Comments, and the bodies of .ig / .de blocks (terminated by ".."),
	# are dropped.
	if (/^\.\\\"/) {
		next;
	} elsif (/^\.\./) {
		$ig = 0;
		next;
	} elsif (/^\.(ig|de\s+\S)/) {
		$ig = 1;
		next;
	}
	$ig && next;

	# .TH supplies the page title, section and up to three extra fields.
	if (s/^\.TH\s+//) {
		($t, $s, $e1, $e2, $e3) = &SplitDQ($_);
		$title = "$t";
		$title .= "($s)" if ($s);
		$extra = '';
		$extra .= "<DIV ALIGN=\"CENTER\">\n$e3\n</DIV>\n" if ($e3);
		if ($e1 || $e2) {
			$extra .= "<DIV ALIGN=\"RIGHT\">\n";
			$extra .= "$e2\n" if ($e2);
			$extra .= "<BR>" if ($e1 && $e2);
			$extra .= "$e1\n" if ($e1);
			$extra .= "</DIV>\n";
		}
		next;
	}
	# Everything before the first .TH is ignored.
	$title || next;

	if (s/^\.(SH|SS)(\s+|$)//) {
		# Heading; when the request has no argument the heading text is
		# taken from the next text line.
		$comm = $1;
		$comm2 = '';
		$_ eq '' && next;
		s/"//g;
	} elsif (s/^\.(TP|HP)(\s+|$)//) {
		# Tagged / hanging paragraph: the tag is the next text line.
		$comm = $1;
		$comm2 = '';
		next;
	} elsif (s/^\.(LP|PP|P)(\s+|$)//) {
		# A new plain paragraph closes an open TP/IP/HP list.
		if (($b = $block[$#block]) =~ /[TIH]P/) {
			$body .= $ETAG{$b};
			pop(@block);
			$dl--;
			$comm = '';
		} else {
			$comm = $1;
		}
		$comm2 = '';
		next;
	} elsif (s/^\.(IP)(\s+|$)//) {
		# Indented paragraph: only the first argument (the tag) is kept.
		$comm = $1;
		$comm2 = '';
		($_) = &SplitDQ($_);
	} elsif (s/^\.(SB|SM|B|I)(\s+|$)//) {
		# Single-font macro; applies to the next text line when it has
		# no argument.
		$comm2 = $1;
		$_ eq '' && next;
		s/"//g;
	} elsif (s/^\.(B[IR]|I[BR]|R[BI])(\s+|$)//) {
		# Alternating-font macro; ignored when it has no arguments.
		$_ eq '' && next;
		$comm2 = $1;
	} elsif (/^\.sp(\s+|$)/) {
		($comm || $comm2) && next;
		$body .= "<P>\n";
		if ($dl && $dt) {
			$dt = 0;
			$dd = 1;
		}
		$br = 1;
		next;
	} elsif (/^\.(br|in)(\s+|$)/) {
		($comm || $comm2) && next;
		$body .= "<BR>\n";
		if ($dl && $dt) {
			$dt = 0;
			$dd = 1;
		}
		$br = 1;
		next;
	} elsif (/^\.ns(\s+|$)/) {
		$pd2 = $pd;
		$pd = 0;
		next;
	} elsif (/^\.nf(\s+|$)/) {
		$nf = 1;
		next;
	} elsif (/^\.fi(\s+|$)/) {
		$nf = 0;
		$body .= "<BR>\n";
		$br = 1;
		next;
	} elsif (/^\.PD(\s+(\d+)|$)/) {
		$pd = $pd2;
		$pd2 = ($2 eq '') ? 1 : $2 + 0;
		next;
	} elsif (/^\.RS/) {
		# Relative indent start: close an open TP/IP/HP list, then open a
		# nested <DL> with an empty <DT>.
		if (($b = $block[$#block]) =~ /[TIH]P/) {
			$body .= $ETAG{$b};
			$dl--;
			pop(@block);
		}
		$body .= $BTAG{'RS'};
		$body .= "<DT>&nbsp;\n<DD>\n";
		$dl++;
		$dt = 0;
		$dd = 0;
		push(@block, 'RS');
		next;
	} elsif (/^\.RE/) {
		# Relative indent end: close an inner TP/IP/HP list and then the
		# matching RS block, if present.
		if (($b = $block[$#block]) =~ /[TIH]P/) {
			$body .= $ETAG{$b};
			$dl--;
			pop(@block);
		}
		if (($b = $block[$#block]) eq 'RS') {
			$body .= $ETAG{$b};
			$dl--;
			pop(@block);
		}
		next;
	} elsif (/^\.Dd/) {
		# Remembered only to choose the error message printed when no
		# .TH line is ever found.
		$Dd = 1;
		next;
	} elsif (/^\./) {
		# Any other request is ignored.
		next;
	} elsif (/^\s*$/) {
		# A blank line is a paragraph break unless a macro is waiting
		# for its text.
		($comm || $comm2) && next;
		$body .= "<P>\n";
		if ($dl && $dt) {
			$dt = 0;
			$dd = 1;
		}
		$br = 1;
		next;
	}

	# Text line (or macro argument text): select the fonts implied by the
	# pending font macro.
	@para = ($_);
	if ($comm2 eq 'SB') {
		@font = ('B', 'B');
	} elsif ($comm2 eq 'SM') {
		@font = ('R', 'R');
	} elsif ($comm2 eq 'B') {
		@font = ('B', 'B');
	} elsif ($comm2 eq 'I') {
		@font = ('I', 'I');
	} elsif ($comm2) {
		# Two-letter macro such as BR: one font letter per alternating
		# argument.
		@font = split('', $comm2);
		@para = &SplitDQ($_);
	}
	if ($comm eq 'SH') {
		@font = ('B', 'B');
		($_, @font) = &Paragraph(@font, @para);
		@font = ('R', 'R');
		# A section heading closes every open block.
		while(@block) {
			$body .= $ETAG{$block[$#block]};
			pop(@block);
		}
		$NAME = &Name($_);
		$body .= "<H2><A HREF=\"#top\" NAME=\"$NAME\">$_</A></H2>\n";
		$body .= $BTAG{'SH'};
		push(@block, 'SH');
		$dl = 0;
		push(@links, "<A HREF=\"#$NAME\">$_</A>");
	} elsif ($comm eq 'SS') {
		($_, @font) = &Paragraph(@font, @para);
		@font = ('R', 'R');
		while(@block) {
			$body .= $ETAG{$block[$#block]};
			pop(@block);
		}
		$body .= "<H3>$_</H3>\n";
		$body .= $BTAG{'SS'};
		push(@block, 'SS');
		$dl = 0;
	} elsif ($comm =~ /^[TIH]P/) {
		($_, @font) = &Paragraph(@font, @para);
		@font = ('R', 'R');
		$body .= "\n" if ($pd);
		# Open a new list unless one is already the innermost block.
		if ($block[$#block] !~ /[TIH]P/) {
			$body .= $BTAG{$comm};
			push(@block, $comm);
			$dl++;
		}
		if ($_ eq '') {
			$_ = "&nbsp;";
		}
		# NOTE(review): the unmatched </B> is kept as in the original;
		# presumably it cancels bold rendering of <DT> in the target
		# browser -- confirm before removing.
		$body .= "<DT></B>$_\n";
		if ($nf || $comm =~ /^[TI]P/) {
			$dt = 0;
			$dd = 1;
		} else {
			$dt = 1;
			$dd = 0;
		}
		$pd = $pd2;
	} elsif ($comm =~ /^(LP|PP|P)/) {
		($_, @font) = &Paragraph(@font, @para);
		@font = ('R', 'R');
		$body .= "<P>$_\n";
	} elsif (/^\s/ || $nf) {
		# Preformatted text: keep the spacing by turning whitespace into
		# &nbsp;.  Whitespace inside tags generated by Paragraph (for
		# example <A HREF="...">) must be left alone, otherwise the tag
		# is destroyed.
		($_, @font) = &Paragraph(@font, @para);
		s/(<[^>]*>)|\s/defined($1) ? $1 : '&nbsp;'/ge;
		@font = ('R', 'R') if ($comm2);
		$body .= "<DD>" if ($dl && $dd);
		$body .= "<BR>" if (! $br);
		$body .= "$_\n";
		$dd = 0;
		$br = 0;
	} else {
		($_, @font) = &Paragraph(@font, @para);
		@font = ('R', 'R') if ($comm2);
		$body .= "<DD>" if ($dl && $dd);
		$body .= "$_\n";
		$dd = 0;
		$br = 0;
	}
	$comm = '';
	$comm2 = '';
}

# No .TH line was found, so no title was set: report the problem and
# stop.  The wording depends on whether a .Dd request was seen.
unless ($title) {
	$errmsg = $Dd ? "File format isn't supported\n"
	              : "File format isn't nroff-man format\n";
	print STDERR $errmsg;
	exit 2;
}

# Close whatever blocks are still open, innermost first.
$body .= $ETAG{pop(@block)} while (@block);

# Index line linking to every section heading.
$link = join('',
	"<DIV ALIGN=\"CENTER\">\n[\n",
	join(" |\n", @links),
	"\n]\n</DIV>");

# Emit the complete HTML document.
print <<"EOF";
<HTML>

<HEAD>
<TITLE>$title</TITLE>
</HEAD>

<BODY>
<TT>

<H1 ALIGN="CENTER"><A NAME="top"><B>$title</B></A></H1>

$extra
<P>
$link

<HR>

$body

</TT>
</BODY>

</HTML>
EOF

# Paragraph - convert roff-marked-up text into an HTML fragment.
#
# Arguments:
#   $font1, $font2  starting fonts ('R', 'B' or 'I'); they are used
#                   alternately for successive elements of @para
#   @para           the text pieces to convert
#
# Returns ($html, $font, $font): the generated HTML followed (twice) by
# the font that was in effect at the end of the last piece.
#
# Plain text is HTML-escaped; <user@host> and http/https/ftp URLs are
# turned into links.  \fB, \fI and other \f escapes switch fonts using
# the $BOLD / $ITALIC elements, \(bu becomes 'o', \? toggles an HTML
# comment, and \#, \" and \c suppress output up to the next newline.
# Any other escape is looked up in %ESC.
sub Paragraph {
	my ($font1, $font2, @para) = @_;
	my @fonts = ($font1, $font2);
	my $html = '';
	# Start from the first font so the returned font is defined even
	# when @para is empty.
	my $font = $font1;
	my $escQ = 0;		# inside a \? ... \? comment
	my $escC = 0;		# skipping the rest of the line

	foreach my $man (@para) {
		# Rotate through the two fonts, one per piece.
		$font1 = shift(@fonts);
		push(@fonts, $font1);

		if ($font1 eq 'B') {
			$html .= "<$BOLD>";
		} elsif ($font1 eq 'I') {
			$html .= "<$ITALIC>";
		}
		$font = $font1;

		# Split into plain text and escape sequences, keeping the
		# escapes (and newlines) as separate items.
		foreach my $piece (split(/(\\f.|\\\(bu|\\.|\n)/, $man)) {
			if ($piece =~ /^\n$/) {
				$escC = 0;
			}
			$escC && next;

			if ($piece !~ s/^\\//) {
				# Plain text: escape HTML metacharacters, then linkify.
				$piece =~ s/\&/\&amp;/g;
				$piece =~ s/\</\&lt;/g;
				$piece =~ s/\>/\&gt;/g;
				$piece =~ s/\&lt;([^\s\&]+\@[^\s\&]+)\&gt;/\&lt;\<A HREF=\"mailto:$1\"\>$1\<\/A\>\&gt;/g;
				# A URL stops at whitespace, at a double quote (which
				# would otherwise end up inside the HREF attribute) and
				# at any entity other than &amp;, so an escaped '&' in a
				# query string stays part of the link while a closing
				# &gt; does not.
				$piece =~ s/(https?|ftp):(?:[^\s\&"]|\&amp;)+/\<A HREF=\"$&\"\>$&\<\/A\>/g;
				$html .= $piece;
				next;
			}

			if ($piece =~ /^fB/) {
				$font eq 'B' && next;
				if ($font eq 'I') {
					$html .= "</$ITALIC>";
				}
				$font = 'B';
				$html .= "<$BOLD>";
			} elsif ($piece =~ /^fI/) {
				$font eq 'I' && next;
				if ($font eq 'B') {
					$html .= "</$BOLD>";
				}
				$font = 'I';
				$html .= "<$ITALIC>";
			} elsif ($piece =~ /^f/) {
				# Every other font escape switches back to roman.
				if ($font eq 'B') {
					$html .= "</$BOLD>";
				} elsif ($font eq 'I') {
					$html .= "</$ITALIC>";
				}
				$font = 'R';
			} elsif ($piece =~ /^\(bu/) {
				$html .= 'o';
			} elsif ($piece =~ /^\?/) {
				$escQ = ! $escQ;
				$html .= $escQ ? '<!--' : '-->';
			} elsif ($piece =~ /^[#"c]/) {
				$escC = 1;
			} else {
				$html .= $ESC{$piece};
			}
		}

		# Close whatever font element is still open for this piece.
		if ($font eq 'B') {
			$html .= "</$BOLD>";
		} elsif ($font eq 'I') {
			$html .= "</$ITALIC>";
		}
	}
	return($html, $font, $font);
}

# Name - derive an anchor name from a heading's HTML: tags are removed
# and every non-word character is replaced by '%' followed by its hex
# code.
sub Name {
	my ($anchor) = @_;

	$anchor =~ s/\<[^>]*\>//g;
	$anchor =~ s/\W/('%' . unpack("H*", $&))/eg;
	return $anchor;
}

# Escape - build the table that maps the character following a backslash
# in a roff escape to the text emitted for it.
#
# Returns the table as a hash (key: escape character, value: output).
# The values are appended to the HTML output verbatim, so characters
# that are significant in HTML must be stored as entities.
sub Escape {
	my %esc = (
		'e',	'\\',
		'0',	' ',
		'~',	' ',
	);

	# These characters stand for themselves when escaped.
	foreach my $ch ( split('', ' +-.\':;<=>@\\]_`123456789BFGIJKMOPQTUWijmqy') ) {
		$esc{$ch} = $ch;
	}

	# A raw '<' or '>' would be read as markup by the browser.
	$esc{'<'} = '&lt;';
	$esc{'>'} = '&gt;';

	return %esc;
}

# SplitDQ - split a macro argument string into its arguments.
#
# An argument is either a double-quoted string (the quotes are removed;
# the closing quote may be missing) or a run of non-blank characters in
# which a backslash protects the character after it.
# Returns the list of arguments.
sub SplitDQ {
	my ($rest) = @_;
	my @args = ();

	# Remove one argument from the front of the string per iteration.
	while ($rest =~ s/^\s*(\"([^"]*)\"?|((\\.|\S)+))//) {
		my ($quoted, $bare) = ($2, $3);
		push(@args, defined($quoted) ? $quoted : $bare);
	}
	return @args;
}
