#! /usr/local/bin/perl # convert table in stanza format to HTML # stz2html # stz2html # define scale factors $scale{'E'} = 1.E18; $scale{'P'} = 1.E15; $scale{'T'} = 1.E12; $scale{'G'} = 1.E9; $scale{'M'} = 1.E6; $scale{'k'} = 1.E3; $scale{'h'} = 1.E2; $scale{'d'} = 1.E-1; $scale{'c'} = 1.E-2; $scale{'m'} = 1.E-3; $scale{'µ'} = 1.E-6; $scale{'n'} = 1.E-9; $scale{'p'} = 1.E-12; $scale{'f'} = 1.E-15; $scale{'a'} = 1.E-18; $scale{'EB'} = 1.152921504606846976E18; $scale{'PB'} = 1.125899906842624E15; $scale{'TB'} = 1.099511627776E12; $scale{'GB'} = 1.073741824E9; $scale{'MB'} = 1.048576E6; $scale{'KB'} = 1.024E3; $scale{'kB'} = 1.E3; # look for options and files if ($#ARGV == 0 && -f "$ARGV[0].stz") { open (STZ, "<$ARGV[0].stz") || die "unable to open file $ARGV[0].stz"; if (-f "$ARGV[0].stzd") { open (STZD, "<$ARGV[0].stzd") || die "unable to open file $ARGV[0].stzd"; $exi_stzd = 1; }; open (HTML, ">$ARGV[0].html") || die "unable to open file $ARGV[0].html"; } else { if ($#ARGV >= 0) { open (STZ, "<$ARGV[0]") || die "unable to open file $ARGV[0]"; } else { open (STZ, "<-"); }; if ($#ARGV >= 1) { open (STZD, "<$ARGV[1]") || die "unable to open file $ARGV[1]"; $exi_stzd = 1; }; if ($#ARGV >= 2) { open (HTML, ">$ARGV[2]") || die "unable to open file $ARGV[2]"; } else { open (HTML, ">-"); }; }; # read description file if ($exi_stzd) { while () { chop; undef $sopts; undef $hopts; if (/^\s*(\#.*)?$/) { next; } elsif (! $postlude && ($prelude || /^\s* $exi_sort-1) { die "inconsistent sort directives: " . substr ($sorterr, 2); }; } else { &close_prelude; }; # read input file if (read (STZ, $text, 400000) == 400000) { die "input file too long (>= 400000 bytes)"; }; close (STZ); @text = split (/\s*\n\s*\n\s*/, $text); undef $text; foreach $stanza (@text) { $idn++; $id = $idn . '~'; @lines = split (/\s*\n\s*/, $stanza); foreach $line (@lines) { if ($line =~ /([^:]*):/) { $hdr = $1; $bdy = $'; $hdr = &strip ($hdr); $bdy = &strip ($bdy); if ($exi_stzd) { next unless defined $newhdr{$hdr}; $hdr = $newhdr{$hdr}; } elsif (!$ishdr{$hdr}) { $ishdr{$hdr} = 1; push (@hdrs, $hdr); }; if ($field{$id.$hdr}) { $field{$id.$hdr} .= '
' . $bdy; } else { $field{$id.$hdr} = $bdy; }; } elsif (! /^\s*(\#.*)?$/) { die "illegal line: $line"; }; }; }; undef @text; # determine types and alignments foreach $hdr (keys %ishdr) { $type = $type{$hdr}; if (! $type || $type eq 'num' || $type eq 'int') { $suffix = $field{"1~$hdr"}; for ($i=2; $i<=$idn; $i++) { $suffix = &common_suffix ($suffix, $field{$i.'~'.$hdr}); last unless $suffix; }; if ($suffix =~ /[!#-&(-.:-@[-`{-~]/) { $suffix = ''; } elsif ($suffix =~ /^\d+/) { $suffix = $'; }; }; if ($type eq 'num' || $type eq 'int') { if ($suffix && $suffix ne 'B') { $type = "$type/$suffix"; }; $type{$hdr} = $type; } elsif (! $type) { $type = 7; # 1=num, 2=int, 4=name for ($i=1; $i<=$idn; $i++) { next unless $field{$i.'~'.$hdr}; if ($type & 3) { $field{$i.'~'.$hdr} =~ /$suffix$/; $x = $`; if (&numeric ($x, 'num') eq 'undef') { $type &= 6; }; if ($x !~ /^-?[0-9.,]+$/) { $type &= 5; }; }; if (($type & 4) && ($field{$i.'~'.$hdr} =~ /\s/)) { $type &= 3; }; last unless $type; }; if ($type & 1) { if ($suffix && $suffix ne 'B') { $type = "num/$suffix"; } else { $type = "num"; }; } elsif ($type & 2) { if ($suffix) { $type = "int/$suffix"; } else { $type = "int"; }; } elsif ($type & 4) { $type = "name"; } else { $type = "text"; }; $type{$hdr} = $type; }; if (! $align{$hdr}) { if ($type eq 'name') { $align{$hdr} = 'center'; } elsif ($type eq 'text') { $align{$hdr} = 'left'; } else { $align{$hdr} = 'right'; }; }; }; # sort if ($exi_sort) { $j = 0; foreach $i (sort sortalg (1..$idn)) { $j++; foreach $hdr (@hdrs) { $nfield{$j.'~'.$hdr} = $field{$i.'~'.$hdr}; delete $field{$i.'~'.$hdr}; }; }; undef %field; %field = %nfield; undef %nfield; }; # coalesce if ($exi_coal) { $i = 0; while ($i < $idn) { $i++; EQUTEST: foreach $j ($i+1..$idn) { foreach $hdr (@coal) { last EQUTEST if $field{$j.'~'.$hdr} ne $field{$i.'~'.$hdr}; }; $k = $j; }; if ($k > $i) { $kk = $k - $i + 1; foreach $hdr (@coal) { $field{$i.'~'.$hdr} = "$kk\013$field{$i.'~'.$hdr}"; foreach $j ($i+1..$k) { $field{$j.'~'.$hdr} = "\013"; }; }; $i = $k; }; }; }; # print HTML headers print (HTML "$prelude\n"); # print column headers foreach $hdr (@hdrs) { @hhdr = split (/>>/, $hdr); if ($#hhdr > $dpth) { $dpth = $#hhdr; }; }; foreach $d (0..$dpth) { foreach $hdr (@hdrs) { if (!$hdr) { print (HTML "\n"); next; }; @hhdr = split (/>>/, $hdr); if ($d <= $#hhdr) { if ($d == $#hhdr) { &flushth; printf (HTML "%s\n", $dpth-$d+1, $hhdr[$d]); } else { &printth ($hhdr[$d]); }; }; }; &flushth; print (HTML "\n"); }; # print table foreach $i (1..$idn) { print (HTML "\n"); foreach $hdr (@hdrs) { if ($tag{hdr}) { $tag = $tag{hdr}; } elsif ($hdr) { $tag = 'td'; } else { $tag = 'th'; }; $bdy = $field{$i.'~'.$hdr}; undef $number; if ($bdy =~ /\013/) { $number = $`; $bdy = $'; next unless $number; }; if ($bdy =~ /^\s*$/) { $bdy = ' '; }; $align = $align{$hdr}; if ($number) { $number = " rowspan=\"$number\""; }; print (HTML "<$tag align=\"$align$options{$hdr}$number\">$bdy\n"); }; print (HTML "\n"); }; print (HTML $postlude); # subroutines sub close_prelude { if (! $prelude) { $prelude = "\n\n"; }; if (! $postlude || $postlude == 1) { $postlude = "
\n"; if ($prelude =~ /$th\n"); undef $th; } sub common_suffix { local ($l); return $_[0] unless $_[1] && ($_[0] ne $_[1]); return $_[1] unless $_[0]; while (substr ($_[0], --$l) eq substr ($_[1], $l)) {}; return substr ($_[0], $l) if ++$l; return ''; } sub numeric { local ($x, $type) = @_; local ($y); if ($type =~ /\//) { $type = $`; $y = $'; if ($x =~ /$y$/) { $x = $`; } else { return 'undef'; }; }; if ($type eq 'int') { $x =~ tr/-0-9//cd; if ($x =~ /^-?\d+$/) { return $x; } else { return 'undef'; }; }; if ($x =~ /[EPTGMKkhdcmµnpfa]B?$/) { $x = $`; $y = $&; if ($y =~ /^[hdcmµnpfa]B$/) { $y = $scale{substr ($y, 0, 1)}; } else { $y = $scale{$y}; }; } else { $y = 1; }; if ($x =~ /^\s*[+-]?(\d+(\.\d*)?|\.\d+)(E[+-]?\d+)?\s*$/) { return $x*$y; } else { return 'undef'; }; } sub collat0 { # replace all characters by lower case characters, hyphens, and digits local ($x) = $_[0]; $x =~ tr/\200-\253\256\260-\377\254\255\257/\000-\037 !c??Y|?"ca????AAAAAAACEEEEIIIIDNOOOOOxOUUUUYTsaaaaaaaceeeeiiiidnooooo:ouuuuyty-/; $x =~ tr/0-9A-Za-z/-/cs; $x =~ tr/A-Z/a-z/; if ($x =~ /^-/) { return $'; } else { return $x; }; } sub collat1 { # like collat0 but care about groups of digits local ($x) = &collat0 ($_[0]); $x =~ s/(\d+)/'.'.chr(length($1)).$1/ge; return $x; } sub collat { &collat1 ($a) cmp &collat1 ($b) } sub sortstep { local ($a, $b, $hdr) = @_; local ($type) = $type{$hdr}; if ($type eq 'text') { return $field{$a.'~'.$hdr} cmp $field{$b.'~'.$hdr}; } elsif ($type eq 'name') { return &collat1 ($field{$a.'~'.$hdr}) cmp &collat1 ($field{$b.'~'.$hdr}); } else { $a = &numeric ($field{$a.'~'.$hdr}); $b = &numeric ($field{$b.'~'.$hdr}); return -1 if ($a eq 'undef' && $b ne 'undef'); return 1 if ($a ne 'undef' && $b eq 'undef'); return $a <=> $b; }; } sub sortalg { local ($i, $r); foreach $i (0..$#sort) { if ($r = &sortstep ($a, $b, substr ($sort[$i], 1))) { if (substr ($sort[$i], 0, 1) =~ /[aA]/) { return $r; } else { return -$r; }; }; }; return 0; }; sub PA { local($n) = $_[0]; local($i); eval "\%T=\%$n;"; print (STDERR "\n\narray $n\n\n"); foreach $i (sort keys %T) { printf (STDERR "%16s %s\n", $i, $T{$i}); }; } sub PD { local($n) = $_[0]; local($i); eval "\@T=\@$n;"; print (STDERR "\n\narray $n\n\n"); foreach $i (0 .. $#T) { printf (STDERR "%10d %s\n", $i, $T[$i]); }; } sub PB { local($n) = $_[0]; local($i); eval "\%T=\%$n;"; print (STDERR "\n\narray $n\n\n"); foreach $i (sort keys %T) { print (STDERR " $i"); }; print (STDERR "\n"); } sub TEST { &PA ('newhdr'); &PA ('align'); &PA ('options'); &PA ('type'); &PA ('tag'); &PB ('ishdr'); &PD ('hdrs'); &PD ('coal'); &PD ('sort'); }