########################################################################
# sgmlspl script for a *supplemented* version of SGMLS.pm that, when
# reading an nsgmls ESIS with empty tag labels, provides detection
# of defined-empty elements with the method  $element->defempty .
#
# Document Type: article (GELLMU)
# Output: text/xml
# Edited by: William F. Hammond
# Begun: 21 November 1998
#
# The design has become overloaded. This script is serving two
# functions that should be performed by a pipeline of length 2.
#   1. converting SGML to fleshed out XML
#   2. central styling (such as section id management, label and
#      reference handling, . . .)
########################################################################

# Script name used in diagnostics.  It is assigned before the first
# diagnostic so that the start-up banner below does not print an empty
# name (it was previously assigned only after the banner was written).
$WhoAmI = "xmlgart.pl";

# UTF-8 switch, driven by the environment variable GELLMU_UTF8.
# $utf8On is consulted later when the output encoding is chosen.
$utf8On = 0;
if(defined($ENV{"GELLMU_UTF8"}) && ($ENV{"GELLMU_UTF8"} == 1)){
    # NOTE(review): "use utf8" is a compile-time, lexically scoped pragma;
    # it takes effect for this block whether or not the test above is
    # true at run time, and it only concerns UTF-8 in the source text.
    # Confirm that this is what was intended.
    use utf8;
    $utf8On = 1;
    print STDERR " *** xmlgart.pl: UTF-8 Library\n";
}
else{
    print STDERR $0, " *** ", $WhoAmI, "\n";
};

use SGMLS;          # Use the SGMLS package.
use SGMLS::Output;  # Use stack-based output.

# Global variables

# Alternations of sectional element names, lower-case and capitalized;
# interpolated into regular expressions by the element handlers.
$allsecs = "section|subsection|subsubsection|paragraph|subparagraph";
$AllSecs = "Section|Subsection|Subsubsection|Paragraph|Subparagraph";

# Counter symbol tables.  Index 0 is a placeholder so that counter
# value n maps directly to element n.
@Alphabet = qw(0 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z);
@alphabet = qw(0 a b c d e f g h i j k l m n o p q r s t u v w x y z);
@Roman = qw (0 I II III IV V VI VII VIII IX X XI XII XIII XIV XV XVI XVII XVIII XIX XX);
@roman = qw (0 i ii iii iv v vi vii viii ix x xi xii xiii xiv xv xvi xvii xviii xix xx);

$asstkey = "";
$asstname = "";
$asstid = "";
$asstlabel="";  # Computed label; formed at but written
                # just before rather than in output.
                # A label in could become a font-entangled anchor
                # in an HTML formatting.
                # NOTE(review): element names appear to be missing from
                # the comment above (markup lost in transit?) -- confirm.
# Global state, continued (assertion, bibliography, citation, DTD and
# equation bookkeeping).  One statement per line: with several
# statements sharing a line, everything after the first "#" on that
# line is comment text and the later assignments never execute.
$asstser = "";
$asstseq=0;
$asstlabelprefix="AssertLabel-";
$asststyle = "";
@attsidflag=();     # Logicals: current sectional unit has sid attribute set
$autoname = 0;      # Suppress writing name of anonymous element
$autokey = 0;       # counter for automatic key generation
$autoprefix = "KEY-";
$biblabelprefix="BibLabel-";
$bibprefix="BibRef-";
$bibliseq=0;
$bibkey="";
$biblabel="";
$blabel = 0;
$citekey = "";
$citenote = "";
$citetext = "";
$cssurl = "/~hammond/gellmu/gellmuart.css";
$discardflag = 0;
$dtdfpi = "-//GNU GPL: William F. Hammond//DTD GELLMU XML 0.7.0//EN";
$dtdurl = "http://www.albany.edu/~hammond/gellmu/xml/xgellmu.dtd";
$eltser=0;
$eltdepth=0;
$emath = 0;         # Signal for eqnline or eqnrow labeling
$encoding="ISO-8859-1";   # text encoding of XML output
@eqkey = ();        # $eqkey[$thiseq] = key for last explicit label in equation
$eqnautoprefix = "EqnKey-";
$eqnautoseries = "EqnAuto";
$eqnacellseq=0;     # eqnarray cell sequence
$eqnintautoprefix = "EqnIntKey-";
$eqnintautoseries = "EqnAuto";
$eqnkey = "";       # label key, if any, for overall equation or eqnarray
$eqnlabel = "";     # stored label (for equation and eqnarray/eqnrow)
$eqnline= "";       # contents inside last eqnline
$eqnamode = 0;      # eqnarray mode, 0 <= value <= 7
# Value dictionary for $eqnamode:
#   Odd means the eqnarray attribute nonum is true
#   0 or 1 means no eqnkey nor eqnser
#   2 or 3 means eqnkey only
#   4 or 5 means eqnser only
#   6 or 7 means eqnkey and eqnser
$eqnintseries = ""; # label series, if any, for last label in eqnrow
$eqnser = "";       # label series, if any, for overall equation or eqnarray
$eqntag = "";       # author supplied visible tag for overall equation or eqnarray
$eqseq=0;           # sequence index for equation and eqnarray elements
$fgcnt="";
$fglbl="";
$gmath = 0;         # Inside math or not?
# Global state, continued (auxiliary-file flags, label bookkeeping,
# table state and sectional-unit state).  One statement per line: with
# several statements sharing a line, everything after the first "#" on
# that line is comment text and the later assignments never execute.
$internalopen = 0;
$intsubsetopen = 0;
$lgcnt=0;           # logical for existence of aux file for contents
$lgents = 0;        # logical for existence of aux file for entities
$lglbl=0;           # logical for existence of aux file for references
$labelhold = "";    # store all stuff for current label pending disposition
$labelinshead = 0;
$lastuserlabel = "";  # key of last label -- used for "popkey" and "sunit"
%labelkeys = ();    # num seq valued hash on label keys (for key uniqueness)
%labelloc = ();     # sectional unit location of label hash by key ??
%labelser=();       # series name hash from label series attribute, if any ??
$labelrun=0;        # num seq for last label including auto-label for sections
%labelseq=();       # num seq hash on label key
%labelsqr=();       # num seq runner hash on label series name
$llocprefix = "LabelSecRef-";  # ent. prefix to $key for $labelloc{$key}
$lserprefix = "LabelSer-";     # ent. prefix to $key for $labelser{$key}
$lseqprefix = "LabelSeq-";     # Was $sertagprefix
$makecontents=0;
$maxeltdepth=0;
$maxsecdepth=5;     # for article
$minsecdepth=0;     # 1 for doctype "article" -- set at 'start'
$popkey = 0;        # to detect presence of in sectional unit head
                    # in case there is a timing problem finding the key
$sepchar = '\\';
$sepcharre = '\\\\';  # regexp for $sepchar
$sqrtdepth=0;
@radicflag=();      # radicand is not the total content of sqrt
$tabdepth=0;
@tabarg=();
@tabalen=();
@tabaseq=();
@tabasym=();
@taburowflag=();
@tabuhline=();
@tdname=();         # name for output table cell at current tabdepth
@currserial=();
$currserial[0] = 0;
@agpar=();
@agseq=();
@oppar=();
@opseq=();
$secdepth=-1;
$secnumdepth=3;     # By default, parts, sections, subsections
@secname=();        # Name of current sectional unit at $secdepth
# NOTE(review): the array initialized here is @secrefs, but the
# assignments that follow populate @secref (no trailing "s").  Both are
# package globals, so this runs, but the two names are different
# arrays -- confirm which one the rest of the file reads.
@secrefs=();        # Output names of sectional units
$secref[0] = "part";  # not yet wired
$secref[1] = "Section";
$secref[2] = "Subsection";
$secref[3] = "Subsubsection";
$secref[4] = "Paragraph";
$secref[5] = "Subparagraph";
@secser=();         # Sequence number of current sectional unit by $secdepth
$secser[0] = 0;
# Global state, continued (sectional-unit identifiers and headings).
# One statement per line: with several statements sharing a line,
# everything after the first "#" on that line is comment text and the
# later statements never execute.
@secsid=();         # sid value for current Sectional unit, indexed on $secdepth
$secsid[0] = "";
@secunit=();
$secunit[0] = "";
$shead = "";
$srefprefix = "SecRef-";
$srefseq = 0;
$srefname = $srefprefix . $srefseq;
@sheadlabel=();     # seq. of queued labels in shead indexed by $labelinshead
$seckey = "";       # key for last-opened Section or section family element
$slabelprefix = "SU-";  # prefix for auto-generated section keys
$spassatt = "";
$sopt = "";
$sprefix = "";
$sunit = "";
$sunitflag = 0;
$sipdepth=0;
@sipsubseq=();
@sipsupseq=();
$stemname="";
$havecitekey=0;
$thislabel = "";    # string for last user label; used for /shead
                    # NOTE(review): the comment above looks truncated
                    # (markup lost in transit?) -- confirm its wording.
$thissecunit = 0;
$tocdepth=3;
$xmlinput = 0;      # Is input an xml document?

# Argument processing
#
# Accepted invocation: no arguments, or the single argument
# -e={text-encoding}.  Two or more arguments, or one unrecognized
# argument, end the run with exit status 1.  With no arguments the
# encoding comes from the environment variable GELLMU_Encoding when it
# is non-empty, else "UTF-8" when $utf8On is 1, else the value that
# $encoding already holds.
$pname = $0;
$pname =~ s/^.*\///;   # keep only the part after the last "/"
$usage = "Usage: " . $pname . " < {nsgmls-ESIS} " . "[ -e={text-encoding} ]\n";
$arglen = scalar(@ARGV);
print STDERR "No. of args: " . $arglen . "\n";
if ($arglen >= 2){
    print STDERR $usage;
    exit(1);
}
elsif(($arglen == 1) && ($ARGV[0] =~ /^-e=/ )){
    my $tenc = $ARGV[0];
    $tenc =~ s/^-e=//;
    $encoding = $tenc;
    print STDERR "Encoding argument: ", $encoding, "\n";
}
elsif($arglen == 1){
    print STDERR "Unrecognized first argument: ", $ARGV[0], "\n";
    exit(1);
}
else{
    if(defined($ENV{"GELLMU_Encoding"}) && ($ENV{"GELLMU_Encoding"} ne "")){
        $encoding = $ENV{"GELLMU_Encoding"};
        print STDERR "Using ENV-GELLMU_Encoding: ", $encoding, "\n";
    }
    elsif($utf8On == 1){
        print STDERR "UTF-8 Coding implied by use of UTF-8 library\n";
        $encoding = "UTF-8";
    };
};
print STDERR "Final encoding value: ", $encoding, "\n";

# Select the DTD public identifier and URL that go with the encoding.
# Only ISO-8859-1 and UTF-8 are accepted; anything else ends the run
# with exit status 2.
if($encoding eq "ISO-8859-1"){
    $dtdfpi = "-//GNU GPL: William F. Hammond//DTD GELLMU XML 0.7.5//EN";
    $dtdurl = "http://www.albany.edu/~hammond/gellmu/xml/xgellmu.dtd";
    print STDERR "Encoding switch for ISO-8859-1\n";
}
elsif($encoding eq "UTF-8"){
    $dtdfpi = "-//GNU GPL: William F. Hammond//DTD GELLMU XML 0.7.5U//EN";
    $dtdurl = "http://www.albany.edu/~hammond/gellmu/xml/uxgellmu.dtd";
    print STDERR "Encoding switch for UTF-8\n";
}
else{
    print STDERR '*** Encoding spec "' . $encoding . "\" not recognized\n";
    exit(2);
};

#
# Specific Element Handlers.
#
# The Empty String
#
# NOTE(review): every sgml() registration in this section has an empty
# string as its first argument, and several output strings are empty
# or lack markup.  Presumably the element names and tags were lost in
# transit; they are reproduced here exactly as found and must be
# restored from the author's copy before this script is used.
#
# Each handler is on its own lines: with several statements sharing a
# line, everything after the first "#" on that line is comment text
# and the later statements never execute.
#
# Conventions shared by the handlers below:
#   - start handlers bump $eltser and $eltdepth and track $maxeltdepth;
#     most also record $currserial[$eltdepth] and zero $agseq/$opseq
#     for the new element serial;
#   - handlers that need the element's content call
#     push_output('string') at the start and pop_output at the end;
#   - end handlers decrement $eltdepth.

# Counts the element without leaving the depth raised.  Writes " "
# when the parent is "sunit", or when the parent is "op0" and the
# grandparent is a sectional element; otherwise writes nothing.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $eltdepth--;
    my $os = "";
    # Pass it on so that the XML version is a usable source
    # But:
    my $par = $_[0]->parent;
    my $pan = $par->name;
    if($pan eq "sunit"){
        $os = " ";
    }
    elsif($pan eq "op0"){
        my $gpan = $par->parent->name;
        if($gpan =~ /^($AllSecs|$allsecs)$/){
            $os = " ";
        };
    };
    output($os);
});

sgml('', "");

# Start handler: writes an empty string and sets $discardflag.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    output('');
    $discardflag = 1;  # eat a subsequent newline
});

# Start handler: writes the table-of-contents entity reference when
# $lgcnt is non-zero; otherwise warns on STDERR.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    if($lgcnt != 0){
        my $os = "&TableOfContentsFile;\n";
        output($os);
    }
    else{
        print STDERR "xmlgart.pl WARNING: No file $stemname.xcn\n", "cannot do \n";
    };
});

sgml('', sub{
    $eltdepth--;
});

# Start handler: advances $labelrun and captures the content.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $labelrun++;
    push_output('string');
});

# End handler: stores the captured content in $lastuserlabel, or
# "BL%<n>" (n = $blabel) when the content is empty.  Writes an empty
# string.
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $os="";
    $blabel++;  # for use as default value
    if($pos ne ""){
        $os = $pos;
    }
    else{
        $os = "BL\%" . $blabel;
    };
    $lastuserlabel = $os;
    $os = "";
    output($os);
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

# End handler: wraps the captured content in braces when it sits in a
# "tabarg", or in an "ag0" whose parent's name begins with
# table/tabular/tabuhead/array/arrhead; otherwise writes it according
# to $gmath.
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $par = $_[0]->parent;
    my $pan = $par->name;
    my $gpan = $par->parent->name;
    if(($pan eq "tabarg") || (($pan eq "ag0") && ($gpan =~ /^(table|tabular|tabuhead|array|arrhead)/))){
        # return;
        # presumably a p-cell -- ignore a width spec for now
        # NOTE(review): the two statements below are taken to be live
        # code following the disabled "return" -- confirm.
        my $os = "{" . $pos . "}";
        output($os);
    }
    else{
        my $os = "";
        if($gmath == 1){
            $os = "" . $pos . "";
        }
        else{
            $os = "" . $pos . "";
        };
        output($os);
    };
});

# This code for secnumdepth is here solely for the purpose of
# saving the value of secnumdepth internally; it is passed on.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $os = "" . $pos . "";
    output($os);
    $secnumdepth = int($pos);
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

# End handler: the captured content is a label key.  Writes the entity
# reference &<$lseqprefix><key>; when the key has a series recorded in
# %labelser, else the bracketed placeholder.
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $key = $pos;
    my $os = "\[$lseqprefix$key\]";
    if($labelser{$key} ne ""){
        $os = '&' . $lseqprefix . $key . ';';
    };
    output($os);
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

# End handler: the captured content is a label key.  Writes
# $labelseq{key} when positive, else $labelloc{key} when positive,
# else the bracketed placeholder.
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $key = $pos;
    my $os = "\[labelseq($key)\]";
    if($labelseq{$key} > 0){
        $os = $labelseq{$key};
    }
    elsif($labelloc{$key} > 0){
        $os = $labelloc{$key};
    };
    output($os);
});

# sectional unit of parent where the label with given key occurs
# empty key: the sectional unit where the sref occurs
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    push_output('string');
});

sgml('', sub{
    my $pos = pop_output;
    my $elt = $_[0];
    my $name = $elt->name;
    my $par = $elt->parent;
    my $pan = $par->name;
    my $os;
    if($pos =~ /^\s*$/){
        $os = '&' . $srefname . ';';
    }
    else{
        $os = '&' . $llocprefix . $pos . ';';
    };
    output($os);
    $eltdepth--;
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

# End handler: formats a positive counter value according to the
# element's "type" attribute (A, a, I, i select @Alphabet, @alphabet,
# @Roman, @roman; anything else leaves the number as is).
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $os = "";
    my $npos = $pos;
    my $type = $_[0]->attribute("type")->value;
    if($npos > 0){
        if($type eq "A"){
            $os = $Alphabet[$npos];
        }
        elsif($type eq "a"){
            $os = $alphabet[$npos];
        }
        elsif($type eq "I"){
            $os = $Roman[$npos];
        }
        elsif($type eq "i"){
            $os = $roman[$npos];
        }
        else{
            # type unsupported or unspecified
            $os = $npos;
        };
        # A value beyond the end of the selected table has no symbol;
        # write the number itself rather than nothing.
        if(!defined($os)){
            $os = $npos;
        };
    }
    else{
        # Hope that the popped output is an appropriate cdata entity
        $os = '' . $pos . "";
    };
    output($os);
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    push_output('string');
});

# End handler: passes the content on and saves its integer value in
# $tocdepth.
sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $os = "" . $pos . "";
    output($os);
    $tocdepth = int($pos);
});

# The next three start/end pairs are identical: the start handler sets
# $gmath to 1 and captures the content; the end handler writes
# "<" . element name . "\n>" followed by the content and resets $gmath.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    $gmath = 1;
    push_output('string');
});

sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $elt = $_[0];
    my $name = $elt->name;
    my $os = "<" . $name . "\n>" . $pos . "";
    output($os);
    $gmath = 0;
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    $gmath = 1;
    push_output('string');
});

sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $elt = $_[0];
    my $name = $elt->name;
    my $os = "<" . $name . "\n>" . $pos . "";
    output($os);
    $gmath = 0;
});

sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    $gmath = 1;
    push_output('string');
});

sgml('', sub{
    $eltdepth--;
    my $pos = pop_output;
    my $elt = $_[0];
    my $name = $elt->name;
    my $os = "<" . $name . "\n>" . $pos . "";
    output($os);
    $gmath = 0;
});

# Start handler for an equation: sets $gmath and $emath, advances
# $eqseq, and clears the per-equation key, series, tag and label state
# before capturing the content.
sgml('', sub{
    $eltser++;
    $eltdepth++;
    if($maxeltdepth < $eltdepth){$maxeltdepth = $eltdepth;};
    $currserial[$eltdepth] = $eltser;
    $agseq[$eltser] = 0;
    $opseq[$eltser] = 0;
    $gmath = 1;
    $emath = 1;
    $eqseq++;
    $eqnkey = "";
    $eqnser = "";
    $eqntag = "";
    $eqnintseries = "";
    $eqnlabel = "";  # for passed inside labels
    push_output('string');
});

# NOTE(review): the handler below continues past the end of this
# section of the file; it is reproduced only as far as it is visible
# here and is intentionally left open.
sgml('', sub{
    my ($elt, $ev) = @_;
    my $pos = pop_output;  # Inside output apart from eqnline contents
    if($pos ne ""){
        print STDERR "xmlgart.pl WARNING: Loose content in eqn id $eqseq\n", $pos, " for equation at source line ", $ev->line, "\n";
    };
    my $os = "";
    my $nonum = $elt->attribute("nonum")->value;
    my $atteqseq = $elt->attribute("eqseq")->value;
    my $thiseqkey = "";
    my $thiseqser = "";
    # With nonum unset a label in eqnline can supersede the values,
    # if any, of $eqnkey and $eqnser from , .
    # Handling for inside labels and nonum is in code for