;; This file: gellmu.el
;;
;; The GELLMU Syntactic Translator
;;
;; By William F. Hammond
;;
;; GELLMU stands for ``Generalized Extensible LaTeX-Like MarkUp''.
;; This Elisp program, for use with Gnu-Emacs (version 19 or higher),
;; may be used to convert a document with GELLMU's LaTeX-Like markup
;; to a document in an authoring language under Standard Generalized
;; Markup Language (SGML) or, if desired, to a document under
;; eXtensible Markup Language (XML).
;;
;; Copyright (C) 1999-2007 by William F. Hammond
;;
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License as
;; published by the Free Software Foundation; either version 2 of the
;; License, or (at your option) any later version.
;;
;; A copy of this license is available at the same location as this
;; program; simply substitute "LICENSE" for "gellmu.el" in the
;; full resource name for this file.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY, without even an implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
;;
;; Author's Address:
;; Department of Mathematics & Statistics
;; The University at Albany
;; 1400 Washington Avenue
;; Albany, New York 12222
;; USA
;;
;; Author's Email: hammond@math.albany.edu
;;
;; The said License does not permit incorporating this program into
;; proprietary programs. There may, however, be permissible ways to use
;; it; see the license.
;; ======================================================================
;; Be aware that:
;;
;; (1) GELLMU is not LaTeX.
;;
;; (2) There is no completely reliable way to convert legacy
;; LaTeX to an SGML or XML language.
;; See the docs.
;; A first level outline of this code is available by searching for lines
;; beginning with the string "   ;;" (three spaces, two semi-colons).
;;
;; Elementary Usage:
;;
;; -- Interactive --
;;
;; M-x load-library gellmu
;;
;; Then one of the following:
;;
;; M-x gellmu-trans
;; M-x gellmu-html
;; M-x gellmu-sgml
;; M-x gellmu-xml
;;
;; after saving your GELLMU markup but not closing Emacs. gellmu-trans
;; will place its output in a separate buffer. For a complete list
;; of the available commands, see the interactive function definitions
;; in this library.
;;
;; -- Batch Mode --
;;
;; emacs -batch -l ./gellmu.el -f gellmu-trans myfoo.glm
;; OR
;; emacs -batch -l ./gellmu.el -f _function-name_ myfoo.glm
;;
;; The first usage is for "advanced" GELLMU. Roughly, the other
;; function calls are for "basic" GELLMU (for only the simplest
;; layer of LaTeX-like syntax) or for hybrid usage.
;;
;; IMPORTANT: Any form of GELLMU requires knowledge of the
;; SGML or XML document type for which the author is writing.
;;
;; Advanced Gellmu with the didactic "article" document type has many
;; names that match corresponding LaTeX names. Someone with knowledge
;; of LaTeX should be able to prepare a simple "article" by using
;; "\documenttype{article}" instead of LaTeX's "\documentclass{article}".
;; The preamble ends with "\begin{document}", and the document ends
;; with "\end{document}". Blank lines begin new paragraphs.
;;
;; For "article" under the function call gellmu-trans aside from that
;; just mentioned:
;;
;; o There must be a \title{...} in the preamble although it
;; may be empty, i.e., \title{}
;;
;; o Inline markup may not be loose in the document body (between
;; \begin{document} and \end{document}). At the very least it
;; must be in a paragraph, and a paragraph may be begun with
;; a blank line.
;;
;; Document Type Identification
;;
;; If your document is to be LaTeX-like "article", make the first
;; line
;;
;; \documenttype{article} ,
;;
;; which references the didactic document type represented by
;; "gellmu.dtd".
;;
;; For full flexibility there is a key system for attaching a sequence
;; of document type information to a key that is supplied with
;;
;; \documenttype[keystring]{doctype-name}
;;
;; The option is treated as a SYSTEM identifier unless its value appears
;; as a key for the associative list gellmu-doctype-info.
;;
;; Moreover, each doctype-name may appear as a key for the associative
;; list gellmu-doctype-keylist.
;;
;; Thus,
;; \documenttype{article}
;;
;; spawns "gellmu.dtd" because "article" is by default assigned
;; the key "gellmuart" with gellmu-doctype-keylist and "gellmuart"
;; is assigned the sequence ("SYSTEM", "gellmu.dtd") with
;; gellmu-doctype-info.
;;
;; The same effect would be obtained with the usage
;;
;; \documenttype[gellmuart]{article} .
;;
;; Other examples:
;;
;; \documenttype{html}
;;
;; picks up the default key for "html". As of version 0.7.4.8 that key
;; is "html-5" (see CHANGES below); before that it was "html-4.01", which
;; spawns the sequence
;; ("PUBLIC", "-//W3C//DTD HTML 4.01 Transitional//EN"), while
;;
;; \documenttype[xhtml-1.0]{html}
;;
;; spawns the sequence
;; ("xml", "PUBLIC", "-//W3C//DTD XHTML 1.0 Transitional//EN",
;; "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd",
;; "UTF-8", "xmlns=\"http://www.w3.org/1999/xhtml\"") ,
;;
;; \documenttype[xhtml-1.0s]{html}
;;
;; changes "transitional" in the previous example to "strict", and
;;
;; \documenttype[mathml-altheim]{html}
;;
;; references
;; ("xml", "PUBLIC", "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN",
;; "mathml.dtd", "UTF-8", "xmlns=\"http://www.w3.org/1999/xhtml\"") .
;;
;; In the larger GELLMU design, support of legacy LaTeX is not the main
;; goal. Keeping the syntactic translator free of more than minimal
;; command-name intelligence is a goal. That makes it possible to use
;; LaTeX-like markup practice as a general markup interface to *any*
;; SGML or XML language.
;;
;;
;; Release identifier for this copy of the translator; the CHANGES log
;; that follows records what each release altered.
(defvar gellmu-version
  "0.7.4.8"
  "Version number of the library \"gellmu\"")
;; Date stamp paired with `gellmu-version'.  The docstring is kept flush
;; left so that the line stays within the 79-column width of this file.
(defvar gellmu-version-date
  "16-Jan-2014"
"Date of the current source version (\"gellmu.el\") of the library \"gellmu\"")
;; CHANGES:
;;
;; 0.7.4.7 -> 0.7.4.8 * Added "html-5" to doctype-info and made it the
;; 16-Jan-2014 default for the "html" document-type
;;
;; 0.7.4.6 -> 0.7.4.7 * Changed value of variable gellmu-sgml-default-coding
;; 20-Sep-2010 from "iso-latin-1" to "utf-8"
;;
;; 0.7.4.5 -> 0.7.4.6 * Change: removed tolerance of omitted spec for
;; 25-May-2008 the number of arguments of a newcommand
;;
;; 0.7.4.4 -> 0.7.4.5 * Cleaned handling of (what-line) in messages
;; 29-Jun-2007 and fixed a bug in error message code inside
;; balance-pair-forward
;;
;; 0.7.4.3 -> 0.7.4.4
;; 03-Jun-2007 * Better handling of mismatched newcommand args
;;
;; * In LaTeX emulation run diagnostics for the
;; balancing of '$' characters (toggle math zones)
;;
;; 0.7.4.2 -> 0.7.4.3
;; 01-Feb-2007 * Code the strings "'''" and "''''" as \rtq and \rqq
;; and then the loose "`" and "'" as \lsq and \rsq
;;
;; 0.7.4.1 -> 0.7.4.2
;; 05-Jan-2006 * Corrected failed attempt to generate warnings
;; about loose '#' in source
;;
;; 0.7.4 -> 0.7.4.1
;; 29-Aug-2004 * Corrected definition of variable
;; gellmu-tab-amp-tail
;;
;; 0.7.3 -> 0.7.4
;;
;; 04-Aug-2004 * For "regular" handling added translation of "\@"
;; to and of "\/" to .
;;
;; 02-Aug-2004 * Added vect, mscript, setOf, overset, underset
;; to gellmu-autoclose-list (for "regular" handling)
;;
;; 11-Jul-2004 * Corrected overzealous safety check of
;; 07-Jul-2001. It is intended that the
;; name of a newcommand may be used at the
;; beginning of its value string in order to
;; "front" document type names. The previous
;; code blocked the use of \ab in the definition
;; of \a.
;;
;; * Provide for decimal numeric character entities
;; outside of newcommand arguments. OK in macro.
;; (The char entity form is not ambiguous.)
;;
;; * Provided escape "\#" to make literal '#' safe
;; (when followed by an integer) in newcommand
;; values. (\# had been documented to do this.)
;;
;; * Corrected obsolete call to concat to prepend
;; "#" to an integer in the course of processing
;; invocations of \newcommand with arguments.
;;
;; 0.7.2 -> 0.7.3: * Provided some looping safety for \newcommand:
;; 07-Jul-2001 Exit with error if there is a self reference
;; in (unexpanded) definition beyond its location 1
;;
;; * Small bug in newcommand handling: pick up
;; point for next invocation one char too soon.
;;
;;
;; 0.7.1 -> 0.7.2: * Use first \documenttype option "" to avoid
;; 11-Jun-2001 writing the DOCTYPE declaration and root tags
;; altogether. Use first option "INTERNAL" for
;; a DOCTYPE declaration under SGML rules
;; where the internal declaration subset comes,
;; as always, from the \documenttype option that
;; follows its sole argument. For XML rules
;; instead of SGML rules, use "XINTERNAL".
;;
;; * Code for the internal declaration subset now
;; accepts \attlist{} and \element{} in addition
;; to \entity{} and \notation{}, and all of these
;; names are now configurable (so one can avoid
;; possible name conflicts).
;;
;; * Bug: "\%" in internal declaration subset source
;; is now converted to "%".
;;
;; * Provided variable gellmu-hold-xml-introducer
;; for blocking the writing of an XML declaration.
;;
;; * Provided variable gellmu-procinst-name
;; for a meta-command that may be used consciously
;; to enter a processing instruction.
;;
;; Tried and then backed out a change that moved the pick up
;; point for newcommand substitutions from one
;; character beyond the beginning of the
;; substitution point to the end of the last
;; occurrence of the invocation string
;; introducer in the expanded substitution if
;; there is such an occurrence. Perhaps at some
;; point this would be useful for something called
;; "\frontcommand" rather than "\newcommand".
;;
;; 05-Jun-2001 * Provided variable gellmu-parb-everywhere
;; and calling function gellmu-part-advanced for
;; allowing parb handling with fragments of
;; advanced gellmu documents.
;;
;; * Provided variable gellmu-doc-fragment and
;; calling function gellmu-part-basic for
;; facilitating the preparation of document
;; fragments with documenttype handling.
;;
;; * What was called "regular GELLMU" (the full
;; layer of LaTeX syntactic emulation) is now
;; called "advanced GELLMU" (in comments here)
;;
;; 0.7.0 -> 0.7.1: * Bug: blocked writing of "nul" and friends
;; 27-May-2001 in cleanup for "\macro", "\newcommand",
;; and "\Macro" (late macros) except when
;; gellmu-regular-sgml is false (advanced gellmu)
;;
;; * Bug: pick up point in the loop for making
;; newcommand substitutions was at the end of
;; the last completed substitution rather
;; than one character beyond its origin.
;;
;; * Provided item in gellmu-doctype-info for a
;; list of attribute settings for the root tag
;; under basic GELLMU in the XML case
;;
;; * Bug fixes:
;;
;; * Code for encoding item in gellmu-doctype-info
;; supplied
;;
;; * \documenttype with empty option fouled up;
;; should simply suppress DOCTYPE declaration but
;; provide root open and close tags. Useful to
;; override setting of root tag key.
;;
;; * When \documenttype option is not a known key
;; and therefore must be a SYSTEM identifier,
;; URIs are now accepted. (Added the chars
;; `/', `:', `%', and `~' to the internal variable
;; gellmu-doctype-opt-regexp)
;;
;; * In XML mode at point where possible unbalanced
;; open tag warning was issued, no open tag was
;; written.
;;
;;
;; 0.6.9 -> 0.7.0: * New version level. No plans for new features.
;; 26-Mar-2001
;;
;; 0.6.8 -> 0.6.9:
;;
;; * Revised code for quophrase and squophrase to
;; attempt detection of loose apostrophes and
;; (right) single quotes inside quophrase and
;; squophrase zones. Not completely foolproof.
;; Sometimes explicit markup is required.
;;
;; * Fixed error with buffer-substring overreach
;;
;; * Revised tab-amp code to treat as tabcell
;; when followed either by blank or newline
;; (gives sgml entity when followed by other things)
;;
;; * Changed \macro to \Macro and then cloned
;; the code for \Macro to new code for \macro
;; that comes first. So the macro processing
;; sequence is now:
;;
;; 1. macro 2. mathsym 3. newcommand 4. Macro.
;;
;; This reflects the ideas that (a) \macro is safest
;; before other expansions and (b) \macro is most
;; useful for porting regular LaTeX to GELLMU.
;; WARNING: \Macro has had little testing. Use with
;; extreme caution; it's hard to foresee how things
;; will look after the other substitutions.
;;
;; * Added code to control coding-system-for-write
;; without query when noninteractive
;;
;; * Added gellmu-autoclose-list; for these
;; names write a closetag at the end of an aolist
;; unless the author has already done so. Note that
;; an author endtag must follow the aolist without
;; whitespace if the name is on this list.
;;
;; 5-Jan-2001 * Activated public variable gellmu-parb-hold --
;; list of name strings for "parb" blocking under
;; "advanced gellmu" (==> gellmu-regular-sgml false)
;;
;; * Greatly changed the function of \macro.
;; The old functionality is that of \newcommand
;; without arguments. New: whatever is in the
;; name field is the name, with or without a
;; leading backslash. The name can be anything.
;; At an invocation site a `;' immediately following
;; the macro name terminates the invocation; a
;; semi-colon used this way is discarded.
;; Use with extreme caution. If questions arise,
;; use the "expansions" exit.
;;
;; * Changed the order of macro processing to:
;; (1) \mathsym, (2) \newcommand, (3) \macro
;;
;; * Greatly widened gellmu-command-regexp to
;; accommodate non alphanumeric command
;; names. (Name discipline will be imposed by
;; your validating parser.)
;;
;; 1-Dec-2000 * Fixed bug in macro expansions: was not using
;; regexp-quote on names when expanding; this was
;; detected when a name contained '+' which is
;; special for Elisp regexps.
;;
;; * Fixed aolist bug: a command with a sole option
;; was getting an automatic endtag.
;;
;; * If gellmu-verbatim-clean is set, then the
;; routine gellmu-clean-verbatim is used
;; to convert *completely literal* input
;; to a name-protected "verblist" (item = "nln")
;; -- not the past verbatim and not the present
;; default.
;;
;; * Automated manmac-like verbatim handling added.
;; Usage is NOT recommended for new documents but
;; may be useful for porting legacy documents.
;; Depends on the variables
;; gellmu-manmac-bar-name and
;; gellmu-manmac-literal
;; which are "" and nil, respectively, by default.
;;
;; * Provided automated conversion of balanced
;; `` ... '' to \quophrase{ ... } or to
;; the tag represented by the variable
;; gellmu-quophrase-name when not set to "".
;; Likewise for ` ... ' when
;; gellmu-squophrase-name is not "" (the default).
;;
;; * Provided a variable gellmu-expansions-only
;; as a flag to gellmu-trans for exiting after
;; writing a GELLMU source file (with suffix
;; the value of gellmu-expansions-suffix) after
;; washing, removing comments, and expanding all
;; of the macro pseudocommands. gellmu-trans
;; on this source should yield output identical
;; to that obtained from the original source
;; but for the value of the stem attribute.
;; * Divided the "cleanse" section into "wash"
;; and "cleanse" and moved macro expansions
;; in between the two.
;; * Attribute "stem" (value of gellmu-stem-name)
;; automatically written on root element to
;; facilitate naming of aux files by others
;; * "macro" and "newcommand" changed to allow
;; any char other than "\" in the macro name.
;; * Provision of meta-command \mathsym (value
;; of gellmu-mathsym-name) and variables
;; gellmu-mathsym-wrapper, which surrounds
;; every invocation, and gellmu-mathsym-tag
;; which writes meta-info in the output.
;; 0.6.7 -> 0.6.8:
;; 9-Sep-2000 * Unified escapes for "basic" and "advanced":
;; 1. '\' <--- "\\" (deprecated in "advanced" GELLMU
;; since "\\" gives: "brk0" = "taburow" if in
;; "tabular", "brk" = newline if followed by
;; white space, or '\' otherwise, with all of
;; this usage not recommended in "advanced")
;; 2. '{' <-- "\{"
;; 3. '}' <-- "\}"
;; * Clean up of "basic" GELLMU handling after
;; a shakedown on docs prepared for Sebastian
;; Rahtz's version of the TEI.2 document type
;; * Newcommand with arguments; when used without
;; args, slightly more expensive than simple
;; macros. These can be mixed and matched.
;; See gellmu-newcommand-name. Should
;; gellmu-macro-name be purged from this code?
;; * Simple macros (without arguments); see
;; gellmu-macro-name.
;; * Cleaned up some error messages
;; * Enforcement of gellmu-xml-strict
;;
;; 0.6.6 -> 0.6.7: * Key-based handling of the DOCTYPE declaration
;; 16-Aug-2000 * Handling of internal declaration subset and
;; SGML entity references in the standard SGML
;; notation, i.e., "&foo;"
;; * Plans for delayed internal declaration subset
;; e.g., in XML made from direct SGML output
;; * Basic GELLMU: switches that make it possible to
;; edit directly for any SGML or XML using only
;; basic LaTeX-like syntax (but not the default
;; behavior, which still supports many other LaTeX-
;; like features). With *basic* GELLMU a LaTeX-like
;; command option is used only for attributes, and
;; the leading ':' is optional.
;; * Emulation of LaTeX's tabular (lrc only)
;; Now writing "brk0" instead of "brk" in regular
;; GELLMU for "\\", if followed by white space
;; so that it can be used dually for "brk" and
;; for tabular's "taburow".
;; Now writing "tabucell" for "&" if followed by
;; white space; "&" otherwise has SGML entity
;; meaning.
;; * Many new variables, some not used yet.
;;
;;
;; 0.6.5 -> 0.6.6:
;; 13-Jul-2000
;; The new variables may be set in a batch
;; file that launches gellmu-trans non-interactively
;; There is a good bit of control of document
;; type issues in GELLMU source. The option to
;; \documenttype, if present, now serves as a
;; key to gellmu-doctype-info. If there is no
;; option, the program will seek the key for the
;; document's root element name, i.e., the argument
;; of \documenttype, in the variable
;; gellmu-doctype-keylist. Thus, for each root name
;; there can be a preferred DOCTYPE.
;;
;; Provided variables gellmu-doctype-keylist,
;; gellmu-doctype-info, gellmu-xml-introducer,
;; and changed the former gellmu-doctype to
;; gellmu-doctype-root-element.
;;
;; 0.6.4 -> 0.6.5:
;; 29-Jun-2000 When gellmu-regular-sgml is true and, therefore,
;; the only legitimate command option is the [: ...]
;; for an attribute list, make the ':' redundant.
;;
;; Provided different rules for escaped use of '\',
;; '{', '}', '&', '%', ... in source for regular-sgml.
;;
;; Provided string gellmu-sgml-emptytag-close
;; with default value "/>" which can be changed
;; to ">" for classical SGML.
;;
;; Provided gellmu-regular-sgml to block most
;; LaTeX emulation beyond basic syntax.
;;
;; When first arg/opt is an option inside which
;; the first char is ':', the rest of the option
;; content is inserted inside the command's
;; open tag. For advanced GELLMU one
;; needs to watch what characters other than
;; 0-9A-Za-z are used in attribute values since
;; by default most other characters are made into
;; empty elements, which are not legal in attribute
;; values. Thus, for example, without regular-sgml
;; urls should not be attribute values. For this
;; reason such things that normally would be
;; attributes in the SGML world are provided as
;; command arguments or options for advanced
;; GELLMU.
;;
;; Cleaned up long strings so that this file has
;; width at most 79.
;;
;; 0.6.3 -> 0.6.4:
;; 16-Dec-1999 Changed tagging for "\foo;" from "". This has a small advantage
;; when the tag is not recognized by "nsgmls":
;; it is parsed as empty.
;;
;; Corresponding changes to gellmu-parb-regexp and
;; parb handling.
;;
;; Added code for "\-", "\/", "\,", and "\ ".
;;
;; 0.6.2 -> 0.6.3:
;; 26-Oct-1999 Small changes in gellmu-parb-regexp
;;
;; 0.6.1 -> 0.6.2: Changed incorrect loop re-entrance in code
;; 03-Aug-1999 for \begin ... \end to gellmu-tmp-pos rather
;; than gellmu-loop-pos.
;;
;; Eliminated uncontrolled variable names and
;; replaced some fixed command names with variable
;; ones.
;;
;; Cleaned up batch-mode terminal output.
;;
;; Added about a dozen user variables. See the
;; list gellmu-public-vars and conditionals based
;; on them including:
;; * gellmu-latex-body is true when gellmu-body-name
;; is found and gellmu-no-latex is not set
;; * $-toggle math zones disabled and $ is ordinary
;; if gellmu-no-latex is set
;; * blank lines are not replaced by parb's when
;; gellmu-latex-body is false
;;
;; 0.6.0 -> 0.6.1: Removed code for automatic insertion of
;; 17-Jul-1999 "preamble" tags. (Should not cause any
;; breakage with current dtd.)
;;
;; 0.5.5 -> 0.6.0: Error corrections
;; 02-Jun-1999
;;
;; 0.5.4 -> 0.5.5: Enabled batch use:
;; 11-Oct-1998 emacs -batch -l gellmu -f gellmu-trans myfile.glm
;;
;; 0.5.3 -> 0.5.4: Cleaned up "parb" relative to new comment code
;; 11-Oct-1998
;;
;; 0.5.2 -> 0.5.3: 1) Added 4 names to parb-hold
;; 09-Oct-1998 2) Change handling of comments before "doctype"
;; 3) Don't write "" after "