ocamlweb

1.1emThis is ocamlweb, a literate programming tool for Objective Caml. This document has been automatically produced using ocamlweb itself, applied to his own code, in some kind of ‘bootstrap’.

Module	Function
Output	Low level printing functions, independent of the document type
Cross	Cross references in Caml files
Pretty	Pretty-print of code and documentation
Web	Production of the whole document, including the index
Doclexer	Lexer to separate code and doc parts in Caml files
Main	Main program

Interface for module Output

2.1em In that module, we concentrate all the printing functions. Thus, it will be easy to add another kind of output, HTML for instance, by adapting the code of that module, and nothing else. Default output is to standard output, but it can be redirected to a file with set_output_to_file. close_output closes the output, if it is a file.
val set_output_to_file : string → unit
val close_output : unit → unit
3.1emThese mutable flags controls the output. If quiet, no extra output is done on standard output, default is false. If use_greek_letters is true, greek letters are used to display (some of) the single-letter type variables, default is true.
val quiet : bool ref
val use_greek_letters : bool ref
val short : bool ref
4.1emThen we introduce some low level output functions, for characters and strings. (output_file f) copies the contents of file f on the output. (output_verbatim s) outputs the string s ‘as is’.
val output_char : char → unit
val output_string : string → unit
val output_file : string → unit
val output_verbatim : string → unit
5.1emThe following functions are mainly useful for a L^AT_EX output, but will work correctly in other cases. A call to set_no_preamble suppresses the output of the header and trailer. (end_line ()) ends a line, (indentation n) introduces an indentation of size n at the beggining of a line. latex_header takes as argument the options of the L^AT_EX package ocamlweb.sty.
val class_options : string ref
val set_no_preamble : bool → unit
val push_in_preamble : string → unit
val fullpage_headings : bool ref
val latex_header : string → unit
val latex_trailer : unit → unit

val indentation : int → unit
val end_line : unit → unit
val end_line_string : unit → unit

val enter_math : unit → unit
val leave_math : unit → unit
6.1emThe following functions are used to pretty-print the code. is_keyword identifies the keywords of Objective Caml. output_ident outputs an identifier, in different faces for keywords and other identifiers, escaping the characters that need it, like _ for instance in L^AT_EX. output_escaped_char pretty-prints the reserved char of L^AT_EX, like & or $. output_symbol pretty-prints the Caml symbols, like → for ->. output_type_variable s pretty-prints type variables, in particular one-letter type variables are output as greek letters. output_ascii_char n outputs the character of ASCII code n.

type char_type = Upper ∣ Lower ∣ Symbol
val what_is_first_char : string → char_type

val is_keyword : string → bool
val output_ident : string → unit
val output_escaped_char : char → unit
val output_symbol : string → unit
val output_type_variable : string → unit
val output_ascii_char : int → unit

output_lex_ident (resp. output_yacc_ident) outputs an identifier as above but taking into account CAMLLEX keywords (resp. CAMLYACC keywords)
val output_lex_ident : string → unit
val output_yacc_ident : string → unit
7.1emConstants are typeset by the following functions.
val output_integer : string → unit
val output_float : string → unit
8.1emComments inside code are opened and closed respectively by output_bc and output_ec. The function output_hfill is called before output_bc to justify a comment. output_byc and output_eyc are the same for CAMLYACC comments, that is /*…*/.
val output_bc : unit → unit
val output_ec : unit → unit
val output_byc : unit → unit
val output_eyc : unit → unit
val output_hfill : unit → unit
9.1emStrings inside code are opened and close respectively by output_bs and output_es. A space character in a string is output as a visible space, with output_vspace.
val output_bs : unit → unit
val output_es : unit → unit
val output_vspace : unit → unit
10.1emThe following functions deal with sectioning. The highest level is the one of modules and interfaces. The next level is the one of section. The last level is the one of paragraphs, which are atomic pieces of documentation or code.
val output_module : string → unit
val output_interface : string → unit
val output_lexmodule : string → unit
val output_yaccmodule : string → unit

val begin_code : unit → unit
val end_code : unit → unit
val begin_dcode : unit → unit
val end_dcode : unit → unit

val begin_code_paragraph : unit → unit
val end_code_paragraph : bool → unit
val begin_doc_paragraph : bool → int → unit
val end_doc_paragraph : unit → unit
11.1emIndex functions. (output_index_entry id t def use) outputs an entry line for identifier id, with type t, where def is the list of sections where f is introduced and use the list of sections where f is used. If the type of the entry is "", then it is omitted.
type α elem = Single of α ∣ Interval of α × α

val begin_index : unit → unit
val output_index_entry :
string → string → string elem list → string elem list → unit
val output_raw_index_entry :
string → string → string list → string list → unit
val end_index : unit → unit

val output_label : string → unit
12.1emThe parameters of the output engine are reset to their initial values with reset_output.
val reset_output : unit → unit

Module Output

13.1em Low level output. out_channel is a reference on the current output channel. It is initialized to the standard output and can be redirect to a file by the function set_output_to_file. The function close_output closes the output channel if it is a file. output_char, output_string and output_file are self-explainable.

let output_file f =
   let ch = open_in f in
   try
     while true do
       Pervasives.output_char !out_channel (input_char ch)
     done
   with End_of_file → close_in ch
14.1emHigh level output. In this section and the following, we introduce functions which are L^AT_EX dependent.
15.1emoutput_verbatim outputs a string in verbatim mode. A valid delimiter is given by the function char_out_of_string. It assumes that one of the four characters of fresh_chars is not used (which is the case in practice, since output_verbatim is only used to print quote-delimited characters).
let fresh_chars = [ '!'; '|'; '"'; '+' ]

let char_out_of_string s =
   let rec search = function
     ∣ [ ] → assert false
     ∣ c :: r → if String.contains s c then search r else c
   in
   search fresh_chars

let output_verbatim s =
let c = char_out_of_string s in
output_string (sprintf "\\verb%c%s%c" c s c)

let latex_header opt =
   if ¬ !no_preamble then begin
     output_string (sprintf "\\documentclass[%s]{article}\n" !class_options);
     output_string "\\usepackage[latin1]{inputenc}\n";
         if !fullpage_headings then
       output_string "\\usepackage[headings]{fullpage}\n"
     else
       output_string "\\usepackage{fullpage}\n";
     output_string "\\usepackage";
     if opt ≠ "" then output_string (sprintf "[%s]" opt);
     output_string "{ocamlweb}\n";
     output_string "\\pagestyle{headings}\n";
     Queue.iter (fun s → output_string s; output_string "\n") preamble;
     output_string "\\begin{document}\n"
   end;
   output_string
     "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
   output_string
     "%% This file has been automatically generated with the command\n";
   output_string "%% ";
   Array.iter (fun s → output_string s; output_string " ") Sys.argv;
   output_string "\n";
   output_string
     "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"

let latex_trailer () =
   if ¬ !no_preamble then begin
     output_string "\\end{document}\n"
   end
16.1emMath mode. We keep a boolean, math_mode, to know if we are currently already in T_EX math mode. The functions enter_math and leave_math inserts $ if necessary, and switch that boolean.

let enter_math () =
   if ¬ !math_mode then begin
     output_string "$";
     math_mode := true
   end

let leave_math () =
   if !math_mode then begin
     output_string "$";
     math_mode := false
   end
17.1emIndentation. An indentation at the beginning of a line of n spaces is produced by (indentation n) (used for code only).
let indentation n =
   let space = 0.5 *. (float n) in
   output_string (sprintf "\\ocwindent{%2.2fem}\n" space)
18.1emEnd of lines. (end_line ()) ends a line. (used for code only).
let end_line () =
   leave_math ();
   output_string "\\ocweol\n"

let end_line_string () =
output_string "\\endgraf\n"
19.1emKeywords. Caml keywords and base type are stored in two hash tables, and the two functions is_caml_keyword and is_base_type make the corresponding tests. The function output_keyword prints a keyword, with different macros for base types and keywords.

let build_table l =
   let h = Hashtbl.create 101 in
   List.iter (fun key → Hashtbl.add h key ()) l;
   Hashtbl.mem h

let is_caml_keyword =
   build_table
     [ "and"; "as"; "assert"; "begin"; "class";
       "constraint"; "do"; "done"; "downto"; "else"; "end"; "exception";
       "external"; "false"; "for"; "fun"; "function"; "functor"; "if";
       "in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match";
       "method"; "module"; "mutable"; "new"; "object"; "of"; "open";
       "or"; "parser"; "private"; "rec"; "sig"; "struct"; "then"; "to";
       "true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with";
       "mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"
     ]

let is_base_type =
   build_table
     [ "string"; "int"; "array"; "unit"; "bool"; "char"; "list"; "option";
       "float"; "ref" ]

let is_yacc_keyword =
   build_table
     [ "%token"; "%left"; "%right"; "%type"; "%start"; "%nonassoc"; "%prec";
       "error" ]

let output_keyword s =
   if is_base_type s then
     output_string "\\ocwbt{"
   else
     output_string "\\ocwkw{";
   output_string s;
   output_string "}"

let output_lex_keyword s =
   output_string "\\ocwlexkw{";
   output_string s;
   output_string "}"

let output_yacc_keyword s =
   output_string "\\ocwyacckw{";
   if String.get s 0 = '%' then output_string "\\";
   output_string s;
   output_string "}"
20.1emIdentifiers. The function output_raw_ident prints an identifier, escaping the T_EX reserved characters with output_escaped_char. The function output_ident prints an identifier, calling output_keyword if necessary.

let output_escaped_char c =
   if c = '^' ∨ c = '~' then leave_math();
   match c with
     ∣ '\\' →
         output_string "\\symbol{92}"
     ∣ '$' ∣ '#' ∣ '%' ∣ '&' ∣ '{' ∣ '}' ∣ '_' →
         output_char '\\'; output_char c
     ∣ '^' ∣ '~' →
         output_char '\\'; output_char c; output_string "{}"
     ∣ '<' ∣ '>' →
         output_string "\\ensuremath{"; output_char c; output_string "}"
     ∣ _ →
         output_char c

let output_latex_id s =
   for i = 0 to String.length s − 1 do
     output_escaped_char s.[i]
   done

let what_char = function
   ∣ 'A'..'Z' ∣ '\192'..'\214' ∣ '\216'..'\222' → Upper
   ∣ 'a'..'z' ∣'\223'..'\246' ∣ '\248'..'\255' ∣ '_' → Lower
   ∣ _ → Symbol

let what_is_first_char s =
if String.length s > 0 then what_char s.[0] else Lower

let output_raw_ident_in_index s =
   begin match what_is_first_char s with
     ∣ Upper → output_string "\\ocwupperid{"
     ∣ Lower → output_string "\\ocwlowerid{"
     ∣ Symbol → output_string "\\ocwsymbolid{"
   end;
   output_latex_id s;
   output_string "}"

let output_raw_ident s =
   begin match what_is_first_char s with
     ∣ Upper → output_string "\\ocwupperid{"
     ∣ Lower → output_string "\\ocwlowerid{"
     ∣ Symbol → output_string "\\ocwsymbolid{"
   end;
   try
     let qualification = Filename.chop_extension s in
     (∗ We extract the qualified name. ∗)
     let qualified_name =
       String.sub s (String.length qualification + 1)
         (String.length s − String.length qualification − 1)
     in
     (∗ We check now whether the qualified term is a lower id or not. ∗)
     match qualified_name.[0] with
       ∣ 'A'..'Z' →
       (∗ The qualified term is a module or a constructor: nothing to change. ∗)
           output_latex_id (s);
           output_string "}"
       ∣ _ →
       (∗ The qualified term is a value or a type: \\ocwlowerid used instead. ∗)
           output_latex_id (qualification ^ ".");
           output_string "}";
           output_string "\\ocwlowerid{";
           output_latex_id qualified_name;
           output_string "}"
   with Invalid_argument _ →
     (∗ The string s is a module name or a constructor: nothing to do. ∗)
     output_latex_id s;
     output_string "}"

let output_ident s =
   if is_keyword s then begin
     leave_math (); output_keyword s
   end else begin
     enter_math (); output_raw_ident s
   end

let output_lex_ident s =
   if is_lex_keyword s then begin
     leave_math (); output_lex_keyword s
   end else begin
     enter_math ();
     output_string "\\ocwlexident{";
     output_latex_id s;
     output_string "}";
   end

let output_yacc_ident s =
   if is_yacc_keyword s then begin
     leave_math (); output_yacc_keyword s
   end else begin
     enter_math ();
     output_string "\\ocwyaccident{";
     output_latex_id s;
     output_string "}";
   end
21.1emSymbols. Some mathematical symbols are printed in a nice way, in order to get a more readable code. The type variables from 'a to 'd are printed as Greek letters for the same reason.

let output_tv id =
output_string "\\ocwtv{"; output_latex_id id; output_char '}'

let output_greek l =
enter_math (); output_char '\\'; output_string l; output_string "{}"

let output_type_variable id =
   if ¬ !use_greek_letters then
     output_tv id
   else
     match id with
       ∣ "a" → output_greek "alpha"
       ∣ "b" → output_greek "beta"
       ∣ "c" → output_greek "gamma"
       ∣ "d" → output_greek "delta"
       ∣ "e" → output_greek "varepsilon"
       ∣ "i" → output_greek "iota"
       ∣ "k" → output_greek "kappa"
       ∣ "l" → output_greek "lambda"
       ∣ "m" → output_greek "mu"
       ∣ "n" → output_greek "nu"
       ∣ "r" → output_greek "rho"
       ∣ "s" → output_greek "sigma"
       ∣ "t" → output_greek "tau"
       ∣ _ → output_tv id

let output_ascii_char n =
   output_string (sprintf "\\symbol{%d}" n)
22.1emConstants.
let output_integer s =
   let n = String.length s in
   let base b =
     let v = String.sub s 2 (n − 2) in
     output_string (sprintf "\\ocw%sconst{%s}" b v)
   in
   if n > 1 then
     match s.[1] with
       ∣ 'x' ∣ 'X' → base "hex"
       ∣ 'o' ∣ 'O' → base "oct"
       ∣ 'b' ∣ 'B' → base "bin"
       ∣ _ → output_string s
   else
     output_string s

let output_float s =
   try
     let i = try String.index s 'e' with Not_found → String.index s 'E' in
     let m = String.sub s 0 i in
     let e = String.sub s (succ i) (String.length s − i − 1) in
     if m = "1" then
       output_string (sprintf "\\ocwfloatconstexp{%s}" e)
     else
       output_string (sprintf "\\ocwfloatconst{%s}{%s}" m e)
   with Not_found →
     output_string s
23.1emComments.
let output_bc () = leave_math (); output_string "\\ocwbc{}"

let output_eyc () = leave_math (); output_string "\\ocweyc{}"
24.1emStrings.
let output_bs () = leave_math (); output_string "\\ocwstring{\""

let output_vspace () = output_string "\\ocwvspace{}"
25.1emReset of the output machine.
let reset_output () =
math_mode := false
26.1emSectioning commands.
let begin_section () =
output_string "\\allowbreak\\ocwsection\n"

let output_typeout_command filename =
   output_string "\\typeout{OcamlWeb file ";
   output_string filename;
   output_string "}\n"

let output_module module_name =
   if ¬ !short then begin
     output_typeout_command (module_name^".ml");
     output_string "\\ocwmodule{";
     output_latex_id module_name;
     output_string "}\n"
   end

let output_interface module_name =
   if ¬ !short then begin
     output_typeout_command (module_name^".mli");
     output_string "\\ocwinterface{";
     output_latex_id module_name;
     output_string "}\n"
   end

let output_lexmodule module_name =
   if ¬ !short then begin
     output_typeout_command (module_name^".mll");
     output_string "\\ocwlexmodule{";
     output_latex_id module_name;
     output_string "}\n"
   end

let output_yaccmodule module_name =
   if ¬ !short then begin
     output_typeout_command (module_name^".mly");
     output_string "\\ocwyaccmodule{";
     output_latex_id module_name;
     output_string "}\n"
   end

let begin_code () =
   if ¬ !in_code then output_string "\\ocwbegincode{}";
   in_code := true
let end_code () =
   if !in_code then output_string "\\ocwendcode{}";
   in_code := false

let begin_dcode () =
output_string "\\ocwbegindcode{}"
let end_dcode () =
output_string "\\ocwenddcode{}"

let begin_code_paragraph () =
if ¬ !last_is_code then output_string "\\medskip\n";
last_is_code := true

let end_code_paragraph is_last_paragraph =
if is_last_paragraph then end_line() else output_string "\\medskip\n\n"

let begin_doc_paragraph is_first_paragraph n =
if ¬ is_first_paragraph then indentation n;
last_is_code := false

let end_doc_paragraph () =
output_string "\n"
27.1emIndex. It is opened and closed with the two macros ocwbeginindex and ocwendindex. The auxiliary function print_list is a generic function to print a list with a given printing function and a given separator.

let print_list print sep l =
   let rec print_rec = function
     ∣ [ ] → ()
     ∣ [x] → print x
     ∣ x::r → print x; sep(); print_rec r
   in
   print_rec l
28.1emIndex in WEB style. The function output_index_entry prints one entry line, given the name of the entry, and two lists of pre-formatted sections labels, like 1–4,7,10–17, of type string elem list. The first list if printed in bold face (places where the identifier is defined) and the second one in roman (places where it is used).

let output_elem = function
   ∣ Single r →
       output_ref r
   ∣ Interval (r1,r2) →
       output_ref r1;
       output_string "--";
       output_ref r2

let output_bf_elem n =
output_string "\\textbf{"; output_elem n; output_string "}"

let output_index_entry s t def use =
   let sep () = output_string ", " in
   output_string "\\ocwwebindexentry{";
   enter_math ();
   output_raw_ident_in_index s;
   leave_math ();
   if t ≠ "" then output_string (" " ^ t);
   output_string "}{";
   print_list output_bf_elem sep def;
   output_string "}{";
   if def ≠ [ ] ∧ use ≠ [ ] then output_string ", ";
   print_list output_elem sep use;
   output_string "}\n"
29.1emIndex in L^AT_EX style. When we are not in WEB style, the index in left to L^AT_EX, and all the work is done by the macro \ocwrefindexentry, which takes three arguments: the name of the entry and the two lists of labels where it is defined and used, respectively.

let output_raw_index_entry s t def use =
   let sep () = output_string ","
   and sep′ () = output_string ", " in
   output_string "\\ocwrefindexentry{";
   enter_math ();
   output_raw_ident_in_index s;
   leave_math ();
   if t ≠ "" then output_string (" " ^ t);
   output_string "}{";
   print_list output_string sep def;
   output_string "}{";
   print_list output_string sep use;
   output_string "}{";
   print_list output_ref sep′ def;
   output_string "}{";
   print_list output_ref sep′ use;
   output_string "}\n"

let output_label l =
output_string "\\label{"; output_string l; output_string "}%\n"

Module Yacc_parser (Yacc)

   let dummy_loc =
     { start_pos = Lexing.dummy_pos;
       end_pos = Lexing.dummy_pos;
       start_line = 0 ;
       start_col = 0 }

31.1emYacc tokens.
%token Ttoken Tstart Ttype Tleft Tright Tnonassoc Tprec Terror
%token < Yacc_syntax.ident > Tident
%token < Yacc_syntax.location > Taction Ttypedecl
%token Tor Tsemicolon Tcolon Tmark
%token EOF

32.1emStart symbol for yacc description files
yacc_definitions::=
   ∣ header tokendecls Tmark rules header EOF
       {   { header = $1 ;
           decls = $2;
           rules = $4;
           trailer = $5 } }

header ::=
   ∣ Taction
       {   $1 }
   ∣ /∗ ε ∗/
     {   dummy_loc }

33.1emToken declarations.
tokendecls ::=
   ∣ tokendecl tokendecls
     {   $1::$2 }
   ∣ /∗epsilon∗/
     {   [ ] }

tokendecl ::=
   ∣ Ttoken Ttypedecl idlist
       {   Typed_tokens($2,$3) }
   ∣ Ttoken idlist
       {   Untyped_tokens($2) }
   ∣ Ttype Ttypedecl idlist
       {   Non_terminals_type($2,$3) }
   ∣ Tstart idlist
       {   Start_symbols($2) }
   ∣ Tleft idlist
       {   Tokens_assoc($2) }
   ∣ Tnonassoc idlist
       {   Tokens_assoc($2) }
   ∣ Tright idlist
       {   Tokens_assoc($2) }

idlist::=
   ∣ Tident
     {   [$1] }
   ∣ Tident idlist
     {   $1 :: $2 }

34.1emParsing of rules.
rules::=
   ∣ /∗ ε ∗/
     {   [ ] }
   ∣ general_rule rules
     {   $1 :: $2 }

Ocamlyacc manual asks for a semicolon at end of each rules. But ocamlyacc accepts if they are missing. We issue a warning for non conformity to ocamlyacc documentation.

general_rule::=
   ∣ rule Tsemicolon
       {   $1 }
   ∣ rule
       {   Yacc_syntax.issue_warning "ocamlyacc documentation recommends adding a semicolon at end of each grammar rules";
       $1 }

rule ::=
   ∣ Tident Tcolon right_part
     {   ($1,$3) }
   ∣ Tident Tcolon Tor right_part
     {   ($1,$4) }

right_part ::=
   ∣ word Taction
     {   [($1,$2)] }
   ∣ word Taction Tor right_part
     {   ($1,$2) :: $4 }

word ::=
   ∣ /∗ ε ∗/
     {   [ ] }
   ∣ Tident word
     {   $1 :: $2 }
   ∣ Tprec Tident word
     {   $2 :: $3 }
   ∣ Terror word
     {   $2 }

Module Yacc_lexer (Lex)

Auxiliaries for the lexical analyzer
let brace_depth = ref 0
and comment_depth = ref 0
and mark_count = ref 0

let handle_lexical_error fn lexbuf =
   let line = !current_line_num
   and column = Lexing.lexeme_start lexbuf − !current_line_start_pos in
   try
     fn lexbuf
   with Lexical_error(msg, _, _) →
     raise(Lexical_error(msg, line, column))
36.1emyacc keywords
let keyword_table = Hashtbl.create 17
let _ =
   List.iter (fun (kwd, tok) → Hashtbl.add keyword_table kwd tok)
     [ "token", Ttoken;
       "start", Tstart;
       "type", Ttype;
       "left", Tleft;
       "right", Tright;
       "nonassoc", Tnonassoc;
       "prec", Tprec ]

let keyword_token lexbuf =
   try
     Hashtbl.find keyword_table (Lexing.lexeme lexbuf)
   with
       Not_found →
         raise(Lexical_error
               ("unknown keyword " ^ String.escaped(Lexing.lexeme lexbuf),
               !current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos))

let cur_loc lexbuf =
   { start_pos = Lexing.lexeme_start_p lexbuf;
     end_pos = Lexing.lexeme_end_p lexbuf;
     start_line = !current_line_num;
     start_col = Lexing.lexeme_start lexbuf − !current_line_start_pos }

let reset_lexer f lexbuf =
   current_file_name := f;
   mark_count := 0;
   current_line_num := 1;
   current_line_start_pos := 0;
   current_lexbuf := lexbuf

Although few grammar files include commas anywhere commas are skipped in yacc. The original yacc code is used for ocaml. See yacc/reader.c:nextc(),read_grammar() from the ocaml 3.04 distribution.

   ∣ ','
     {   issue_warning
         "use of commas in mly files is allowed but not conform to ocamlyacc documentation";
         main lexbuf }

   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       main lexbuf }
   ∣ "/*"
     {   handle_lexical_error yacc_comment lexbuf;
       main lexbuf }
   ∣ ['A'–'Z' 'a'–'z'] ['A'–'Z' 'a'–'z' '\'' '_' '0'–'9'] ^⋆
     {   match Lexing.lexeme lexbuf with
           "error" → Terror
         ∣ s → let l = cur_loc lexbuf in
                   Tident (s,l) }
   ∣ '{'
     {   let n1 = Lexing.lexeme_end_p lexbuf
       and l1 = !current_line_num
       and s1 = !current_line_start_pos in
       brace_depth := 1;
       let n2 = handle_lexical_error action lexbuf in
       Taction({start_pos = n1; end_pos = n2;
                 start_line = l1; start_col = n1.Lexing.pos_cnum − s1}) }
   ∣ '|'
       {   Tor }
   ∣ ';'
       {   Tsemicolon }
   ∣ ':'
       {   Tcolon }
   ∣ '%'
       {   yacc_keyword lexbuf }
   ∣ '<'
       {   let n1 = Lexing.lexeme_end_p lexbuf
         and l1 = !current_line_num
         and s1 = !current_line_start_pos in
         let n2 = handle_lexical_error typedecl lexbuf in
         Ttypedecl({start_pos = n1; end_pos = n2;
                     start_line = l1; start_col = n1.Lexing.pos_cnum − s1}) }
   ∣ eof
       {   EOF }
   ∣ _
     {   raise(Lexical_error
               ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
               !current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos)) }

and yacc_keyword = parse
   ∣ '%'
       {   incr mark_count;
         if !mark_count = 1 then Tmark else
           let n1 = Lexing.lexeme_end_p lexbuf
           and l1 = !current_line_num
           and s1 = !current_line_start_pos in
           brace_depth := 0;
           let n2 = handle_lexical_error action lexbuf in
           Taction({start_pos = n1; end_pos = n2;
                     start_line = l1; start_col = n1.Lexing.pos_cnum − s1}) }
   ∣ '{'
       {   let n1 = Lexing.lexeme_end_p lexbuf
         and l1 = !current_line_num
         and s1 = !current_line_start_pos in
         brace_depth := 1;
         let n2 = handle_lexical_error action lexbuf in
         Taction({start_pos = n1; end_pos = n2;
                   start_line = l1; start_col = n1.Lexing.pos_cnum − s1}) }
   ∣ ['a'–'z']⁺
       {   keyword_token lexbuf }
   ∣ _
       {   raise(Lexical_error
               ("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
               !current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos)) }
38.1emrecognizes a CAML action
and action = parse
   ∣ '{'
     {   incr brace_depth;
       action lexbuf }
   ∣ '}'
     {   decr brace_depth;
       if !brace_depth = 0
       then Lexing.lexeme_start_p lexbuf else action lexbuf }
   ∣ "%}"
     {   decr brace_depth;
       if !brace_depth = 0 then Lexing.lexeme_start_p lexbuf else
         raise(Lexical_error
                 ("ill-balanced brace ",
                   !current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos)) }
   ∣ '"'
     {   string lexbuf;
       action lexbuf }
   ∣ "’" [^ '\\'] "’"
     {   action lexbuf }
   ∣ "’" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "’"
     {   action lexbuf }
   ∣ "’" '\\' ['0'–'9'] ['0'–'9'] ['0'–'9'] "’"
     {   action lexbuf }
   ∣ "(*"
     {   comment_depth := 1;
       comment lexbuf;
       action lexbuf }
   ∣ eof
     {   if !brace_depth = 0 then Lexing.lexeme_start_p lexbuf else
         raise (Lexical_error("unterminated action", 0, 0)) }
   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       action lexbuf }
   ∣ _
     {   action lexbuf }
39.1emrecognizes a CAML type between < and >
and typedecl = parse
   ∣ '>'
     {   Lexing.lexeme_start_p lexbuf }
   ∣ eof
     {   raise (Lexical_error("unterminated type declaration", 0, 0)) }
   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       typedecl lexbuf }
   ∣ "->"
     {   typedecl lexbuf }
   ∣ _
     {   typedecl lexbuf }

and string = parse
     '"'
     {   () }
   ∣ '\\' [' ' '\013' '\009' '\012'] ^⋆ '\010' [' ' '\013' '\009' '\012'] ^⋆
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       string lexbuf }
   ∣ '\\' ['\\' '"' 'n' 't' 'b' 'r']
     {   string lexbuf }
   ∣ '\\' ['0'–'9'] ['0'–'9'] ['0'–'9']
     {   string lexbuf }
   ∣ eof
     {   raise(Lexical_error("unterminated string", 0, 0)) }
   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       string lexbuf }
   ∣ _
     {   string lexbuf }

and comment = parse
     "(*"
     {   incr comment_depth; comment lexbuf }
   ∣ "*)"
     {   decr comment_depth;
       if !comment_depth = 0 then () else comment lexbuf }
   ∣ '"'
     {   string lexbuf;
       comment lexbuf }
   ∣ "’’"
       {   comment lexbuf }
   ∣ "’" [^ '\\' '\''] "’"
       {   comment lexbuf }
   ∣ "’\\" ['\\' '\'' 'n' 't' 'b' 'r'] "’"
       {   comment lexbuf }
   ∣ "’\\" ['0'–'9'] ['0'–'9'] ['0'–'9'] "’"
       {   comment lexbuf }
   ∣ eof
     {   raise(Lexical_error("unterminated comment", 0, 0)) }
   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       comment lexbuf }
   ∣ _
     {   comment lexbuf }

and yacc_comment = parse
   ∣ "*/"
     {   () }
   ∣ eof
     {   raise(Lexical_error("unterminated yacc comment", 0, 0)) }
   ∣ '\010'
     {   current_line_start_pos := Lexing.lexeme_end lexbuf;
       incr current_line_num;
       yacc_comment lexbuf }
   ∣ _
     {   yacc_comment lexbuf }

Interface for module Cross

40.1em That module exports to global tables used and defined, indexed by identifiers (strings) and containing respectively the sets of locations where they are defined and used. Those locations are of type where, which contain the name of the file and the absolute position in the source.

type entry_type =
   ∣ Value
   ∣ Constructor
   ∣ Field
   ∣ Label
   ∣ Type
   ∣ Exception
   ∣ Module
   ∣ ModuleType
   ∣ Class
   ∣ Method
   ∣ LexParseRule (∗ CAMLLEX entry points ∗)
   ∣ RegExpr (∗ CAMLLEX regular expressions ∗)
   ∣ YaccNonTerminal (∗ CAMLYACC non-terminal symbols ∗)
   ∣ YaccTerminal (∗ CAMLYACC terminal symbols, i.e. tokens ∗)

val used : Whereset.t Idmap.t ref
val defined : Whereset.t Idmap.t ref
41.1emThe two following functions fill the above tables for a given file.
val cross_implem : string → string → unit
val cross_interf : string → string → unit

cross-referencing lex and yacc description files
val cross_lex : string → string → unit
val cross_yacc : string → string → unit

Module Cross

42.1em Cross references inside Caml files are kept in the following two global tables, which keep the places where things are defined and used, to produce the final indexes.
type where = { w_filename : string; w_loc : int }

let defined = ref Idmap.empty
let used = ref Idmap.empty
43.1emThe function add_global is a generic function to add an entry in one table. add_def is used to add the definition of an identifier (so in the table defined).
let add_global table k i =
   try
     let s = Idmap.find k !table in
     table := Idmap.add k (Whereset.add i s) !table
   with Not_found →
     table := Idmap.add k (Whereset.singleton i) !table

let current_location loc =
{ w_filename = !current_file;
w_loc = !current_offset + loc.loc_start.pos_cnum }

let add_def loc t s =
   if String.length s > 0 then
     let e = { e_name = s; e_type = t } in
     add_global defined e (current_location loc)
44.1emAnother table, locals, keeps the bound variables, in order to distinguish them from global identifiers. Then the function add_uses registers that an identifier is used (in the table used), taking care of the fact that it is not a bound variable (in the table locals). add_uses_q iters add_uses on a qualified identifier.
module Stringset = Set.Make(struct type t = string let compare = compare end)

let reset_cross f offs =
   assert (Stringset.cardinal !locals = 0);
   locals := Stringset.empty;
   current_file := f;
   current_offset := offs

let add_uses loc t s =
   if String.length s > 0 ∧
       ¬ (is_keyword s) ∧ ¬ (Stringset.mem s !locals)
   then
     let e = { e_name = s; e_type = t } in
     add_global used e (current_location loc)

let add_uses_q loc t q =
   let rec addmod = function
     ∣ Lident s → add_uses loc Module s
     ∣ Ldot (q,s) → addmod q; add_uses loc Module s
     ∣ Lapply (q1,q2) → addmod q1; addmod q2
   in
   match q with
     ∣ Lident s → add_uses loc t s
     ∣ Ldot (q,s) → addmod q; add_uses loc t s
     ∣ Lapply (q1,q2) → addmod q1; addmod q2
45.1emSome useful functions.
let iter_fst f = List.iter (fun x → f (fst x))

let option_iter f = function None → () ∣ Some x → f x
46.1emWhen traversing a pattern, we must collect all its identifiers, in order to declare them as bound variables (or definitions behind a let construction). That is the job of the function ids_of_a_pattern. Then pattern_for_def declares all the identifiers of a pattern as new definitions.
let ids_of_a_pattern p =
   let r = ref [ ] in
   let add id = r := id :: !r in
   let rec pattern_d = function
     ∣ Ppat_any → ()
     ∣ Ppat_var id → add id
     ∣ Ppat_alias (p,id) → add id; pattern p
     ∣ Ppat_constant _ → ()
     ∣ Ppat_tuple pl → List.iter pattern pl
     ∣ Ppat_construct (_,po,_) → option_iter pattern po
     ∣ Ppat_record l → iter_snd pattern l
     ∣ Ppat_array pl → List.iter pattern pl
     ∣ Ppat_or (p1,p2) → pattern p1; pattern p2
     ∣ Ppat_constraint (p,_) → pattern p
     ∣ Ppat_variant (_,po) → option_iter pattern po
     ∣ Ppat_type _ → ()
   and pattern p =
     pattern_d p.ppat_desc
   in
   pattern p; !r

let pattern_for_def p =
   let loc = p.ppat_loc in
   let ids = ids_of_a_pattern p in
   List.iter (add_def loc Value) ids
47.1emThe following function locally adds some given variables to the set of bound variables, during the time of the application of a given function on a given argument.
let bind_variables ids f x =
   let save = !locals in
   List.iter add_local ids;
   f x;
   locals := save
48.1emTraversing of Caml abstract syntax trees. Each type t in those abstract syntax trees is associated to a function tr_t which traverses it, declaring the identifiers used and defined. Those types are defined in the Caml module interface Paresetree.mli contained in the Caml source distribution.

49.1emCore types.
let rec tr_core_type t =
tr_core_type_desc t.ptyp_loc t.ptyp_desc

and tr_core_type_desc loc = function
   ∣ Ptyp_any ∣ Ptyp_var _ →
       ()
   ∣ Ptyp_arrow (l,t1,t2) →
       add_def loc Label l; tr_core_type t1; tr_core_type t2
   ∣ Ptyp_tuple tl →
       List.iter tr_core_type tl
   ∣ Ptyp_constr (q,tl) →
       add_uses_q loc Type q; List.iter tr_core_type tl
   ∣ Ptyp_object l →
       List.iter tr_core_field_type l
   ∣ Ptyp_class (id,l,ll) →
       add_uses_q loc Class id;
       List.iter (add_def loc Label) ll;
       List.iter tr_core_type l
   ∣ Ptyp_alias (ct,_) →
       tr_core_type ct
   ∣ Ptyp_variant (l,_,_) →
       List.iter tr_row_field l
   ∣ Ptyp_poly (_,t) →
       tr_core_type t

and tr_row_field = function
∣ Rtag (_,_,ctl) → List.iter tr_core_type ctl
∣ Rinherit t → tr_core_type t

and tr_core_field_desc loc = function
   ∣ Pfield (id,ct) →
       add_uses loc Method id;
       tr_core_type ct
   ∣ Pfield_var → ()
50.1emType expressions for the class language.
let tr_class_infos f p =
   add_def p.pci_loc Class p.pci_name;
   f p.pci_expr
51.1emValue expressions for the core language.
let bind_pattern f (p,e) =
   bind_variables (ids_of_a_pattern p) f e

let bind_patterns f pl e =
let ids = List.flatten (List.map ids_of_a_pattern pl) in
bind_variables ids f e

and tr_expression_desc loc = function
   ∣ Pexp_ident q →
       add_uses_q loc Value q
   ∣ Pexp_apply (e,lel) →
       tr_expression e;
       List.iter (fun (l,e) → add_uses loc Label l; tr_expression e) lel
   ∣ Pexp_ifthenelse (e1,e2,e3) →
       tr_expression e1; tr_expression e2; option_iter tr_expression e3
   ∣ Pexp_sequence (e1,e2) →
       tr_expression e1; tr_expression e2
   ∣ Pexp_while (e1,e2) →
       tr_expression e1; tr_expression e2
   ∣ Pexp_tuple el →
       List.iter tr_expression el
   ∣ Pexp_construct (q,e,_) →
       add_uses_q loc Constructor q;
       option_iter tr_expression e
   ∣ Pexp_function (l,eo,pel) →
       add_def loc Label l;
       option_iter tr_expression eo;
       List.iter (bind_pattern tr_expression) pel
   ∣ Pexp_match (e,pel) →
       tr_expression e; List.iter (bind_pattern tr_expression) pel
   ∣ Pexp_try (e,pel) →
       tr_expression e; List.iter (bind_pattern tr_expression) pel
   ∣ Pexp_let (recf,pel,e) →
       let pl = List.map fst pel in
       if recf = Recursive then
         iter_snd (bind_patterns tr_expression pl) pel
       else
         iter_snd tr_expression pel;
       bind_patterns tr_expression pl e
   ∣ Pexp_record (l,e) →
       iter_fst (add_uses_q loc Field) l; iter_snd tr_expression l;
       option_iter tr_expression e
   ∣ Pexp_field (e,q) →
       tr_expression e; add_uses_q loc Field q
   ∣ Pexp_setfield (e1,q,e2) →
       tr_expression e1; add_uses_q loc Field q; tr_expression e2
   ∣ Pexp_array el →
       List.iter tr_expression el
   ∣ Pexp_for (i,e1,e2,_,e) →
       tr_expression e1; tr_expression e2; bind_variables [i] tr_expression e
   ∣ Pexp_constraint (e,t1,t2) →
       tr_expression e; option_iter tr_core_type t1; option_iter tr_core_type t2
   ∣ Pexp_when (e1,e2) →
       tr_expression e1; tr_expression e2
   ∣ Pexp_letmodule (x,m,e) →
       tr_module_expr m; bind_variables [x] tr_expression e
   ∣ Pexp_constant _ →
       ()
   ∣ Pexp_send (e,id) →
       add_uses loc Method id; tr_expression e
   ∣ Pexp_new id →
       add_uses_q loc Class id
   ∣ Pexp_setinstvar (id,e) →
       add_uses loc Value id; tr_expression e
   ∣ Pexp_override l →
       iter_fst (add_uses loc Method) l; iter_snd tr_expression l
   ∣ Pexp_variant (_,eo) →
       option_iter tr_expression eo
   ∣ Pexp_assert e →
       tr_expression e
   ∣ Pexp_assertfalse →
       ()
   ∣ Pexp_lazy e →
       tr_expression e
   ∣ Pexp_poly (e, t) →
       tr_expression e; option_iter tr_core_type t
   ∣ Pexp_object cs →
       tr_class_structure cs
52.1emValue descriptions.
and tr_value_description vd =
   tr_core_type vd.pval_type
53.1emType declarations.
and tr_type_declaration td =
   tr_type_kind td.ptype_loc td.ptype_kind;
   option_iter tr_core_type td.ptype_manifest

and tr_type_kind loc = function
   ∣ Ptype_abstract → ()
   ∣ Ptype_variant (cl,_) →
       iter_fst (add_def loc Constructor) cl;
       iter_snd (List.iter tr_core_type) cl
   ∣ Ptype_record (fl,_) →
       List.iter (fun (f,_,t) → add_def loc Field f; tr_core_type t) fl

and tr_exception_declaration ed =
List.iter tr_core_type ed
54.1emType expressions for the class language.
and tr_class_type c =
tr_class_type_desc c.pcty_loc c.pcty_desc

and tr_class_type_desc loc = function
   ∣ Pcty_constr (id,l) →
       add_uses_q loc Class id;
       List.iter tr_core_type l
   ∣ Pcty_signature cs →
       tr_class_signature cs
   ∣ Pcty_fun (l,co,cl) →
       add_def loc Label l;
       tr_core_type co;
       tr_class_type cl

and tr_class_signature (ct,l) =
tr_core_type ct;
List.iter tr_class_type_field l

and tr_class_type_field = function
   ∣ Pctf_inher ct →
       tr_class_type ct
   ∣ Pctf_val (id,_,ct,loc) →
       add_def loc Value id;
       option_iter tr_core_type ct
   ∣ Pctf_virt (id,_,ct,loc) →
       add_def loc Method id;
       tr_core_type ct
   ∣ Pctf_meth (id,_,ct,loc) →
       add_def loc Method id;
       tr_core_type ct
   ∣ Pctf_cstr (ct1,ct2,_) →
       tr_core_type ct1;
       tr_core_type ct2

and tr_class_type_declaration x = tr_class_infos tr_class_type x
55.1emValue expressions for the class language.
and tr_class_expr ce = tr_class_expr_desc ce.pcl_loc ce.pcl_desc

and tr_class_expr_desc loc = function
   ∣ Pcl_constr (id,l) →
       add_uses_q loc Class id;
       List.iter tr_core_type l
   ∣ Pcl_structure cs →
       tr_class_structure cs
   ∣ Pcl_fun (l,eo,p,ce) →
       add_def loc Label l;
       option_iter tr_expression eo;
       bind_variables (ids_of_a_pattern p) tr_class_expr ce
   ∣ Pcl_apply (ce,l) →
       tr_class_expr ce;
       List.iter (fun (l,e) → add_uses loc Label l; tr_expression e) l
   ∣ Pcl_let (recf,pel,ce) →
       let pl = List.map fst pel in
       if recf = Recursive then
         iter_snd (bind_patterns tr_expression pl) pel
       else
         iter_snd tr_expression pel;
       bind_patterns tr_class_expr pl ce
   ∣ Pcl_constraint (ce,ct) →
       tr_class_expr ce;
       tr_class_type ct

and tr_class_structure (p,l) =
List.iter (fun f → bind_pattern tr_class_field (p,f)) l

and tr_class_field = function
   ∣ Pcf_inher (ce,_) →
       tr_class_expr ce
   ∣ Pcf_val (id,_,e,loc) →
       add_def loc Value id;
       tr_expression e
   ∣ Pcf_virt(id,_,ct,loc) →
       add_def loc Method id;
       tr_core_type ct
   ∣ Pcf_meth (id,_,e,loc) →
       add_def loc Method id;
       tr_expression e
   ∣ Pcf_cstr (ct1,ct2,_) →
       tr_core_type ct1;
       tr_core_type ct2
   ∣ Pcf_let (recf,pel,_) →
       let pl = List.map fst pel in
       if recf = Recursive then
         iter_snd (bind_patterns tr_expression pl) pel
       else
         iter_snd tr_expression pel
   ∣ Pcf_init e →
       tr_expression e

and tr_class_declaration x = tr_class_infos tr_class_expr x
56.1emType expressions for the module language.
and tr_module_type mt =
tr_module_type_desc mt.pmty_loc mt.pmty_desc

and tr_module_type_desc loc = function
   ∣ Pmty_ident id →
       add_uses_q loc ModuleType id
   ∣ Pmty_signature s →
       tr_signature s
   ∣ Pmty_functor (id,mt1,mt2) →
       tr_module_type mt1;
       bind_variables [id] tr_module_type mt2
   ∣ Pmty_with (mt,cl) →
       tr_module_type mt;
       List.iter
         (fun (id,c) → add_uses_q loc Type id; tr_with_constraint loc c) cl

and tr_signature_item_desc loc = function
   ∣ Psig_value (x,vd) →
       add_def loc Value x; tr_value_description vd
   ∣ Psig_type l →
       iter_fst (add_def loc Type) l; iter_snd tr_type_declaration l
   ∣ Psig_exception (id,ed) →
       add_def loc Exception id; tr_exception_declaration ed
   ∣ Psig_module (id,mt) →
       add_def loc Module id; tr_module_type mt
   ∣ Psig_recmodule l →
       List.iter (fun (id,mt) → add_def loc Module id; tr_module_type mt) l
   ∣ Psig_modtype (id,mtd) →
       add_def loc ModuleType id; tr_modtype_declaration mtd
   ∣ Psig_open q →
       add_uses_q loc Module q
   ∣ Psig_include mt →
       tr_module_type mt
   ∣ Psig_class l →
       List.iter tr_class_description l
   ∣ Psig_class_type l →
       List.iter tr_class_type_declaration l

and tr_modtype_declaration = function
∣ Pmodtype_abstract → ()
∣ Pmodtype_manifest mt → tr_module_type mt

and tr_with_constraint loc = function
   ∣ Pwith_type td → tr_type_declaration td
   ∣ Pwith_module id → add_uses_q loc Module id
57.1emValue expressions for the module language.
and tr_module_expr me =
   tr_module_expr_desc me.pmod_loc me.pmod_desc

and tr_module_expr_desc loc = function
   ∣ Pmod_ident id →
       add_uses_q loc Module id
   ∣ Pmod_structure s →
       tr_structure s
   ∣ Pmod_functor (id,mt,me) →
       tr_module_type mt;
       bind_variables [id] tr_module_expr me
   ∣ Pmod_apply (me1,me2) →
       tr_module_expr me1;
       tr_module_expr me2
   ∣ Pmod_constraint (me,mt) →
       tr_module_expr me;
       tr_module_type mt

and tr_structure_item_desc loc = function
   ∣ Pstr_eval e →
       tr_expression e
   ∣ Pstr_value (_,pel) →
       iter_fst pattern_for_def pel; iter_snd tr_expression pel
   ∣ Pstr_primitive (id,vd) →
       add_def loc Value id; tr_value_description vd
   ∣ Pstr_type l →
       iter_fst (add_def loc Type) l; iter_snd tr_type_declaration l
   ∣ Pstr_exception (id,ed) →
       add_def loc Exception id; tr_exception_declaration ed
   ∣ Pstr_module (id,me) →
       add_def loc Module id; tr_module_expr me
   ∣ Pstr_recmodule l →
       List.iter
         (fun (id,mt,me) →
             add_def loc Module id; tr_module_type mt; tr_module_expr me) l
   ∣ Pstr_modtype (id,mt) →
       add_def loc ModuleType id; tr_module_type mt
   ∣ Pstr_open m →
       add_uses_q loc Module m
   ∣ Pstr_class l →
       List.iter tr_class_declaration l
   ∣ Pstr_class_type l →
       List.iter tr_class_type_declaration l
   ∣ Pstr_exn_rebind (id,q) →
       add_def loc Exception id;
       add_uses_q loc Exception q
   ∣ Pstr_include me →
       tr_module_expr me
58.1emGiven all that collecting functions, we can now define two functions cross_implem and cross_interf which respectively compute the cross-references in implementations and interfaces.
let zero = { pos_fname = ""; pos_lnum = 0; pos_bol = 0; pos_cnum = 9 }

let add_module m =
add_def { loc_start = zero; loc_end = zero; loc_ghost = false } Module m

let wrapper parsing_function traverse_function f m =
   reset_cross f 0;
   add_module m;
   let c = open_in f in
   let lexbuf = Lexing.from_channel c in
   try
     traverse_function (parsing_function lexbuf);
     close_in c
   with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
     if ¬ !quiet then
       eprintf " ** warning: syntax error while parsing %s\n" f;
     close_in c
   end

let cross_interf = wrapper Parse.interface tr_signature
59.1emcross-referencing lex and yacc description files
let input_string_inside_file ic loc =
   seek_in ic loc.Lex_syntax.start_pos.pos_cnum;
   let len =
     loc.Lex_syntax.end_pos.pos_cnum − loc.Lex_syntax.start_pos.pos_cnum
   in
   let buf = String.create len in
   try
     really_input ic buf 0 len;
     buf
   with End_of_file → assert false

let lexer_function_inside_file ic loc =
   seek_in ic loc.Lex_syntax.start_pos.pos_cnum;
   let left =
     ref (loc.Lex_syntax.end_pos.pos_cnum − loc.Lex_syntax.start_pos.pos_cnum)
   in
   fun buf len →
     let m = input ic buf 0 (min !left len) in
     for i=0 to pred m do
             if String.get buf i = '$' then String.set buf i ' '
     done;
     left := !left − m;
     m

let cross_action_inside_file msg f m loc =
   reset_cross f loc.Lex_syntax.start_pos.pos_cnum;
   let c = open_in f in
   let lexbuf = Lexing.from_function (lexer_function_inside_file c loc) in
   try
     tr_structure (Parse.implementation lexbuf);
     close_in c
   with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
     if ¬ !quiet then begin
       eprintf "File \"%s\", character %d\n"
         f loc.Lex_syntax.start_pos.pos_cnum;
       eprintf " ** warning: syntax error while parsing %s\n" msg
     end;
     close_in c
   end

let cross_type_inside_file f m loc =
   reset_cross f (loc.Lex_syntax.start_pos.pos_cnum − 7);
   let c = open_in f in
   let lexbuf =
     Lexing.from_string ("type t=" ^ input_string_inside_file c loc) in
   try
     tr_structure (Parse.implementation lexbuf);
     close_in c
   with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
     if ¬ !quiet then begin
       eprintf "File \"%s\", character %d\n"
         f loc.Lex_syntax.start_pos.pos_cnum;
       eprintf " ** warning: syntax error while parsing type\n"
     end;
     close_in c
   end

let transl_loc loc =
   { loc_start = loc.Lex_syntax.start_pos;
     loc_end = loc.Lex_syntax.end_pos;
     loc_ghost = false }
60.1emcross-referencing lex description files
let rec add_used_regexps f m r =
   match r with
       Lex_syntax.Ident (id,loc) →
         add_uses (transl_loc loc) RegExpr id
     ∣ Lex_syntax.Sequence(r1,r2) →
         add_used_regexps f m r1;
         add_used_regexps f m r2
     ∣ Lex_syntax.Alternative(r1,r2) →
         add_used_regexps f m r1;
         add_used_regexps f m r2
     ∣ Lex_syntax.Repetition(r) → add_used_regexps f m r
     ∣ Lex_syntax.Epsilon
     ∣ Lex_syntax.Characters _ → ()

let traverse_lex_defs f m lexdefs =
   (∗ Caution : header, actions and trailer must be traversed last, since traversing an action changes the location offset ∗)
   (∗ traverse named regexps ∗)
   List.iter
     (fun (id,loc,regexp) →
         add_def (transl_loc loc) RegExpr id;
         add_used_regexps f m regexp)
     lexdefs.Lex_syntax.named_regexps;

   traverse lexer rules
   List.iter
     (fun (id,loc,rules) →
         add_def (transl_loc loc) LexParseRule id;
         List.iter
           (fun (r,_) → add_used_regexps f m r)
           rules)
     lexdefs.Lex_syntax.entrypoints;
   (∗ now we can traverse actions ∗)
   (∗ traverse header ∗)
   cross_action_inside_file "header" f m lexdefs.Lex_syntax.header;
   (∗ traverse actions ∗)
   List.iter
     (fun (id,loc,rules) →
         List.iter
           (fun (regexp,action) →
             add_used_regexps f m regexp;
             cross_action_inside_file "action" f m action)
           rules)
     lexdefs.Lex_syntax.entrypoints;
   (∗ traverse trailer ∗)
   cross_action_inside_file "trailer" f m lexdefs.Lex_syntax.trailer

let cross_lex f m =
   reset_cross f 0;
   add_module m;
   let c = open_in f in
   let lexbuf = Lexing.from_channel c in
   try
     let lexdefs = Lex_parser.lexer_definition Lex_lexer.main lexbuf in
     traverse_lex_defs f m lexdefs;
     close_in c
   with Parsing.Parse_error ∣ Lex_lexer.Lexical_error _ → begin
     if ¬ !quiet then
       eprintf " ** warning: syntax error while parsing lex file %s\n" f;
     close_in c
   end
61.1emcross-referencing yacc description files
let traverse_yacc f m yacc_defs =
   (∗ Caution : header, actions and trailer must be traversed last, since traversing an action changes the location offset ∗)
   (∗ traverse decls ∗)
   let tokens =
     List.fold_left
       (fun acc decl →
           match decl with
             ∣ Yacc_syntax.Typed_tokens(typ,idl) →
                 List.fold_left
                   (fun acc (id,loc) →
                     add_def (transl_loc loc) YaccTerminal id;
                     Stringset.add id acc)
                   acc
                   idl
             ∣ Yacc_syntax.Untyped_tokens(idl) →
                 List.fold_left
                   (fun acc (id,loc) →
                     add_def (transl_loc loc) YaccTerminal id;
                     Stringset.add id acc)
                   acc
                   idl
             ∣ Yacc_syntax.Non_terminals_type(typ,idl) →
                 List.iter
                   (fun (id,loc) →
                     add_uses (transl_loc loc) YaccNonTerminal id)
                   idl;
                 acc
             ∣ Yacc_syntax.Start_symbols(idl) →
                 List.iter
                   (fun (id,loc) →
                     add_uses (transl_loc loc) YaccNonTerminal id)
                   idl;
                 acc
             ∣ Yacc_syntax.Tokens_assoc(idl) →
                 List.iter
                   (fun (id,loc) →
                     add_uses (transl_loc loc) YaccTerminal id)
                   idl;
                 acc)
       Stringset.empty
       yacc_defs.Yacc_syntax.decls
   in
   (∗ traverse grammar rules ∗)
   List.iter
     (fun ((id,loc),rhss) →
         add_def (transl_loc loc) YaccNonTerminal id;
         List.iter
           (fun (rhs,_) →
             List.iter
               (fun (id,loc) →
                   if Stringset.mem id tokens
                   then add_uses (transl_loc loc) YaccTerminal id
                   else add_uses (transl_loc loc) YaccNonTerminal id)
               rhs)
           rhss)
     yacc_defs.Yacc_syntax.rules;
   (∗ now let’s traverse types, actions, header, trailer ∗)
   (∗ traverse header ∗)
   cross_action_inside_file "header" f m yacc_defs.Yacc_syntax.header;
   (∗ traverse types in decls ∗)
   List.iter
     (function
         ∣ Yacc_syntax.Typed_tokens(typ,idl) →
             cross_type_inside_file f m typ
         ∣ Yacc_syntax.Non_terminals_type(typ,idl) →
             cross_type_inside_file f m typ
         ∣ _ → ())
     yacc_defs.Yacc_syntax.decls;
   (∗ traverse actions ∗)
   List.iter
     (fun (_,rhss) →
         List.iter
           (fun (_,action) →
             cross_action_inside_file "action" f m action)
           rhss)
     yacc_defs.Yacc_syntax.rules;
   (∗ traverse trailer ∗)
   cross_action_inside_file "trailer" f m yacc_defs.Yacc_syntax.trailer

let cross_yacc f m =
   reset_cross f 0;
   add_module m;
   let c = open_in f in
   let lexbuf = Lexing.from_channel c in
   try
     Yacc_lexer.reset_lexer f lexbuf;
     let yacc_defs = Yacc_parser.yacc_definitions Yacc_lexer.main lexbuf in
     traverse_yacc f m yacc_defs;
     close_in c
   with
     ∣ Parsing.Parse_error → begin
         Yacc_syntax.issue_warning "syntax error";
         close_in c
       end
     ∣ Yacc_lexer.Lexical_error(msg,line,col) → begin
         Yacc_syntax.issue_warning ("lexical error (" ^ msg ^ ")");
         close_in c
       end

Interface for module Pretty

62.1em The following functions pretty-print the paragraphs of code and documentation, respectively. The boolean argument indicates whether the given paragraph is the last one for pretty_print_code or the first one for pretty_print_doc.
val pretty_print_code : bool → string → unit
val pretty_print_doc : bool → bool × int × string → unit
63.1emThese three functions pretty-print subparagraphs of Caml code, Camllex code and Camlyacc code respectively
val pretty_print_caml_subpar : string → unit
val pretty_print_lex_subpar : string → unit
val pretty_print_yacc_subpar : string → unit
64.1emThis function sets values in order to reset the lexer, so we could call it on an another file.
val reset_pretty : unit → unit

Module Pretty (Lex)

65.1em {
   open Printf
   open Output
   open Lexing
66.1emGlobal variables and functions used by the lexer.
comment_depth indicates how many opening-braces we saw, so we know how many closing-braces we have to look for, in order to respect caml’s-specifications concerning comments imbrication.
   let comment_depth = ref 0

Set a reference on the starting character when we see \verb.
let verb_delim = ref (Char.chr 0)

This function returns the first char of a lexbuf.
let first_char lexbuf = lexeme_char lexbuf 0

The count_spaces function acccounts for spaces in order to respect alignment (in the L^AT_EX-outputed file) concerning left margins.
   let count_spaces s =
     let c = ref 0 in
     for i = 0 to String.length s − 1 do
       if s.[i] = '\t' then
         c := !c + (8 − (!c mod 8))
       else
         incr c
     done;
     !c

This boolean value is true if we enter in math mode, false otherwise.
let user_math_mode = ref false

user_math function does everything is needed to set the math mode when it is called, particularly it checks/sets the user_math_mode value as needed.
   let user_math () =
     if ¬ !user_math_mode then begin
       user_math_mode := true;
       enter_math ()
     end else begin
       user_math_mode := false;
       leave_math ()
     end

Checks if we are in maths mode and prints char c considering the case.
let check_user_math c =
if !user_math_mode then output_char c else output_escaped_char c

This function sets values in order to reset the lexer, so we could call it on an another file.
   let reset_pretty () =
     reset_output ();
     yaccdoublepercentcounter := 0;
     user_math_mode := false

}
67.1emShortcuts for regular expressions.
let space = [' ' '\t']
let lowercase = ['a'–'z' '\223'–'\246' '\248'–'\255' '_']
let uppercase = ['A'–'Z' '\192'–'\214' '\216'–'\222']
(∗ This is for the identifiers as specified in caml’s specifications. ∗)
let identchar =
   ['A'–'Z' 'a'–'z' '_' '\192'–'\214' '\216'–'\246' '\248'–'\255' '\'' '0'–'9']
let identifier = (lowercase ∣ uppercase) identchar^⋆
(∗ This one helps protecting special caracters. ∗)
let symbolchar =
   ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
let caml_token =
     "[" ∣ "]" ∣ "[|" ∣ "|]" ∣ "[<" ∣ ">]" ∣ "{" ∣ "}" ∣ "{<" ∣ ">}" ∣ "[]"
   ∣ "(" ∣ ")" ∣ "or" ∣ "not" ∣ "||"
let symbol_token =
   caml_token ∣ (symbolchar⁺)
let character =
   "’" ( [^ '\\' '\''] ∣ '\\' ['\\' '\'' 'n' 't' 'b' 'r']
       ∣ '\\' ['0'–'9'] ['0'–'9'] ['0'–'9'] ) "’"
let decimal_literal = ['0'–'9']⁺
let hex_literal = '0' ['x' 'X'] ['0'–'9' 'A'–'F' 'a'–'f']⁺
let oct_literal = '0' ['o' 'O'] ['0'–'7']⁺
let bin_literal = '0' ['b' 'B'] ['0'–'1']⁺
let float_literal =
   ['0'–'9']⁺ ('.' ['0'–'9']^⋆)? (['e' 'E'] ['+' '-']? ['0'–'9']⁺)?
68.1emPretty-printing of code. Main entry points for Caml and Lex and Yacc files, counts for spaces in order to respect alignment. The following function pretty-prints some code and assumes that we are at the beginning of a line.
rule pr_camlcode = parse
   ∣ space^⋆ {   let n = count_spaces (lexeme lexbuf) in indentation n;
               pr_camlcode_inside lexbuf; pr_camlcode lexbuf }
   ∣ eof {   leave_math () }

and pr_lexcode = parse
   ∣ space^⋆ {   let n = count_spaces (lexeme lexbuf) in indentation n;
               pr_lexcode_inside lexbuf; pr_lexcode lexbuf }
   ∣ eof {   leave_math () }

and pr_yacccode = parse
   ∣ space^⋆ {   let n = count_spaces (lexeme lexbuf) in indentation n;
               pr_yacccode_inside lexbuf; pr_yacccode lexbuf }
   ∣ eof {   leave_math () }
69.1emThat function pretty-prints the Caml code anywhere else.
and pr_camlcode_inside = parse
   ∣ '\n'
       {   end_line () }
   ∣ space⁺
       {   output_char '~'; pr_camlcode_inside lexbuf }
   ∣ character
       {   output_verbatim (lexeme lexbuf); pr_camlcode_inside lexbuf }
   ∣ "’" identifier
       {   let id = lexeme lexbuf in
         output_type_variable (String.sub id 1 (pred (String.length id)));
         pr_camlcode_inside lexbuf }
   ∣ "(*r"
       {   output_hfill (); output_bc (); comment_depth := 1;
         pr_comment lexbuf; pr_camlcode_inside lexbuf }
   ∣ "(*"
       {   output_bc (); comment_depth := 1;
         pr_comment lexbuf; pr_camlcode_inside lexbuf }
   ∣ '"'
       {   output_bs (); pr_code_string lexbuf; pr_camlcode_inside lexbuf }
   ∣ symbol_token
       {   output_symbol (lexeme lexbuf); pr_camlcode_inside lexbuf }
   ∣ (identifier '.')^⋆ identifier
       {   output_ident (lexeme lexbuf); pr_camlcode_inside lexbuf }
   ∣ eof
       {   () }
   ∣ decimal_literal ∣ hex_literal ∣ oct_literal ∣ bin_literal
       {   output_integer (lexeme lexbuf); pr_camlcode_inside lexbuf }
   ∣ float_literal
       {   output_float (lexeme lexbuf); pr_camlcode_inside lexbuf }
   ∣ _
       {   output_escaped_char (first_char lexbuf); pr_camlcode_inside lexbuf }
70.1emThat function pretty-prints the Lex code anywhere else.
and pr_lexcode_inside = parse
   ∣ '_'
       {   output_string "\\ocwlexwc"; pr_lexcode_inside lexbuf }
   ∣ '*'
       {   enter_math ();
         output_string "^\\star{}";
         pr_lexcode_inside lexbuf }
   ∣ '+'
       {   enter_math ();
         output_string "^{\\scriptscriptstyle +}";
         pr_lexcode_inside lexbuf }
   ∣ '-'
       {   leave_math ();
         output_string "--";
         pr_lexcode_inside lexbuf }
   ∣ '|'
       {   enter_math (); output_string "\\mid{}"; pr_lexcode_inside lexbuf }
   ∣ '\n'
       {   end_line () }
   ∣ space⁺
       {   output_char '~'; pr_lexcode_inside lexbuf }
   ∣ character
       {   output_verbatim (lexeme lexbuf); pr_lexcode_inside lexbuf }
   ∣ "(*" {   output_bc (); comment_depth := 1;
             pr_comment lexbuf; pr_lexcode_inside lexbuf }
   ∣ "(*r"
       {   output_hfill (); output_bc (); comment_depth := 1;
         pr_comment lexbuf; pr_lexcode_inside lexbuf }
   ∣ '"' {   output_bs (); pr_code_string lexbuf; pr_lexcode_inside lexbuf }
   ∣ identifier
       {   output_lex_ident (lexeme lexbuf); pr_lexcode_inside lexbuf }
   ∣ eof {   () }
   ∣ _
       {   output_escaped_char (first_char lexbuf); pr_lexcode_inside lexbuf }
71.1emThat function pretty-prints the Yacc code anywhere else.
and pr_yacccode_inside = parse
   ∣ "%%"
       {
         incr yaccdoublepercentcounter;
         output_string
           (if !yaccdoublepercentcounter = 1
             then "\\ocwyaccrules"
             else "\\ocwyacctrailer");
         pr_yacccode_inside lexbuf
       }
   ∣ "%{"
       {
         output_string "\\ocwyaccopercentbrace";
         pr_yacccode_inside lexbuf
       }
   ∣ "%}"
       {
         output_string "\\ocwyacccpercentbrace";
         pr_yacccode_inside lexbuf
       }
   ∣ ":"
       {
         output_string "\\ocwyacccolon";
         pr_yacccode_inside lexbuf
       }
   ∣ ";"
       {
         output_string "\\ocwyaccendrule";
         pr_yacccode_inside lexbuf
       }
   ∣ "|"
       {
         output_string "\\ocwyaccpipe";
         pr_yacccode_inside lexbuf
       }
   ∣ '\n'
       {   end_line (); }
   ∣ space⁺
       {   output_char '~'; pr_yacccode_inside lexbuf }
   ∣ "/*r"
       {   output_hfill (); output_byc (); pr_yacc_comment lexbuf;
         pr_yacccode_inside lexbuf }
   ∣ "/*"
       {   output_byc (); pr_yacc_comment lexbuf; pr_yacccode_inside lexbuf }
   ∣ '%'? identifier
       {   output_yacc_ident (lexeme lexbuf); pr_yacccode_inside lexbuf }
   ∣ _
       {   output_escaped_char (first_char lexbuf); pr_yacccode_inside lexbuf }
   ∣ eof
       {   () }
72.1emComments.
and pr_comment = parse
   ∣ "(*"
       {   output_bc ();
         incr comment_depth;
         pr_comment lexbuf }
   ∣ "*)"
       {   output_ec ();
         decr comment_depth;
         if !comment_depth > 0 then pr_comment lexbuf }
   ∣ '"'
       {   output_bs ();
         pr_code_string lexbuf;
         pr_comment lexbuf; }
   ∣ '['
       {   if !user_math_mode then
           output_char '['
         else begin
           bracket_depth := 1;
           begin_dcode (); escaped_code lexbuf; end_dcode ()
         end;
         pr_comment lexbuf }
   ∣ eof
       {   () }
   ∣ "\\$"
       {   output_string (lexeme lexbuf); pr_comment lexbuf }
   ∣ '$'
       {   user_math(); pr_comment lexbuf }
   ∣ _
       {   output_char (first_char lexbuf); pr_comment lexbuf }

The C_like_comments are not inbricable
and pr_yacc_comment = parse
   ∣ "*/" {   output_eyc (); }
   ∣ '\n' space^⋆ '*' ' '
           {   output_string "\n "; pr_yacc_comment lexbuf }
   ∣ '[' {   if !user_math_mode then
               output_char '['
             else begin
               bracket_depth := 1;
               begin_dcode (); escaped_code lexbuf; end_dcode ()
             end;
             pr_yacc_comment lexbuf }
   ∣ eof {   () }
   ∣ "\\$" {   output_string (lexeme lexbuf); pr_yacc_comment lexbuf }
   ∣ '$' {   user_math(); pr_yacc_comment lexbuf }
   ∣ _  {   output_char (first_char lexbuf); pr_yacc_comment lexbuf }
73.1emStrings in code.
and pr_code_string = parse
   ∣ '"' {   output_es () }
   ∣ '\n' {   end_line_string (); pr_code_string lexbuf }
   ∣ ' ' {   output_vspace (); pr_code_string lexbuf }
   ∣ '\\' ['"' 't' 'b' 'r']
           {   output_escaped_char '\\';
             output_char (lexeme_char lexbuf 1);
             pr_code_string lexbuf }
   ∣ '\\' '\n'
           {   output_escaped_char '\\'; end_line_string ();
             pr_code_string lexbuf }
   ∣ '\\' '\\'
           {   output_escaped_char '\\'; output_escaped_char '\\';
             pr_code_string lexbuf }
   ∣ eof {   () }
   ∣ '-' {   output_ascii_char 45; pr_code_string lexbuf }
   ∣ _  {   output_escaped_char (first_char lexbuf); pr_code_string lexbuf }
74.1emEscaped code.
and escaped_code = parse
   ∣ '[' {   output_char '['; incr bracket_depth; escaped_code lexbuf }
   ∣ ']' {   decr bracket_depth;
             if !bracket_depth > 0 then begin
               output_char ']'; escaped_code lexbuf
             end else
               if ¬ !user_math_mode then leave_math () }
   ∣ '"' {   output_bs (); pr_code_string lexbuf; escaped_code lexbuf }
   ∣ space⁺
           {   output_char '~'; escaped_code lexbuf }
   ∣ character
           {   output_verbatim (lexeme lexbuf); escaped_code lexbuf }
   ∣ "’" identifier
           {   let id = lexeme lexbuf in
             output_type_variable (String.sub id 1 (pred (String.length id)));
             escaped_code lexbuf }
   ∣ symbol_token
           {   output_symbol (lexeme lexbuf); escaped_code lexbuf }
   ∣ identifier
           {   output_ident (lexeme lexbuf); escaped_code lexbuf }
   ∣ eof {   if ¬ !user_math_mode then leave_math () }
   ∣ decimal_literal ∣ hex_literal ∣ oct_literal ∣ bin_literal
           {   output_integer (lexeme lexbuf); escaped_code lexbuf }
   ∣ float_literal
           {   output_float (lexeme lexbuf); escaped_code lexbuf }
   ∣ _  {   output_escaped_char (first_char lexbuf); escaped_code lexbuf }
75.1emDocumentation. It is output ‘as is’, except for quotations.
and pr_doc = parse
   ∣ '['
       {   if !user_math_mode then
           output_char '['
         else begin
           bracket_depth := 1;
           begin_dcode (); escaped_code lexbuf; end_dcode ()
         end;
         pr_doc lexbuf }
   ∣ "\\$"
       {   output_string (lexeme lexbuf); pr_doc lexbuf }
   ∣ '$'
       {   user_math(); pr_doc lexbuf }
   ∣ "\\verb" _
       {   verb_delim := lexeme_char lexbuf 5;
         output_string (lexeme lexbuf);
         pr_verb lexbuf; pr_doc lexbuf }
   ∣ "\\begin{verbatim}"
       {   output_string (lexeme lexbuf);
         pr_verbatim lexbuf; pr_doc lexbuf }
   ∣ eof
       {   () }
   ∣ _
       {   output_char (first_char lexbuf); pr_doc lexbuf }

and pr_doc_title = parse
   ∣ '['
       {   if !user_math_mode then
           output_char '['
         else begin
           bracket_depth := 1;
           begin_dcode (); escaped_code lexbuf; end_dcode ()
         end;
         pr_doc_title lexbuf }
   ∣ '.'
       {   output_string ".}\\quad{}" }
   ∣ eof
       {   () }
   ∣ _
       {   output_char (first_char lexbuf); pr_doc_title lexbuf }

and pr_verb = parse
   ∣ eof {   () }
   ∣ _  {   let c = lexeme_char lexbuf 0 in
             output_char c;
             if c ≡ !verb_delim then () else pr_verb lexbuf }

and pr_verbatim = parse
   ∣ "\\end{verbatim}"
           {   output_string (lexeme lexbuf) }
   ∣ eof {   () }
   ∣ _  {   output_char (lexeme_char lexbuf 0); pr_verbatim lexbuf }

{
76.1empretty-printing subparagraphs
let pretty_print_caml_subpar s =
pr_camlcode (Lexing.from_string s)

let pretty_print_yacc_subpar s =
pr_yacccode (Lexing.from_string s)
77.1emThen we can introduce two functions pretty_print_code and pretty_print_doc, which pretty-print respectively code and documentation parts.

   let pretty_print_code is_last_paragraph s =
     pr_camlcode (Lexing.from_string s);
     end_code_paragraph is_last_paragraph

   let pretty_print_doc is_first_paragraph (big,n,s) =
     begin_doc_paragraph is_first_paragraph n;
     let lb = Lexing.from_string s in
     if big then begin output_string "\\textbf{"; pr_doc_title lb end;
     pr_doc lb;
     end_doc_paragraph ()

Interface for module Web

78.1emThis module is the heart of the program. The only function is produce_document, which takes a list of files and produces the final L^AT_EX document.
79.1emSource file structure.
A source file is splitted into paragraphs of code and documentation. A new paragraph begins either when switching between code and comment or, within code, when an empty line occurs.

A paragraph of documentation contains arbitrary text. A paragraph of CAML code is arbitrary CAML source text. A paragraph of LEX/YACC code is again a sequence of subparagraphs, which are either CAML source (actions), CAMLLEX syntax or CAMLYACC syntax
type sub_paragraph =
   ∣ CamlCode of string
   ∣ LexCode of string
   ∣ YaccCode of string

type paragraph =
   ∣ Documentation of bool × int × string
   ∣ RawLaTeX of string
   ∣ Code of int × string
   ∣ LexYaccCode of int × (sub_paragraph list)

A web section is a numbered part of a source file, which contains a sequence of paragraphs. The sec_beg field is the character position of the beginning of the web section inside the whole file
type raw_section = {
sec_contents : paragraph list;
sec_beg : int }

Finally, the contents of a source file is a sequence of web sections. The content_file field is the whole file name (including dirname and extension) whereas the content_name field is the corresponding module name
type content = {
   content_file : string;
   content_name : string;
   content_contents : raw_section list }
80.1emA source file is either an implementation, an interface, a camllex description, a camlyacc description, or any other file, which is then considered as a L^AT_EX file.
type file =
   ∣ Implem of content
   ∣ Interf of content
   ∣ Lex of content
   ∣ Yacc of content
   ∣ Other of string
81.1emOptions.

extern_defs indicates whether identifiers used but not defined should appear in the index; default value is false.

add_latex_option passed an option to the ocamlweb L^AT_EX package.
val extern_defs : bool ref
val add_latex_option : string → unit
val index : bool ref
val web : bool ref
82.1emMain entry: production of the document from a list of files.
val produce_document : file list → unit

Module Web

83.1em type sub_paragraph =
   ∣ CamlCode of string
   ∣ LexCode of string
   ∣ YaccCode of string

type paragraph =
   ∣ Documentation of bool × int × string
   ∣ RawLaTeX of string
   ∣ Code of int × string
   ∣ LexYaccCode of int × (sub_paragraph list)

type content = {
   content_file : string;
   content_name : string;
   content_contents : raw_section list }

type file =
   ∣ Implem of content
   ∣ Interf of content
   ∣ Lex of content
   ∣ Yacc of content
   ∣ Other of string
84.1emOptions of the engine.
let index = ref true

let add_latex_option s =
   if !latex_options = "" then
     latex_options := s
   else
     latex_options := !latex_options ^ "," ^ s
85.1emConstruction of the global index.
let index_file = function
   ∣ Implem i → cross_implem i.content_file i.content_name
   ∣ Interf i → cross_interf i.content_file i.content_name
   ∣ Yacc i → cross_yacc i.content_file i.content_name
   ∣ Lex i → cross_lex i.content_file i.content_name
   ∣ Other _ → ()

let build_index l = List.iter index_file l
86.1emThe locations tables.
module Smap = Map.Make(struct type t = string let compare = compare end)

let add_loc table file ((_,s) as loc) =
let l = try Smap.find file !table with Not_found → [(0,s)] in
table := Smap.add file (loc :: l) !table

let add_par_loc =
   let par_counter = ref 0 in
   fun f p → match p with
     ∣ Code (l,_) →
         incr par_counter;
         add_loc code_locations f (l,!par_counter)
     ∣ LexYaccCode (l,_) →
         incr par_counter;
         add_loc code_locations f (l,!par_counter)
     ∣ Documentation _ → ()
     ∣ RawLaTeX _ → ()

let add_sec_loc =
   let sec_counter = ref 0 in
   fun f s →
     incr sec_counter;
     add_loc sec_locations f (s.sec_beg,!sec_counter);
         List.iter (add_par_loc f) s.sec_contents

let add_file_loc it =
List.iter (add_sec_loc it.content_file) it.content_contents

let locations_for_a_file = function
   ∣ Implem i → add_file_loc i
   ∣ Interf i → add_file_loc i
   ∣ Lex i → add_file_loc i
   ∣ Yacc i → add_file_loc i
   ∣ Other _ → ()

let find_where w =
   let rec lookup = function
     ∣ [ ] → raise Not_found
     ∣ (l,n) :: r → if w.w_loc ≥ l then ((w.w_filename,l),n) else lookup r
   in
   let table = if !web then !sec_locations else !code_locations in
   lookup (Smap.find w.w_filename table)
87.1emPrinting of the index.
88.1emAlphabetic order for index entries. To sort index entries, we define the following order relation alpha_string. It puts symbols first (identifiers that do not begin with a letter), and symbols are compared using Caml’s generic order relation. For real identifiers, we first normalize them by translating lowercase characters to uppercase ones and by removing all the accents, and then we use Caml’s comparison.

let norm_char c = match Char.uppercase c with
   ∣ '\192'..'\198' → 'A'
   ∣ '\199' → 'C'
   ∣ '\200'..'\203' → 'E'
   ∣ '\204'..'\207' → 'I'
   ∣ '\209' → 'N'
   ∣ '\210'..'\214' ∣ '\216' → 'O'
   ∣ '\217'..'\220' → 'U'
   ∣ '\221' → 'Y'
   ∣ c → c

let norm_string s =
   let u = String.copy s in
   for i = 0 to String.length s − 1 do
     u.[i] ← norm_char s.[i]
   done;
   u

let alpha_string s1 s2 =
   match what_is_first_char s1, what_is_first_char s2 with
     ∣ Symbol, Symbol → s1 < s2
     ∣ Symbol, _ → true
     ∣ _, Symbol → false
     ∣ _,_ → norm_string s1 < norm_string s2

let order_entry e1 e2 =
(alpha_string e1.e_name e2.e_name) ∨
(e1.e_name = e2.e_name ∧ e1.e_type < e2.e_type)
89.1emThe following function collects all the index entries and sort them using alpha_string, returning a list.
module Idset = Set.Make(struct type t = index_entry let compare = compare end)

let all_entries () =
   let s = Idmap.fold (fun x _ s → Idset.add x s) !used Idset.empty in
   let s = Idmap.fold (fun x _ s → Idset.add x s) !defined s in
   Sort.list order_entry (Idset.elements s)
90.1emWhen we are in L^AT_EX style, an index entry only consists in two lists of labels, which are treated by the L^AT_EX macro \ocwrefindexentry. When we are in WEB style, we can do a bit better, replacing a list like 1,2,3,4,7,8,10 by 1–4,7,8,10, as in usual L^AT_EX indexes. The following function intervals is used to group together the lists of at least three consecutive integers.

let intervals l =
   let rec group = function
     ∣ (acc, [ ]) → List.rev acc
     ∣ (Interval (s1,(_,n2)) :: acc, (f,n) :: rem) when n = succ n2 →
         group (Interval (s1,(f,n)) :: acc, rem)
     ∣ ((Single _)::(Single (f1,n1))::acc, (f,n)::rem) when n = n1 + 2 →
         group (Interval ((f1,n1),(f,n)) :: acc, rem)
     ∣ (acc, (f,n) :: rem) →
         group ((Single (f,n)) :: acc, rem)
   in
   group ([ ],l)

let elem_map f = function
∣ Single x → Single (f x)
∣ Interval (x,y) → Interval (f x, f y)

let web_list l =
let l = intervals l in
List.map (elem_map (fun x → make_label_name (fst x))) l
91.1emPrinting one index entry. The function call (list_in_table id t) collects all the sections for the identifier id in the table t, using the function find_where, and sort the result thanks to the counter which was associated to each new location (see section 86). It also removes the duplicates labels.

let rec uniquize = function
∣ [ ] ∣ [_] as l → l
∣ x :: (y :: r as l) → if x = y then uniquize l else x :: (uniquize l)

let map_succeed_nf f l =
   let rec map = function
     ∣ [ ] → [ ]
     ∣ x :: l → try (f x) :: (map l) with Not_found → map l
   in
   map l

let list_in_table id t =
   try
     let l = Whereset.elements (Idmap.find id t) in
     let l = map_succeed_nf find_where l in
     let l = Sort.list (fun x x′ → snd x < snd x′) l in
     uniquize l
   with Not_found →
     [ ]

let entry_type_name = function
   ∣ Value ∣ Constructor → ""
   ∣ Field → "(field)"
   ∣ Label → "(label)"
   ∣ Type → "(type)"
   ∣ Exception → "(exn)"
   ∣ Module → "(module)"
   ∣ ModuleType → "(sig)"
   ∣ Class → "(class)"
   ∣ Method → "(method)"
   ∣ LexParseRule → "(camllex parsing rule)"
   ∣ RegExpr → "(camllex regexpr)"
   ∣ YaccNonTerminal → "(camlyacc non-terminal)"
   ∣ YaccTerminal → "(camlyacc token)"

let print_one_entry id =
   let def = list_in_table id !defined in
   if !extern_defs ∨ def ≠ [ ] then begin
     let use = list_in_table id !used in
     let s = id.e_name in
     let t = entry_type_name id.e_type in
     if !web then
       output_index_entry s t (web_list def) (web_list use)
     else
       output_raw_index_entry s t (label_list def) (label_list use)
   end
92.1emThen printing the index is just iterating print_one_entry on all the index entries, given by (all_entries()).
let print_index () =
   begin_index ();
   List.iter print_one_entry (all_entries());
   end_index ()
93.1emPretty-printing of the document.
let rec pretty_print_sub_paragraph = function
   ∣ CamlCode(s) →
         pretty_print_caml_subpar s
   ∣ YaccCode(s) →
         pretty_print_yacc_subpar s
   ∣ LexCode(s) →
         pretty_print_lex_subpar s

let pretty_print_paragraph is_first_paragraph is_last_paragraph f = function
   ∣ Documentation (b,n,s) →
       end_code ();
       pretty_print_doc is_first_paragraph (b,n,s);
       end_line() ∣ RawLaTeX s →
       end_code ();
       output_string s;
       end_line()
   ∣ Code (l,s) →
       if l > 0 then output_label (make_label_name (f,l));
       begin_code_paragraph ();
       begin_code ();
       pretty_print_code is_last_paragraph s
   ∣ LexYaccCode (l,s) →
       if l > 0 then output_label (make_label_name (f,l));
       begin_code_paragraph ();
       begin_code ();
       List.iter pretty_print_sub_paragraph s;
       end_code_paragraph is_last_paragraph

let pretty_print_section first f s =
   if !web then begin_section ();
   if first ∧ s.sec_beg > 0 then output_label (make_label_name (f,0));
   output_label (make_label_name (f,s.sec_beg));
   let rec loop is_first_paragraph = function
     ∣ [ ] →
         ()
     ∣ [ p ] →
         pretty_print_paragraph is_first_paragraph true f p
     ∣ p :: rest →
         pretty_print_paragraph is_first_paragraph false f p;
         loop false rest
   in
   loop true s.sec_contents;
   end_code ()

let pretty_print_sections f = function
   ∣ [ ] → ()
   ∣ s :: r →
       pretty_print_section true f s;
       List.iter (pretty_print_section false f) r

let pretty_print_content output_header content =
   reset_pretty();
   output_header content.content_name;
   pretty_print_sections content.content_file content.content_contents

let pretty_print_file = function
   ∣ Implem i → pretty_print_content output_module i
   ∣ Interf i → pretty_print_content output_interface i
   ∣ Lex i → pretty_print_content output_lexmodule i
   ∣ Yacc i → pretty_print_content output_yaccmodule i
   ∣ Other f → output_file f
94.1emProduction of the document. We proceed in three steps:

let produce_document l =
     List.iter locations_for_a_file l;
   build_index l;
   latex_header !latex_options;
   List.iter pretty_print_file l;
   if !index then print_index ();
   latex_trailer ();
   close_output ()

Interface for module Doclexer

95.1em Caml files are represented by the record caml_file, which contains their file names and their module names. The functions module_name and make_caml_file are used to construct such values.
type caml_file = { caml_filename : string; caml_module : string }

type file_type =
   ∣ File_impl of caml_file
   ∣ File_intf of caml_file
   ∣ File_lex of caml_file
   ∣ File_yacc of caml_file
   ∣ File_other of string
96.1emThe following function read_one_file reads a Caml file, separating the sections, and separating the paragraphs inside the sections. The boolean reference skip_header indicates whether the header must be skipped. web_style records if web sections were used anywhere in any file.
val skip_header : bool ref

Module Doclexer (Lex)

97.1em {
   open Printf
   open Lexing
   open Output
   open Web
   open Pretty
98.1emGlobal variables and functions used by the lexer.
skip_header tells whether option --header has been selected by the user.
   let skip_header = ref true

for displaying error message if any, current_file records the name of the file currently read, and comment_or_string_start records the starting position of the comment or the string currently being read.
let current_file = ref ""
let comment_or_string_start = ref 0

brace_depth records the current depth of imbrication of braces {..}, to know in lex files whether we are in an action or not. lexyacc_brace_start records the position of the starting brace of the current action, to display an error message if this brace is unclosed.
   let in_lexyacc_action = ref false
   let doublepercentcounter = ref 0
   let brace_depth = ref 0
   let lexyacc_brace_start = ref 0

web_style records if web sections were used anywhere in any file.
let web_style = ref false

global variables for temporarily recording data, for building the Web.file structure.
let parbuf = Buffer.create 8192
let ignoring = ref false

   let push_char c =
     if ¬ !ignoring then Buffer.add_char parbuf c
   let push_first_char lexbuf =
     if ¬ !ignoring then Buffer.add_char parbuf (lexeme_char lexbuf 0)
   let push_string s =
     if ¬ !ignoring then Buffer.add_string parbuf s

   let push_caml_subpar () =
     if Buffer.length parbuf > 0 then begin
       subparlist := (CamlCode (Buffer.contents parbuf)) :: !subparlist;
       Buffer.clear parbuf
     end

   let push_lex_subpar () =
     if Buffer.length parbuf > 0 then begin
       subparlist := (LexCode (Buffer.contents parbuf)) :: !subparlist;
       Buffer.clear parbuf
     end

   let push_yacc_subpar () =
     if Buffer.length parbuf > 0 then begin
       subparlist := (YaccCode (Buffer.contents parbuf)) :: !subparlist;
       Buffer.clear parbuf
     end

   let push_code () =
     assert (List.length !subparlist = 0);
     if Buffer.length parbuf > 0 then begin
       parlist := (Code (!code_beg, Buffer.contents parbuf)) :: !parlist;
       Buffer.clear parbuf
     end

   let push_lexyacccode () =
     assert (Buffer.length parbuf = 0) ;
     if List.length !subparlist > 0 then begin
       parlist := (LexYaccCode (!code_beg, List.rev !subparlist)) :: !parlist;
       subparlist := [ ]
     end

   let push_doc () =
     if Buffer.length parbuf > 0 then begin
       let doc =
         Documentation (!big_section, !initial_spaces, Buffer.contents parbuf)
       in
       parlist := doc :: !parlist;
       big_section := false;
       Buffer.clear parbuf
     end

   let push_latex () =
     if Buffer.length parbuf > 0 then begin
       let doc =
         RawLaTeX (Buffer.contents parbuf)
       in
       parlist := doc :: !parlist;
       big_section := false;
       Buffer.clear parbuf
     end

   let push_section () =
     if !parlist ≠ [ ] then begin
       let s = { sec_contents = List.rev !parlist; sec_beg = !section_beg } in
       seclist := s :: !seclist;
       parlist := [ ]
     end

   let reset_lexer f =
     current_file := f;
     comment_or_string_start := 0;
     section_beg := 0;
     code_beg := 0;
     parlist := [ ];
     seclist := [ ];
     in_lexyacc_action := false;
     doublepercentcounter := 0

}
99.1emShortcuts for regular expressions.
let space = [' ' '\r' '\t']
let space_or_nl = [' ' '\t' '\r' '\n']
let character =
   "’" ( [^ '\\' '\''] ∣ '\\' ['\\' '\'' 'n' 't' 'b' 'r']
       ∣ '\\' ['0'–'9'] ['0'–'9'] ['0'–'9'] ) "’"
let up_to_end_of_comment =
   [^ '*']^⋆ '*' (([^ '*' ')'] [^ '*']^⋆ '*') ∣ '*')^⋆ ')'
100.1emEntry point to skip the headers. Returns when headers are skipped.
rule header = parse
   ∣ "(*"
       {   comment_or_string_start := lexeme_start lexbuf;
         skip_comment lexbuf;
         skip_spaces_until_nl lexbuf;
         header lexbuf }
   ∣ "\n"
       {   () }
   ∣ space⁺
       {   header lexbuf }
   ∣ _
       {   backtrack lexbuf }
   ∣ eof
       {   () }

To skip a comment (used by header).
and skip_comment = parse
   ∣ "(*"
       {   skip_comment lexbuf; skip_comment lexbuf }
   ∣ "*)"
       {   () }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocaml comment\n";
         exit 1 }
   ∣ _
       {   skip_comment lexbuf }
101.1emSame as header but for OCamlYacc
and yacc_header = parse
   ∣ "/*"
       {   comment_or_string_start := lexeme_start lexbuf;
         skip_yacc_comment lexbuf;
         skip_spaces_until_nl lexbuf;
         yacc_header lexbuf }
   ∣ "\n"
       {   () }
   ∣ space⁺
       {   yacc_header lexbuf }
   ∣ _
       {   backtrack lexbuf }
   ∣ eof
       {   () }

and skip_yacc_comment = parse
   ∣ "/*"
       {   skip_yacc_comment lexbuf; skip_yacc_comment lexbuf }
   ∣ "*/"
       {   () }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocamlyacc comment\n";
         exit 1 }
   ∣ _
       {   skip_yacc_comment lexbuf }
102.1emRecognizes a complete caml module body or interface, after header has been skipped. After calling that entry, the whole text read is in seclist.
and caml_implementation = parse
   ∣ _
       {   backtrack lexbuf;
         paragraph lexbuf;
         caml_implementation lexbuf }
   ∣ eof
       {   push_section () }

recognizes a paragraph of caml code or documentation. After calling that entry, the paragraph has been added to parlist.
and paragraph = parse
   ∣ space^⋆ '\n'
       {   paragraph lexbuf }
   ∣ space^⋆ ";;"
       {   paragraph lexbuf }
   ∣ space^⋆ "(*" '*'^⋆ "*)" space^⋆ '\n'
       {   paragraph lexbuf }
   ∣ space^⋆ "(*"
       {   comment_or_string_start := lexeme_start lexbuf;
         let s = lexeme lexbuf in
         initial_spaces := count_spaces (String.sub s 0 (String.length s − 2));
         start_of_documentation lexbuf;
         push_doc () }
   ∣ space^⋆ ("(*c" ∣ _ )
       {   code_beg := lexeme_start lexbuf;
         backtrack lexbuf;
         caml_subparagraph lexbuf;
         push_code() }
   ∣ eof
       {   () }

recognizes a whole lex description file, after header has been skipped. After calling that entry, the whole text read is in seclist.
and lex_description = parse
   ∣ _
       {   backtrack lexbuf;
         lex_paragraph lexbuf ;
         lex_description lexbuf }
   ∣ eof
       {   push_section () }

and yacc_description = parse
   ∣ _
       {   backtrack lexbuf ;
         yacc_paragraph lexbuf;
         yacc_description lexbuf }
   ∣ eof
       {   push_section () }

Recognizes a paragraph of a lex description file. After calling that entry, the paragraph has been added to parlist.
and lex_paragraph = parse
   ∣ space^⋆ '\n'
       {   lex_paragraph lexbuf }
   ∣ space^⋆ "(*" '*'^⋆ "*)" space^⋆ '\n'
       {   lex_paragraph lexbuf }
   ∣ space^⋆ ("(*c" ∣ _ )
       {   code_beg := lexeme_start lexbuf;
         backtrack lexbuf;
         lex_subparagraphs lexbuf ;
         push_lexyacccode() }
   ∣ space^⋆ "(*"
       {   comment_or_string_start := lexeme_start lexbuf;
         start_of_documentation lexbuf;
         push_doc () }
   ∣ eof
       {   () }

and yacc_paragraph = parse
   ∣ space^⋆ '\n'
       {   yacc_paragraph lexbuf }
   ∣ space^⋆ "/*" '*'^⋆ "*/" space^⋆ '\n'
       {   if ¬ !in_lexyacc_action
         then yacc_paragraph lexbuf
         else begin
           code_beg := lexeme_start lexbuf;
           backtrack lexbuf;
           yacc_subparagraphs lexbuf ;
           push_lexyacccode()
         end }
   ∣ space^⋆ "/*"
       {   if ¬ !in_lexyacc_action
         then begin
           comment_or_string_start := lexeme_start lexbuf;
           start_of_yacc_documentation lexbuf;
           push_doc ()
         end
         else begin
           code_beg := lexeme_start lexbuf;
           backtrack lexbuf;
           yacc_subparagraphs lexbuf ;
           push_lexyacccode()
         end }
   ∣ space^⋆ "(*" '*'^⋆ "*)" space^⋆ '\n'
       {   if !in_lexyacc_action
         then yacc_paragraph lexbuf
         else begin
           code_beg := lexeme_start lexbuf;
           backtrack lexbuf;
           yacc_subparagraphs lexbuf ;
           push_lexyacccode()
         end }
   ∣ space^⋆ "(*"
       {   if !in_lexyacc_action
         then begin
           comment_or_string_start := lexeme_start lexbuf;
           start_of_documentation lexbuf;
           push_doc ()
         end
         else begin
           code_beg := lexeme_start lexbuf;
           backtrack lexbuf;
           yacc_subparagraphs lexbuf ;
           push_lexyacccode()
         end }
   ∣ space^⋆ ("/*c" ∣ "(*c" ∣ _ )
       {   code_beg := lexeme_start lexbuf;
         backtrack lexbuf;
         yacc_subparagraphs lexbuf ;
         push_lexyacccode() }
   ∣ eof
       {   () }
103.1emAt the beginning of the documentation part, just after the "(*". If the first character is 's', then a new section is started. After calling that entry, the parbuf buffer contains the doc read.
and start_of_documentation = parse
   ∣ space_or_nl⁺
       {   in_documentation lexbuf }
   ∣ ('s' ∣ 'S') space_or_nl^⋆
       {   web_style := true; push_section ();
         section_beg := lexeme_start lexbuf;
         big_section := (lexeme_char lexbuf 0 ≡ 'S');
         in_documentation lexbuf }
   ∣ 'p' up_to_end_of_comment
       {   let s = lexeme lexbuf in
         push_in_preamble (String.sub s 1 (String.length s − 3)) }
   ∣ 'i'
       {   ignore lexbuf }
   ∣ 'l'
       {   in_documentation lexbuf; push_latex () }
   ∣ _
       {   backtrack lexbuf;
         in_documentation lexbuf }
   ∣ eof
       {   in_documentation lexbuf }

and start_of_yacc_documentation = parse
   ∣ space_or_nl⁺
       {   in_yacc_documentation lexbuf }
   ∣ ('s' ∣ 'S') space_or_nl^⋆
       {   web_style := true; push_section ();
         section_beg := lexeme_start lexbuf;
         big_section := (lexeme_char lexbuf 0 ≡ 'S');
         in_yacc_documentation lexbuf }
   ∣ 'p' up_to_end_of_comment
       {   let s = lexeme lexbuf in
         push_in_preamble (String.sub s 1 (String.length s − 3)) }
   ∣ 'i'
       {   yacc_ignore lexbuf }
   ∣ 'l'
       {   in_yacc_documentation lexbuf; push_latex () }
   ∣ _
       {   backtrack lexbuf;
         in_yacc_documentation lexbuf }
   ∣ eof
       {   in_yacc_documentation lexbuf }
104.1emInside the documentation part, anywhere after the "(*". After calling that entry, the parbuf buffer contains the doc read.
and in_documentation = parse
   ∣ "(*"
       {   push_string "(*";
         in_documentation lexbuf;
         push_string "*)";
         in_documentation lexbuf }
   ∣ "*)"
       {   () }
   ∣ '\n' " * "
       {   push_char '\n'; in_documentation lexbuf }
   ∣ '"'
       {   push_char '"'; in_string lexbuf; in_documentation lexbuf }
   ∣ character
       {   push_string (lexeme lexbuf); in_documentation lexbuf }
   ∣ _
       {   push_first_char lexbuf; in_documentation lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocaml comment\n";
         exit 1 }

yacc comments are NOT nested
and in_yacc_documentation = parse
   ∣ "*/"
       {   () }
   ∣ '\n' " * "
       {   push_char '\n'; in_yacc_documentation lexbuf }
   ∣ '"'
       {   push_char '"'; in_string lexbuf; in_yacc_documentation lexbuf }
   ∣ character
       {   push_string (lexeme lexbuf); in_yacc_documentation lexbuf }
   ∣ _
       {   push_first_char lexbuf; in_yacc_documentation lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocamlyacc comment\n";
         exit 1 }
105.1emRecognizes a subparagraph of caml code. After calling that entry, the parbuf buffer contains the code read.
and caml_subparagraph = parse
   ∣ space^⋆ '\n' space^⋆ '\n'
       {   backtrack lexbuf }
   ∣ ";;"
       {   backtrack lexbuf }
   ∣ eof
       {   () }
   ∣ "(*" ∣ "(*c"
       {   comment_or_string_start := lexeme_start lexbuf;
         push_string "(*";
         comment lexbuf;
         caml_subparagraph lexbuf }
   ∣ "(*i"
           {   comment_or_string_start := lexeme_start lexbuf;
             ignore lexbuf; caml_subparagraph lexbuf }
   ∣ '"' {   comment_or_string_start := lexeme_start lexbuf;
             push_char '"'; in_string lexbuf; caml_subparagraph lexbuf }
   ∣ '{' {   incr brace_depth;
             push_char '{';
             caml_subparagraph lexbuf }
   ∣ '}'
       {   if !brace_depth = 0 then backtrack lexbuf
         else
           begin
             decr brace_depth;
             push_char '}';
             caml_subparagraph lexbuf
           end }
   ∣ "%}"
       {   if !brace_depth = 0 then backtrack lexbuf
         else
           begin
             decr brace_depth;
             push_string "%}";
             caml_subparagraph lexbuf
           end }
   ∣ character
           {   push_string (lexeme lexbuf); caml_subparagraph lexbuf }
   ∣ _  {   push_first_char lexbuf; caml_subparagraph lexbuf }
106.1emRecognizes a sequence of subparagraphs of lex description, including CAML actions. After calling that entry, the subparagraphs read are in subparlist.
and lex_subparagraphs = parse
   ∣ space^⋆ '\n' space^⋆ '\n'
       {   backtrack lexbuf }
   ∣ ";;"
       {   () }
   ∣ eof
       {   if !in_lexyacc_action
         then
           begin
             eprintf "File \"%s\", character %d\n"
               !current_file !lexyacc_brace_start;
             eprintf "Unclosed brace\n" ;
             exit 1
           end }
   ∣ '}'
       {   if !in_lexyacc_action
         then
           begin
             push_char '}';
             in_lexyacc_action := false;
             lex_subparagraph lexbuf;
             push_lex_subpar();
             lex_subparagraphs lexbuf
           end
         else
           begin
             eprintf "File \"%s\", character %d\n"
               !current_file (lexeme_start lexbuf);
             eprintf "Unexpected closing brace ";
             exit 1
           end }
   ∣ _
       {   backtrack lexbuf;
         if !in_lexyacc_action
         then
           begin
             caml_subparagraph lexbuf;
             push_caml_subpar()
           end
         else
           begin
             lex_subparagraph lexbuf;
             push_lex_subpar()
           end;
         lex_subparagraphs lexbuf }

and yacc_subparagraphs = parse
   ∣ space^⋆ '\n' space^⋆ '\n'
       {   backtrack lexbuf }
   ∣ "%%"
       {   if !in_lexyacc_action then begin
           push_string "%%";
           caml_subparagraph lexbuf;
           push_caml_subpar();
           yacc_subparagraphs lexbuf
         end
         else begin
           push_yacc_subpar();
           push_string "%%";
           push_yacc_subpar();
           incr doublepercentcounter;
           if !doublepercentcounter ≥ 2 then in_lexyacc_action := true
         end }
   ∣ ";;"
       {   if !in_lexyacc_action then ()
         else begin
           push_string ";;";
           yacc_subparagraph lexbuf;
           push_yacc_subpar();
           yacc_subparagraphs lexbuf
         end }
   ∣ eof
       {   if !in_lexyacc_action ∧ !doublepercentcounter ≤ 1
         then
           begin
             eprintf "File \"%s\", character %d\n"
               !current_file !lexyacc_brace_start;
             eprintf "Unclosed brace\n" ;
             exit 1
           end }
   ∣ '}'
       {   if !in_lexyacc_action
         then
           begin
             push_char '}';
             in_lexyacc_action := false;
             yacc_subparagraph lexbuf;
             push_yacc_subpar();
             yacc_subparagraphs lexbuf
           end
         else
           begin
             eprintf "File \"%s\", character %d\n"
               !current_file (lexeme_start lexbuf);
             eprintf "Unexpected closing brace ";
             exit 1
           end }
   ∣ "%}"
       {   if !in_lexyacc_action
         then
           begin
             push_string "%}";
             in_lexyacc_action := false;
             yacc_subparagraph lexbuf;
             push_yacc_subpar();
             yacc_subparagraphs lexbuf
           end
         else
           begin
             eprintf "File \"%s\", character %d\n"
               !current_file (lexeme_start lexbuf);
             eprintf "Unexpected closing brace ";
             exit 1
           end }
   ∣ _
       {   backtrack lexbuf;
         if !in_lexyacc_action
         then
           begin
             caml_subparagraph lexbuf;
             push_caml_subpar()
           end
         else
           begin
             yacc_subparagraph lexbuf;
             push_yacc_subpar()
           end;
         yacc_subparagraphs lexbuf }
107.1emRecognizes a subparagraph of lex description. After calling that entry, the subparagraph read is in parbuf.
and lex_subparagraph = parse
   ∣ space^⋆ '\n' space^⋆ '\n'
           {   backtrack lexbuf }
   ∣ ";;" {   backtrack lexbuf }
   ∣ eof {   () }
   ∣ "(*" ∣ "(*c"
           {   comment_or_string_start := lexeme_start lexbuf;
             push_string "(*";
             comment lexbuf;
             lex_subparagraph lexbuf }
   ∣ space^⋆ "(*i"
           {   comment_or_string_start := lexeme_start lexbuf;
             ignore lexbuf; lex_subparagraph lexbuf }
   ∣ '"' {   comment_or_string_start := lexeme_start lexbuf;
             push_char '"'; in_string lexbuf; lex_subparagraph lexbuf }
   ∣ '{'
       {   lexyacc_brace_start := lexeme_start lexbuf;
         push_char '{';
         in_lexyacc_action := true }
   ∣ character
       {   push_string (lexeme lexbuf); lex_subparagraph lexbuf }
   ∣ _
       {   push_first_char lexbuf; lex_subparagraph lexbuf }

and yacc_subparagraph = parse
   ∣ space^⋆ '\n' space^⋆ '\n'
           {   backtrack lexbuf }
   ∣ "%%" {   backtrack lexbuf }
   ∣ ";;" {   backtrack lexbuf }
   ∣ eof {   () }
   ∣ "/*" ∣ "/*c"
           {   comment_or_string_start := lexeme_start lexbuf;
             push_string "/*";
             yacc_comment lexbuf;
             yacc_subparagraph lexbuf }
   ∣ space^⋆ "/*i"
           {   comment_or_string_start := lexeme_start lexbuf;
             yacc_ignore lexbuf; yacc_subparagraph lexbuf }
   ∣ '"' {   comment_or_string_start := lexeme_start lexbuf;
             push_char '"'; in_string lexbuf; yacc_subparagraph lexbuf }
   ∣ "%{"
       {   lexyacc_brace_start := lexeme_start lexbuf;
         push_string "%{";
         in_lexyacc_action := true }
   ∣ '{'
       {   lexyacc_brace_start := lexeme_start lexbuf;
         push_char '{';
         in_lexyacc_action := true }
   ∣ '<'
       {   lexyacc_brace_start := lexeme_start lexbuf;
         push_char '<';
         push_yacc_subpar ();
         yacc_type lexbuf;
         yacc_subparagraph lexbuf }
   ∣ character
       {   push_string (lexeme lexbuf); yacc_subparagraph lexbuf }
   ∣ _
       {   push_first_char lexbuf; yacc_subparagraph lexbuf }

and yacc_type = parse
   ∣ "->"
       {   push_string "->"; yacc_type lexbuf }
   ∣ '>'
       {   push_caml_subpar(); push_char '>' }
   ∣ _
       {   push_first_char lexbuf; yacc_type lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !lexyacc_brace_start;
         eprintf "Unclosed ’<’";
         exit 1 }
108.1emTo skip spaces until a newline.
and skip_spaces_until_nl = parse
   ∣ space^⋆ '\n'? {   () }
   ∣ eof {   () }
   ∣ _  {   backtrack lexbuf }
109.1emTo read a comment inside a piece of code.
and comment = parse
   ∣ "(*" ∣ "(*c"
       {   push_string "(*"; comment lexbuf; comment lexbuf }
   ∣ "*)"
       {   push_string "*)" }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocaml comment\n";
         exit 1 }
   ∣ _
       {   push_first_char lexbuf; comment lexbuf }

and yacc_comment = parse
   ∣ "*/"
       {   push_string "*/" }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocamlyacc comment\n";
         exit 1 }
   ∣ _
       {   push_first_char lexbuf; yacc_comment lexbuf }
110.1emIgnored parts, between "(*i" and "i*)". Note that such comments are not nested.
and ignore = parse
   ∣ "i*)"
       {   skip_spaces_until_nl lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocamlweb comment\n";
         exit 1 }
   ∣ _
       {   ignore lexbuf }

and yacc_ignore = parse
   ∣ "i*/"
       {   skip_spaces_until_nl lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocamlweb comment\n";
         exit 1 }
   ∣ _
       {   yacc_ignore lexbuf }
111.1emStrings in code.
and in_string = parse
   ∣ '"'
       {   push_char '"' }
   ∣ '\\' ['\\' '"' 'n' 't' 'b' 'r']
       {   push_string (lexeme lexbuf); in_string lexbuf }
   ∣ eof
       {   eprintf "File \"%s\", character %d\n"
           !current_file !comment_or_string_start;
         eprintf "Unterminated ocaml string\n";
         exit 1 }
   ∣ _
       {   push_first_char lexbuf; in_string lexbuf }

{
112.1emCaml files.
type caml_file = { caml_filename : string; caml_module : string }

let make_caml_file f =
{ caml_filename = f;
caml_module = module_name (Filename.chop_extension f) }

type file_type =
   ∣ File_impl of caml_file
   ∣ File_intf of caml_file
   ∣ File_lex of caml_file
   ∣ File_yacc of caml_file
   ∣ File_other of string
113.1emReading Caml files.
let raw_read_file header entry f =
   reset_lexer f;
   let c = open_in f in
   let buf = Lexing.from_channel c in
   if !skip_header then header buf;
   entry buf;
   close_in c;
   List.rev !seclist

let read header entry m =
   { content_file = m.caml_filename;
     content_name = m.caml_module;
     content_contents = raw_read_file header entry m.caml_filename; }

let read_one_file = function
   ∣ File_impl m → Implem (read header caml_implementation m)
   ∣ File_intf m → Interf (read header caml_implementation m)
   ∣ File_lex m → Lex (read header lex_description m)
   ∣ File_yacc m → Yacc (read yacc_header yacc_description m)
   ∣ File_other f → Other f

Module Main

114.1em Usage. Printed on error output.
let usage () =
   prerr_endline "";
   prerr_endline "Usage: ocamlweb <options and files>";
   prerr_endline "  -o <file>      write output in file <file>";
   prerr_endline "  --dvi          output the DVI";
   prerr_endline "  --ps           output the PostScript";
   prerr_endline "  --html         output the HTML";
   prerr_endline "  --hevea-option <opt>";
   prerr_endline "                 pass an option to hevea (HTML output)";
   prerr_endline "  -s             (short) no titles for files";
   prerr_endline "  --noweb        use manual LaTeX sectioning, not WEB";
   prerr_endline "  --header       do not skip the headers of Caml file";
   prerr_endline "  --no-preamble  suppress LaTeX header and trailer";
   prerr_endline "  --no-index     do not output the index";
   prerr_endline "  --extern-defs  keep external definitions in the index";
   prerr_endline "  --impl <file>  consider <file> as a .ml file";
   prerr_endline "  --intf <file>  consider <file> as a .mli file";
   prerr_endline "  --tex <file>   consider <file> as a .tex file";
   prerr_endline "  --latex-option <opt>";
   prerr_endline "                 pass an option to the LaTeX package ocamlweb.sty";
   prerr_endline "  --class-options <opt>";
   prerr_endline "                 set the document class options (defaults to ‘12pt’)";
   prerr_endline "  --old-fullpage uses LaTeX package fullpage with no option";
   prerr_endline "  -p <string>    insert something in LaTeX preamble";
   prerr_endline "  --files <file> read file names to process in <file>";
   prerr_endline "  --quiet        quiet mode";
   prerr_endline "  --no-greek     disable use of greek letters for type variables";
   prerr_endline "";
   prerr_endline
     "On-line documentation at http://www.lri.fr/~filliatr/ocamlweb/\n";
   exit 1
115.1emLicense informations. Printed when using the option --warranty.
let copying () =
   prerr_endline "This program is free software; you can redistribute it and/or modifyit under the terms of the GNU Library General Public License version 2, aspublished by the Free Software Foundation.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the GNU Library General Public License version 2 for more details(enclosed in the file LGPL).";
   flush stderr
116.1emBanner. Always printed. Notice that it is printed on error output, so that when the output of ocamlweb is redirected this header is not (unless both standard and error outputs are redirected, of course).
let banner () =
   eprintf "This is ocamlweb version %s, compiled on %s\n"
     Version.version Version.date;
   eprintf
     "Copyright (c) 1999-2000 Jean-Christophe Filliâtre and Claude Marché\n";
   eprintf
   "This is free software with ABSOLUTELY NO WARRANTY (use option -warranty)\n";
   flush stderr
117.1emSeparation of files. Files given on the command line are separated according to their type, which is determined by their suffix. Implementations and interfaces have respective suffixes .ml and .mli and L^AT_EX files have suffix .tex.
let check_if_file_exists f =
   if ¬ (Sys.file_exists f) then begin
     eprintf "\nocamlweb: %s: no such file\n" f;
     exit 1
   end

let what_file f =
   check_if_file_exists f;
   if check_suffix f ".ml" then
     File_impl (make_caml_file f)
   else if check_suffix f ".mli" then
     File_intf (make_caml_file f)
   else if check_suffix f ".mll" then
     File_lex (make_caml_file f)
   else if check_suffix f ".mly" then
     File_yacc (make_caml_file f)
   else if check_suffix f ".tex" then
     File_other f
   else begin
     eprintf "\nocamlweb: don’t know what to do with %s\n" f;
     exit 1
   end
118.1emReading file names from a file. File names may be given in a file instead of being given on the command line. (files_from_file f) returns the list of file names contained in the file named f. These file names must be separated by spaces, tabulations or newlines.

let files_from_file f =
   let files_from_channel ch =
     let buf = Buffer.create 80 in
     let l = ref [ ] in
     try
       while true do
         match input_char ch with
           ∣ ' ' ∣ '\t' ∣ '\n' →
               if Buffer.length buf > 0 then l := (Buffer.contents buf) :: !l;
               Buffer.clear buf
           ∣ c →
               Buffer.add_char buf c
       done; [ ]
     with End_of_file →
       List.rev !l
   in
   try
     check_if_file_exists f;
     let ch = open_in f in
     let l = files_from_channel ch in
     close_in ch;l
   with Sys_error s → begin
     eprintf "\nocamlweb: cannot read from file %s (%s)\n" f s;
     exit 1
   end
119.1emParsing of the command line. Output file, if specified, is kept in output_file.
let output_file = ref ""
let dvi = ref false
let ps = ref false
let html = ref false
let hevea_options = ref ([ ] : string list)

let parse () =
   let files = ref [ ] in
   let add_file f = files := f :: !files in
   let rec parse_rec = function
     ∣ [ ] → ()

     ∣ ("-header" ∣ "--header") :: rem →
         skip_header := false; parse_rec rem
     ∣ ("-noweb" ∣ "--noweb" ∣ "-no-web" ∣ "--no-web") :: rem →
         web := false; parse_rec rem
     ∣ ("-web" ∣ "--web") :: rem →
         web := true; parse_rec rem
     ∣ ("-nopreamble" ∣ "--nopreamble" ∣ "--no-preamble") :: rem →
         set_no_preamble true; parse_rec rem
     ∣ ("-p" ∣ "--preamble") :: s :: rem →
         push_in_preamble s; parse_rec rem
     ∣ ("-p" ∣ "--preamble") :: [ ] →
         usage ()
     ∣ ("-noindex" ∣ "--noindex" ∣ "--no-index") :: rem →
         index := false; parse_rec rem
     ∣ ("-o" ∣ "--output") :: f :: rem →
         output_file := f; parse_rec rem
     ∣ ("-o" ∣ "--output") :: [ ] →
         usage ()
     ∣ ("-s" ∣ "--short") :: rem →
         short := true; parse_rec rem
     ∣ ("-dvi" ∣ "--dvi") :: rem →
         dvi := true; parse_rec rem
     ∣ ("-ps" ∣ "--ps") :: rem →
         ps := true; parse_rec rem
     ∣ ("-html" ∣ "--html") :: rem →
         html := true; parse_rec rem
     ∣ ("-hevea-option" ∣ "--hevea-option") :: [ ] →
         usage ()
     ∣ ("-hevea-option" ∣ "--hevea-option") :: s :: rem →
         hevea_options := s :: !hevea_options; parse_rec rem
     ∣ ("-extern-defs" ∣ "--extern-defs") :: rem →
         extern_defs := true; parse_rec rem
     ∣ ("-q" ∣ "-quiet" ∣ "--quiet") :: rem →
         quiet := true; parse_rec rem

∣ ("--nogreek" ∣ "--no-greek") :: rem →
use_greek_letters := false; parse_rec rem

     ∣ ("-h" ∣ "-help" ∣ "-?" ∣ "--help") :: rem →
         banner (); usage ()
     ∣ ("-v" ∣ "-version" ∣ "--version") :: _ →
         banner (); exit 0
     ∣ ("-warranty" ∣ "--warranty") :: _ →
         copying (); exit 0

     ∣ "--class-options" :: s :: rem →
         class_options := s; parse_rec rem
     ∣ "--class-options" :: [ ] →
         usage ()
     ∣ "--latex-option" :: s :: rem →
         add_latex_option s; parse_rec rem
     ∣ "--latex-option" :: [ ] →
         usage ()
     ∣ "--old-fullpage" :: rem →
         fullpage_headings := false; parse_rec rem

     ∣ ("-impl" ∣ "--impl") :: f :: rem →
         check_if_file_exists f;
         let n =
           if Filename.check_suffix f ".mll" ∨ Filename.check_suffix f ".mly"
           then Filename.chop_extension f else f
         in
         let m = File_impl { caml_filename = f; caml_module = module_name n } in
         add_file m; parse_rec rem
     ∣ ("-impl" ∣ "--impl") :: [ ] →
         usage ()
     ∣ ("-intf" ∣ "--intf") :: f :: rem →
         check_if_file_exists f;
         let i = File_intf { caml_filename = f; caml_module = module_name f } in
         add_file i; parse_rec rem
     ∣ ("-intf" ∣ "--intf") :: [ ] →
         usage ()
     ∣ ("-tex" ∣ "--tex") :: f :: rem →
         add_file (File_other f); parse_rec rem
     ∣ ("-tex" ∣ "--tex") :: [ ] →
         usage ()
     ∣ ("-files" ∣ "--files") :: f :: rem →
         List.iter (fun f → add_file (what_file f)) (files_from_file f);
         parse_rec rem
     ∣ ("-files" ∣ "--files") :: [ ] →
         usage ()
     ∣ f :: rem →
         add_file (what_file f); parse_rec rem
   in
   parse_rec (List.tl (Array.to_list Sys.argv));
   List.rev !files
120.1emThe following function produces the output. The default output is the L^AT_EX document: in that case, we just call Web.produce_document. If option -dvi, -ps or -html is invoked, then we make calls to latex, dvips and/or hevea accordingly.
let locally dir f x =
   let cwd = Sys.getcwd () in
   try
     Sys.chdir dir; let y = f x in Sys.chdir cwd; y
   with e →
     Sys.chdir cwd; raise e

let clean_temp_files basefile =
   let remove f = try Sys.remove f with _ → () in
   remove (basefile ^ ".tex");
   remove (basefile ^ ".log");
   remove (basefile ^ ".aux");
   remove (basefile ^ ".dvi");
   remove (basefile ^ ".ps");
   remove (basefile ^ ".haux");
   remove (basefile ^ ".html")

let cat file =
   let c = open_in file in
   try
     while true do print_char (input_char c) done
   with End_of_file →
     close_in c

let copy src dst =
   let cin = open_in src
   and cout = open_out dst in
   try
     while true do Pervasives.output_char cout (input_char cin) done
   with End_of_file →
     close_in cin; close_out cout

let produce_output fl =
   if ¬ (!dvi ∨ !ps ∨ !html) then begin
     if !output_file ≠ "" then set_output_to_file !output_file;
     produce_document fl
   end else begin
     let texfile = temp_file "ocamlweb" ".tex" in
     let basefile = chop_suffix texfile ".tex" in
     set_output_to_file texfile;
     produce_document fl;
     let command =
       let file = basename texfile in
       let file =
         if !quiet then sprintf "’\\nonstopmode\\input{%s}’" file else file
       in
       sprintf "(latex %s && latex %s) 1>&2 %s" file file
         (if !quiet then "> /dev/null" else "")
     in
     let res = locally (dirname texfile) Sys.command command in
     if res ≠ 0 then begin
       eprintf "Couldn’t run LaTeX successfully\n";
       clean_and_exit basefile res
     end;
     let dvifile = basefile ^ ".dvi" in
     if !dvi then begin
       if !output_file ≠ "" then
         (∗ we cannot use Sys.rename accross file systems ∗)
         copy dvifile !output_file
       else
         cat dvifile
     end;
     if !ps then begin
       let psfile =
         if !output_file ≠ "" then !output_file else basefile ^ ".ps"
       in
       let command =
         sprintf "dvips %s -o %s %s" dvifile psfile
           (if !quiet then "> /dev/null 2>&1" else "")
       in
       let res = Sys.command command in
       if res ≠ 0 then begin
         eprintf "Couldn’t run dvips successfully\n";
         clean_and_exit basefile res
       end;
       if !output_file = "" then cat psfile
     end;
     if !html then begin
       let htmlfile =
         if !output_file ≠ "" then !output_file else basefile ^ ".html"
       in
       let options = String.concat " " (List.rev !hevea_options) in
       let command =
         sprintf "hevea %s ocamlweb.sty %s -o %s %s" options texfile htmlfile
           (if !quiet then "> /dev/null 2>&1" else "")
       in
       let res = Sys.command command in
       if res ≠ 0 then begin
         eprintf "Couldn’t run hevea successfully\n";
         clean_and_exit basefile res
       end;
       if !output_file = "" then cat htmlfile
     end;
     clean_temp_files basefile
   end
121.1emMain program. Print the banner, parse the command line, read the files and then call produce_document from module Web.
let main () =
   let files = parse() in
   if List.length files > 0 then begin
     let l = List.map read_one_file files in
     if !web_style then begin
       if ¬ !web ∧ ¬ !quiet then begin
         eprintf
           "Warning: web sections encountered while in noweb style, ignored.\n";
         flush stderr
       end
     end else
       web := false;
     if ¬ !web then add_latex_option "noweb";
     produce_output l
   end