1.1emThis is ocamlweb, a literate programming tool for Objective Caml. This document has been automatically produced using ocamlweb itself, applied to his own code, in some kind of ‘bootstrap’.
This code is split is several Caml modules, organized as follows:
Module | Function |
Output | Low level printing functions, independent of the document type |
Cross | Cross references in Caml files |
Pretty | Pretty-print of code and documentation |
Web | Production of the whole document, including the index |
Doclexer | Lexer to separate code and doc parts in Caml files |
Main | Main program |
2.1emIn that module, we concentrate all the printing functions.
Thus, it will be easy to add another kind of output, HTML for instance,
by adapting the code of that module, and nothing else.
Default output is to standard output, but it can be redirected
to a file with set_output_to_file. close_output closes the output,
if it is a file.
val set_output_to_file : string → unit
val close_output : unit → unit
3.1emThese mutable flags controls the output. If quiet, no extra
output is done on standard output, default is false. If
use_greek_letters is true, greek letters are used to display (some
of) the single-letter type variables, default is true.
val quiet : bool ref
val use_greek_letters : bool ref
val short : bool ref
4.1emThen we introduce some low level output functions, for characters
and strings.
(output_file f) copies the contents of file f on the output.
(output_verbatim s) outputs the string s ‘as is’.
val output_char : char → unit
val output_string : string → unit
val output_file : string → unit
val output_verbatim : string → unit
5.1emThe following functions are mainly useful for a LATEX output,
but will work correctly in other cases. A call to
set_no_preamble suppresses the output of the header and
trailer. (end_line ()) ends a line, (indentation n) introduces
an indentation of size n at the beggining of a line.
latex_header takes as argument the options of the LATEX
package ocamlweb.sty.
val class_options : string ref
val set_no_preamble : bool → unit
val push_in_preamble : string → unit
val fullpage_headings : bool ref
val latex_header : string → unit
val latex_trailer : unit → unit
val indentation : int → unit
val end_line : unit → unit
val end_line_string : unit → unit
val enter_math : unit → unit
val leave_math : unit → unit
6.1emThe following functions are used to pretty-print the code.
is_keyword identifies the keywords of Objective Caml.
output_ident outputs an identifier, in different faces for keywords
and other identifiers, escaping the characters that need it,
like _ for instance in LATEX.
output_escaped_char pretty-prints the reserved char of LATEX,
like &
or $
.
output_symbol pretty-prints the Caml symbols,
like → for ->.
output_type_variable s pretty-prints type variables, in particular
one-letter type variables are output as greek letters.
output_ascii_char n outputs the character of ASCII code n.
type char_type = Upper ∣ Lower ∣ Symbol
val what_is_first_char : string → char_type
val is_keyword : string → bool
val output_ident : string → unit
val output_escaped_char : char → unit
val output_symbol : string → unit
val output_type_variable : string → unit
val output_ascii_char : int → unit
output_lex_ident (resp. output_yacc_ident) outputs an
identifier as above but taking into account CAMLLEX keywords
(resp. CAMLYACC keywords)
val output_lex_ident : string → unit
val output_yacc_ident : string → unit
7.1emConstants are typeset by the following functions.
val output_integer : string → unit
val output_float : string → unit
8.1emComments inside code are opened and closed respectively by
output_bc and output_ec. The function output_hfill is called
before output_bc to justify a comment. output_byc and
output_eyc are the same for CAMLYACC comments, that is
/*
…*/
.
val output_bc : unit → unit
val output_ec : unit → unit
val output_byc : unit → unit
val output_eyc : unit → unit
val output_hfill : unit → unit
9.1emStrings inside code are opened and close respectively by
output_bs and output_es. A space character in a string is
output as a visible space, with output_vspace.
val output_bs : unit → unit
val output_es : unit → unit
val output_vspace : unit → unit
10.1emThe following functions deal with sectioning. The highest level is
the one of modules and interfaces.
The next level is the one of section. The last level is the one
of paragraphs, which are atomic pieces of documentation or code.
val output_module : string → unit
val output_interface : string → unit
val output_lexmodule : string → unit
val output_yaccmodule : string → unit
val begin_section : unit → unit
val begin_code : unit → unit
val end_code : unit → unit
val begin_dcode : unit → unit
val end_dcode : unit → unit
val begin_code_paragraph : unit → unit
val end_code_paragraph : bool → unit
val begin_doc_paragraph : bool → int → unit
val end_doc_paragraph : unit → unit
11.1emIndex functions. (output_index_entry id t def use) outputs an entry line
for identifier id, with type t, where def is the list of sections
where f is introduced and use the list of sections where f is used.
If the type of the entry is "", then it is omitted.
type α elem = Single of α ∣ Interval of α × α
val begin_index : unit → unit
val output_index_entry :
string → string → string elem list → string elem list → unit
val output_raw_index_entry :
string → string → string list → string list → unit
val end_index : unit → unit
val output_label : string → unit
12.1emThe parameters of the output engine are reset to their initial values
with reset_output.
val reset_output : unit → unit
13.1emLow level output. out_channel is a reference on the current output channel. It is initialized to the standard output and can be redirect to a file by the function set_output_to_file. The function close_output closes the output channel if it is a file. output_char, output_string and output_file are self-explainable.
let out_channel = ref stdout
let output_is_file = ref false
let set_output_to_file f =
out_channel := open_out f;
output_is_file := true
let close_output () =
if !output_is_file then close_out !out_channel
let output_char c = Pervasives.output_char !out_channel c
let output_string s = Pervasives.output_string !out_channel s
let output_file f =
let ch = open_in f in
try
while true do
Pervasives.output_char !out_channel (input_char ch)
done
with End_of_file → close_in ch
14.1emHigh level output.
In this section and the following, we introduce functions which are
LATEX dependent.
15.1emoutput_verbatim outputs a string in verbatim mode.
A valid delimiter is given by the function char_out_of_string.
It assumes that one of the four characters of fresh_chars is not used
(which is the case in practice, since output_verbatim is only used
to print quote-delimited characters).
let fresh_chars = [ '!'
; '|'
; '"'
; '+'
]
let char_out_of_string s =
let rec search = function
∣ [ ] → assert false
∣ c :: r → if String.contains s c then search r else c
in
search fresh_chars
let output_verbatim s =
let c = char_out_of_string s in
output_string (sprintf "\\verb%c%s%c" c s c)
let set_no_preamble b = no_preamble := b
let (preamble : string Queue.t) = Queue.create ()
let push_in_preamble s = Queue.add s preamble
let class_options = ref "12pt"
let fullpage_headings = ref true
let latex_header opt =
if ¬ !no_preamble then begin
output_string (sprintf "\\documentclass[%s]{article}\n" !class_options);
output_string "\\usepackage[latin1]{inputenc}\n";
if !fullpage_headings then
output_string "\\usepackage[headings]{fullpage}\n"
else
output_string "\\usepackage{fullpage}\n";
output_string "\\usepackage";
if opt ≠ "" then output_string (sprintf "[%s]" opt);
output_string "{ocamlweb}\n";
output_string "\\pagestyle{headings}\n";
Queue.iter (fun s → output_string s; output_string "\n") preamble;
output_string "\\begin{document}\n"
end;
output_string
"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
output_string
"%% This file has been automatically generated with the command\n";
output_string "%% ";
Array.iter (fun s → output_string s; output_string " ") Sys.argv;
output_string "\n";
output_string
"%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n"
let latex_trailer () =
if ¬ !no_preamble then begin
output_string "\\end{document}\n"
end
16.1emMath mode.
We keep a boolean, math_mode, to know if we are currently
already in TEX math mode. The functions enter_math and leave_math
inserts $
if necessary, and switch that boolean.
let enter_math () =
if ¬ !math_mode then begin
output_string "$";
math_mode := true
end
let leave_math () =
if !math_mode then begin
output_string "$";
math_mode := false
end
17.1emIndentation.
An indentation at the beginning of a line of n spaces
is produced by (indentation n) (used for code only).
let indentation n =
let space = 0.5 *. (float n) in
output_string (sprintf "\\ocwindent{%2.2fem}\n" space)
18.1emEnd of lines.
(end_line ()) ends a line. (used for code only).
let end_line () =
leave_math ();
output_string "\\ocweol\n"
let end_line_string () =
output_string "\\endgraf\n"
19.1emKeywords.
Caml keywords and base type are stored in two hash tables, and the two
functions is_caml_keyword and is_base_type make the corresponding
tests.
The function output_keyword prints a keyword, with different macros
for base types and keywords.
let build_table l =
let h = Hashtbl.create 101 in
List.iter (fun key → Hashtbl.add h key ()) l;
Hashtbl.mem h
let is_caml_keyword =
build_table
[ "and"; "as"; "assert"; "begin"; "class";
"constraint"; "do"; "done"; "downto"; "else"; "end"; "exception";
"external"; "false"; "for"; "fun"; "function"; "functor"; "if";
"in"; "include"; "inherit"; "initializer"; "lazy"; "let"; "match";
"method"; "module"; "mutable"; "new"; "object"; "of"; "open";
"or"; "parser"; "private"; "rec"; "sig"; "struct"; "then"; "to";
"true"; "try"; "type"; "val"; "virtual"; "when"; "while"; "with";
"mod"; "land"; "lor"; "lxor"; "lsl"; "lsr"; "asr"
]
let is_base_type =
build_table
[ "string"; "int"; "array"; "unit"; "bool"; "char"; "list"; "option";
"float"; "ref" ]
let is_lex_keyword =
build_table
[ "rule"; "let"; "and"; "parse"; "eof" ]
let is_yacc_keyword =
build_table
[ "%token"; "%left"; "%right"; "%type"; "%start"; "%nonassoc"; "%prec";
"error" ]
let is_keyword s = is_base_type s ∨ is_caml_keyword s
let output_keyword s =
if is_base_type s then
output_string "\\ocwbt{"
else
output_string "\\ocwkw{";
output_string s;
output_string "}"
let output_lex_keyword s =
output_string "\\ocwlexkw{";
output_string s;
output_string "}"
let output_yacc_keyword s =
output_string "\\ocwyacckw{";
if String.get s 0 = '%'
then output_string "\\";
output_string s;
output_string "}"
20.1emIdentifiers.
The function output_raw_ident prints an identifier,
escaping the TEX reserved characters with output_escaped_char.
The function output_ident prints an identifier, calling
output_keyword if necessary.
let output_escaped_char c =
if c = '^'
∨ c = '~'
then leave_math();
match c with
∣ '\\'
→
output_string "\\symbol{92}"
∣ '$'
∣ '#'
∣ '%'
∣ '&'
∣ '{'
∣ '}'
∣ '_'
→
output_char '\\'
; output_char c
∣ '^'
∣ '~'
→
output_char '\\'
; output_char c; output_string "{}"
∣ '<'
∣ '>'
→
output_string "\\ensuremath{"; output_char c; output_string "}"
∣ _ →
output_char c
let output_latex_id s =
for i = 0 to String.length s − 1 do
output_escaped_char s.[i]
done
type char_type = Upper ∣ Lower ∣ Symbol
let what_char = function
∣ 'A'
..'Z'
∣ '\192'
..'\214'
∣ '\216'
..'\222'
→ Upper
∣ 'a'
..'z'
∣'\223'
..'\246'
∣ '\248'
..'\255'
∣ '_'
→ Lower
∣ _ → Symbol
let what_is_first_char s =
if String.length s > 0 then what_char s.[0] else Lower
let output_raw_ident_in_index s =
begin match what_is_first_char s with
∣ Upper → output_string "\\ocwupperid{"
∣ Lower → output_string "\\ocwlowerid{"
∣ Symbol → output_string "\\ocwsymbolid{"
end;
output_latex_id s;
output_string "}"
let output_raw_ident s =
begin match what_is_first_char s with
∣ Upper → output_string "\\ocwupperid{"
∣ Lower → output_string "\\ocwlowerid{"
∣ Symbol → output_string "\\ocwsymbolid{"
end;
try
let qualification = Filename.chop_extension s in
(∗ We extract the qualified name. ∗)
let qualified_name =
String.sub s (String.length qualification + 1)
(String.length s − String.length qualification − 1)
in
(∗ We check now whether the qualified term is a lower id or not. ∗)
match qualified_name.[0] with
∣ 'A'
..'Z'
→
(∗ The qualified term is a module or a constructor: nothing to change. ∗)
output_latex_id (s);
output_string "}"
∣ _ →
(∗ The qualified term is a value or a type:
\\ocwlowerid
used instead. ∗)
output_latex_id (qualification ^ ".");
output_string "}";
output_string "\\ocwlowerid{";
output_latex_id qualified_name;
output_string "}"
with Invalid_argument _ →
(∗ The string s is a module name or a constructor: nothing to do. ∗)
output_latex_id s;
output_string "}"
let output_ident s =
if is_keyword s then begin
leave_math (); output_keyword s
end else begin
enter_math (); output_raw_ident s
end
let output_lex_ident s =
if is_lex_keyword s then begin
leave_math (); output_lex_keyword s
end else begin
enter_math ();
output_string "\\ocwlexident{";
output_latex_id s;
output_string "}";
end
let output_yacc_ident s =
if is_yacc_keyword s then begin
leave_math (); output_yacc_keyword s
end else begin
enter_math ();
output_string "\\ocwyaccident{";
output_latex_id s;
output_string "}";
end
21.1emSymbols.
Some mathematical symbols are printed in a nice way, in order
to get a more readable code.
The type variables from 'a
to 'd
are printed as Greek
letters for the same reason.
let output_symbol = function
∣ "*" → enter_math (); output_string "\\times{}"
∣ "**" → enter_math (); output_string "*\\!*"
∣ "->" → enter_math (); output_string "\\rightarrow{}"
∣ "<-" → enter_math (); output_string "\\leftarrow{}"
∣ "<=" → enter_math (); output_string "\\le{}"
∣ ">=" → enter_math (); output_string "\\ge{}"
∣ "<>" → enter_math (); output_string "\\not="
∣ "==" → enter_math (); output_string "\\equiv"
∣ "!=" → enter_math (); output_string "\\not\\equiv"
∣ "~-" → enter_math (); output_string "-"
∣ "[<" → enter_math (); output_string "[\\langle{}"
∣ ">]" → enter_math (); output_string "\\rangle{}]"
∣ "<" ∣ ">" ∣ "(" ∣ ")" ∣ "[" ∣ "]" ∣ "[|" ∣ "|]" as s →
enter_math (); output_string s
∣ "&" ∣ "&&" →
enter_math (); output_string "\\land{}"
∣ "or" ∣ "||" →
enter_math (); output_string "\\lor{}"
∣ "not" → enter_math (); output_string "\\lnot{}"
∣ "[]" → enter_math (); output_string "[\\,]"
∣ "|" → enter_math (); output_string "\\mid{}"
∣ s → output_latex_id s
let use_greek_letters = ref true
let output_tv id =
output_string "\\ocwtv{"; output_latex_id id; output_char '}'
let output_greek l =
enter_math (); output_char '\\'
; output_string l; output_string "{}"
let output_type_variable id =
if ¬ !use_greek_letters then
output_tv id
else
match id with
∣ "a" → output_greek "alpha"
∣ "b" → output_greek "beta"
∣ "c" → output_greek "gamma"
∣ "d" → output_greek "delta"
∣ "e" → output_greek "varepsilon"
∣ "i" → output_greek "iota"
∣ "k" → output_greek "kappa"
∣ "l" → output_greek "lambda"
∣ "m" → output_greek "mu"
∣ "n" → output_greek "nu"
∣ "r" → output_greek "rho"
∣ "s" → output_greek "sigma"
∣ "t" → output_greek "tau"
∣ _ → output_tv id
let output_ascii_char n =
output_string (sprintf "\\symbol{%d}" n)
22.1emConstants.
let output_integer s =
let n = String.length s in
let base b =
let v = String.sub s 2 (n − 2) in
output_string (sprintf "\\ocw%sconst{%s}" b v)
in
if n > 1 then
match s.[1] with
∣ 'x'
∣ 'X'
→ base "hex"
∣ 'o'
∣ 'O'
→ base "oct"
∣ 'b'
∣ 'B'
→ base "bin"
∣ _ → output_string s
else
output_string s
let output_float s =
try
let i = try String.index s 'e'
with Not_found → String.index s 'E'
in
let m = String.sub s 0 i in
let e = String.sub s (succ i) (String.length s − i − 1) in
if m = "1" then
output_string (sprintf "\\ocwfloatconstexp{%s}" e)
else
output_string (sprintf "\\ocwfloatconst{%s}{%s}" m e)
with Not_found →
output_string s
23.1emComments.
let output_bc () = leave_math (); output_string "\\ocwbc{}"
let output_ec () = leave_math (); output_string "\\ocwec{}"
let output_hfill () = leave_math (); output_string "\\hfill "
let output_byc () = leave_math (); output_string "\\ocwbyc{}"
let output_eyc () = leave_math (); output_string "\\ocweyc{}"
24.1emStrings.
let output_bs () = leave_math (); output_string "\\ocwstring{\""
let output_es () = output_string "\"}"
let output_vspace () = output_string "\\ocwvspace{}"
25.1emReset of the output machine.
let reset_output () =
math_mode := false
26.1emSectioning commands.
let begin_section () =
output_string "\\allowbreak\\ocwsection\n"
let output_typeout_command filename =
output_string "\\typeout{OcamlWeb file ";
output_string filename;
output_string "}\n"
let output_module module_name =
if ¬ !short then begin
output_typeout_command (module_name^".ml");
output_string "\\ocwmodule{";
output_latex_id module_name;
output_string "}\n"
end
let output_interface module_name =
if ¬ !short then begin
output_typeout_command (module_name^".mli");
output_string "\\ocwinterface{";
output_latex_id module_name;
output_string "}\n"
end
let output_lexmodule module_name =
if ¬ !short then begin
output_typeout_command (module_name^".mll");
output_string "\\ocwlexmodule{";
output_latex_id module_name;
output_string "}\n"
end
let output_yaccmodule module_name =
if ¬ !short then begin
output_typeout_command (module_name^".mly");
output_string "\\ocwyaccmodule{";
output_latex_id module_name;
output_string "}\n"
end
let begin_code () =
if ¬ !in_code then output_string "\\ocwbegincode{}";
in_code := true
let end_code () =
if !in_code then output_string "\\ocwendcode{}";
in_code := false
let begin_dcode () =
output_string "\\ocwbegindcode{}"
let end_dcode () =
output_string "\\ocwenddcode{}"
let begin_code_paragraph () =
if ¬ !last_is_code then output_string "\\medskip\n";
last_is_code := true
let end_code_paragraph is_last_paragraph =
if is_last_paragraph then end_line() else output_string "\\medskip\n\n"
let begin_doc_paragraph is_first_paragraph n =
if ¬ is_first_paragraph then indentation n;
last_is_code := false
let end_doc_paragraph () =
output_string "\n"
27.1emIndex.
It is opened and closed with the two macros ocwbeginindex
and
ocwendindex
.
The auxiliary function print_list is a generic function to print a
list with a given printing function and a given separator.
let begin_index () =
output_string "\n\n\\ocwbeginindex{}\n"
let end_index () =
output_string "\n\n\\ocwendindex{}\n"
let print_list print sep l =
let rec print_rec = function
∣ [ ] → ()
∣ [x] → print x
∣ x::r → print x; sep(); print_rec r
in
print_rec l
28.1emIndex in WEB style.
The function output_index_entry prints one entry line, given the
name of the entry, and two lists of pre-formatted sections labels,
like 1–4,7,10–17, of type string elem list.
The first list if printed in bold face (places where the identifier is
defined) and the second one in roman (places where it is used).
type α elem = Single of α ∣ Interval of α × α
let output_ref r = output_string (sprintf "\\ref{%s}" r)
let output_elem = function
∣ Single r →
output_ref r
∣ Interval (r1,r2) →
output_ref r1;
output_string "--";
output_ref r2
let output_bf_elem n =
output_string "\\textbf{"; output_elem n; output_string "}"
let output_index_entry s t def use =
let sep () = output_string ", " in
output_string "\\ocwwebindexentry{";
enter_math ();
output_raw_ident_in_index s;
leave_math ();
if t ≠ "" then output_string (" " ^ t);
output_string "}{";
print_list output_bf_elem sep def;
output_string "}{";
if def ≠ [ ] ∧ use ≠ [ ] then output_string ", ";
print_list output_elem sep use;
output_string "}\n"
29.1emIndex in LATEX style.
When we are not in WEB style, the index in left to LATEX, and all
the work is done by the macro \ocwrefindexentry
, which takes
three arguments: the name of the entry and the two lists of labels where
it is defined and used, respectively.
let output_raw_index_entry s t def use =
let sep () = output_string ","
and sep′ () = output_string ", " in
output_string "\\ocwrefindexentry{";
enter_math ();
output_raw_ident_in_index s;
leave_math ();
if t ≠ "" then output_string (" " ^ t);
output_string "}{";
print_list output_string sep def;
output_string "}{";
print_list output_string sep use;
output_string "}{";
print_list output_ref sep′ def;
output_string "}{";
print_list output_ref sep′ use;
output_string "}\n"
let output_label l =
output_string "\\label{"; output_string l; output_string "}%\n"
30.1emIn actions, we reuse the location type for lex files.
open Lex_syntax
open Yacc_syntax
let dummy_loc =
{ start_pos = Lexing.dummy_pos;
end_pos = Lexing.dummy_pos;
start_line = 0 ;
start_col = 0 }
31.1emYacc tokens.
%token Ttoken Tstart Ttype Tleft Tright Tnonassoc Tprec Terror
%token <
Yacc_syntax.ident
> Tident
%token <
Yacc_syntax.location
> Taction Ttypedecl
%token Tor Tsemicolon Tcolon Tmark
%token EOF
%start yacc_definitions
%type <
Yacc_syntax.yacc_definitions
> yacc_definitions
32.1emStart symbol for yacc description files
yacc_definitions::=
∣ header tokendecls Tmark rules header EOF
{
{ header = $1 ;
decls = $2;
rules = $4;
trailer = $5 }
}
header ::=
∣ Taction
{
$1
}
∣ /∗ ε ∗/
{
dummy_loc
}
33.1emToken declarations.
tokendecls ::=
∣ tokendecl tokendecls
{
$1::$2
}
∣ /∗epsilon∗/
{
[ ]
}
tokendecl ::=
∣ Ttoken Ttypedecl idlist
{
Typed_tokens($2,$3)
}
∣ Ttoken idlist
{
Untyped_tokens($2)
}
∣ Ttype Ttypedecl idlist
{
Non_terminals_type($2,$3)
}
∣ Tstart idlist
{
Start_symbols($2)
}
∣ Tleft idlist
{
Tokens_assoc($2)
}
∣ Tnonassoc idlist
{
Tokens_assoc($2)
}
∣ Tright idlist
{
Tokens_assoc($2)
}
idlist::=
∣ Tident
{
[$1]
}
∣ Tident idlist
{
$1 :: $2
}
34.1emParsing of rules.
rules::=
∣ /∗ ε ∗/
{
[ ]
}
∣ general_rule rules
{
$1 :: $2
}
Ocamlyacc manual asks for a semicolon at end of each rules. But ocamlyacc accepts if they are missing. We issue a warning for non conformity to ocamlyacc documentation.
general_rule::=
∣ rule Tsemicolon
{
$1
}
∣ rule
{
Yacc_syntax.issue_warning "ocamlyacc documentation recommends adding a semicolon at end of each grammar rules";
$1
}
rule ::=
∣ Tident Tcolon right_part
{
($1,$3)
}
∣ Tident Tcolon Tor right_part
{
($1,$4)
}
right_part ::=
∣ word Taction
{
[($1,$2)]
}
∣ word Taction Tor right_part
{
($1,$2) :: $4
}
word ::=
∣ /∗ ε ∗/
{
[ ]
}
∣ Tident word
{
$1 :: $2
}
∣ Tprec Tident word
{
$2 :: $3
}
∣ Terror word
{
$2
}
open Lex_syntax
open Yacc_syntax
open Yacc_parser
Auxiliaries for the lexical analyzer
let brace_depth = ref 0
and comment_depth = ref 0
and mark_count = ref 0
exception Lexical_error of string × int × int
let handle_lexical_error fn lexbuf =
let line = !current_line_num
and column = Lexing.lexeme_start lexbuf − !current_line_start_pos in
try
fn lexbuf
with Lexical_error(msg, _, _) →
raise(Lexical_error(msg, line, column))
36.1emyacc keywords
let keyword_table = Hashtbl.create 17
let _ =
List.iter (fun (kwd, tok) → Hashtbl.add keyword_table kwd tok)
[ "token", Ttoken;
"start", Tstart;
"type", Ttype;
"left", Tleft;
"right", Tright;
"nonassoc", Tnonassoc;
"prec", Tprec ]
let keyword_token lexbuf =
try
Hashtbl.find keyword_table (Lexing.lexeme lexbuf)
with
Not_found →
raise(Lexical_error
("unknown keyword " ^ String.escaped(Lexing.lexeme lexbuf),
!current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos))
let cur_loc lexbuf =
{ start_pos = Lexing.lexeme_start_p lexbuf;
end_pos = Lexing.lexeme_end_p lexbuf;
start_line = !current_line_num;
start_col = Lexing.lexeme_start lexbuf − !current_line_start_pos }
let reset_lexer f lexbuf =
current_file_name := f;
mark_count := 0;
current_line_num := 1;
current_line_start_pos := 0;
current_lexbuf := lexbuf
}
37.1emmain rule for tokens in yacc files
rule main = parse
∣ [' '
'\013'
'\009'
'\012'
] +
{
main lexbuf
}
Although few grammar files include commas anywhere commas are
skipped in yacc. The original yacc code is used for ocaml. See
yacc/reader.c:nextc(),read_grammar()
from the ocaml 3.04 distribution.
We issue a warning for non conformity to ocamlyacc documentation.
∣ ','
{
issue_warning
"use of commas in mly files is allowed but not conform to ocamlyacc documentation";
main lexbuf
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
main lexbuf
}
∣ "/*"
{
handle_lexical_error yacc_comment lexbuf;
main lexbuf
}
∣ ['A'
–'Z'
'a'
–'z'
] ['A'
–'Z'
'a'
–'z'
'\''
'_'
'0'
–'9'
] ⋆
{
match Lexing.lexeme lexbuf with
"error" → Terror
∣ s → let l = cur_loc lexbuf in
Tident (s,l)
}
∣ '{'
{
let n1 = Lexing.lexeme_end_p lexbuf
and l1 = !current_line_num
and s1 = !current_line_start_pos in
brace_depth := 1;
let n2 = handle_lexical_error action lexbuf in
Taction({start_pos = n1; end_pos = n2;
start_line = l1; start_col = n1.Lexing.pos_cnum − s1})
}
∣ '|'
{
Tor
}
∣ ';'
{
Tsemicolon
}
∣ ':'
{
Tcolon
}
∣ '%'
{
yacc_keyword lexbuf
}
∣ '<'
{
let n1 = Lexing.lexeme_end_p lexbuf
and l1 = !current_line_num
and s1 = !current_line_start_pos in
let n2 = handle_lexical_error typedecl lexbuf in
Ttypedecl({start_pos = n1; end_pos = n2;
start_line = l1; start_col = n1.Lexing.pos_cnum − s1})
}
∣ eof
{
EOF
}
∣ _
{
raise(Lexical_error
("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
!current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos))
}
and yacc_keyword = parse
∣ '%'
{
incr mark_count;
if !mark_count = 1 then Tmark else
let n1 = Lexing.lexeme_end_p lexbuf
and l1 = !current_line_num
and s1 = !current_line_start_pos in
brace_depth := 0;
let n2 = handle_lexical_error action lexbuf in
Taction({start_pos = n1; end_pos = n2;
start_line = l1; start_col = n1.Lexing.pos_cnum − s1})
}
∣ '{'
{
let n1 = Lexing.lexeme_end_p lexbuf
and l1 = !current_line_num
and s1 = !current_line_start_pos in
brace_depth := 1;
let n2 = handle_lexical_error action lexbuf in
Taction({start_pos = n1; end_pos = n2;
start_line = l1; start_col = n1.Lexing.pos_cnum − s1})
}
∣ ['a'
–'z'
] +
{
keyword_token lexbuf
}
∣ _
{
raise(Lexical_error
("illegal character " ^ String.escaped(Lexing.lexeme lexbuf),
!current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos))
}
38.1emrecognizes a CAML action
and action = parse
∣ '{'
{
incr brace_depth;
action lexbuf
}
∣ '}'
{
decr brace_depth;
if !brace_depth = 0
then Lexing.lexeme_start_p lexbuf else action lexbuf
}
∣ "%}"
{
decr brace_depth;
if !brace_depth = 0 then Lexing.lexeme_start_p lexbuf else
raise(Lexical_error
("ill-balanced brace ",
!current_line_num, Lexing.lexeme_start lexbuf − !current_line_start_pos))
}
∣ '"'
{
string lexbuf;
action lexbuf
}
∣ "’" [^ '\\'
] "’"
{
action lexbuf
}
∣ "’" '\\'
['\\'
'\''
'n'
't'
'b'
'r'
] "’"
{
action lexbuf
}
∣ "’" '\\'
['0'
–'9'
] ['0'
–'9'
] ['0'
–'9'
] "’"
{
action lexbuf
}
∣ "(*"
{
comment_depth := 1;
comment lexbuf;
action lexbuf
}
∣ eof
{
if !brace_depth = 0 then Lexing.lexeme_start_p lexbuf else
raise (Lexical_error("unterminated action", 0, 0))
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
action lexbuf
}
∣ _
{
action lexbuf
}
39.1emrecognizes a CAML type between < and >
and typedecl = parse
∣ '>'
{
Lexing.lexeme_start_p lexbuf
}
∣ eof
{
raise (Lexical_error("unterminated type declaration", 0, 0))
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
typedecl lexbuf
}
∣ "->"
{
typedecl lexbuf
}
∣ _
{
typedecl lexbuf
}
and string = parse
'"'
{
()
}
∣ '\\'
[' '
'\013'
'\009'
'\012'
] ⋆ '\010'
[' '
'\013'
'\009'
'\012'
] ⋆
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
string lexbuf
}
∣ '\\'
['\\'
'"'
'n'
't'
'b'
'r'
]
{
string lexbuf
}
∣ '\\'
['0'
–'9'
] ['0'
–'9'
] ['0'
–'9'
]
{
string lexbuf
}
∣ eof
{
raise(Lexical_error("unterminated string", 0, 0))
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
string lexbuf
}
∣ _
{
string lexbuf
}
and comment = parse
"(*"
{
incr comment_depth; comment lexbuf
}
∣ "*)"
{
decr comment_depth;
if !comment_depth = 0 then () else comment lexbuf
}
∣ '"'
{
string lexbuf;
comment lexbuf
}
∣ "’’"
{
comment lexbuf
}
∣ "’" [^ '\\'
'\''
] "’"
{
comment lexbuf
}
∣ "’\\" ['\\'
'\''
'n'
't'
'b'
'r'
] "’"
{
comment lexbuf
}
∣ "’\\" ['0'
–'9'
] ['0'
–'9'
] ['0'
–'9'
] "’"
{
comment lexbuf
}
∣ eof
{
raise(Lexical_error("unterminated comment", 0, 0))
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
comment lexbuf
}
∣ _
{
comment lexbuf
}
and yacc_comment = parse
∣ "*/"
{
()
}
∣ eof
{
raise(Lexical_error("unterminated yacc comment", 0, 0))
}
∣ '\010'
{
current_line_start_pos := Lexing.lexeme_end lexbuf;
incr current_line_num;
yacc_comment lexbuf
}
∣ _
{
yacc_comment lexbuf
}
40.1emThat module exports to global tables used and defined, indexed by identifiers (strings) and containing respectively the sets of locations where they are defined and used. Those locations are of type where, which contain the name of the file and the absolute position in the source.
type where = { w_filename : string; w_loc : int }
type entry_type =
∣ Value
∣ Constructor
∣ Field
∣ Label
∣ Type
∣ Exception
∣ Module
∣ ModuleType
∣ Class
∣ Method
∣ LexParseRule (∗ CAMLLEX entry points ∗)
∣ RegExpr (∗ CAMLLEX regular expressions ∗)
∣ YaccNonTerminal (∗ CAMLYACC non-terminal symbols ∗)
∣ YaccTerminal (∗ CAMLYACC terminal symbols, i.e. tokens ∗)
type index_entry = { e_name : string; e_type : entry_type }
module Idmap : Map.S with type key = index_entry
module Stringset : Set.S with type elt = string
module Whereset : Set.S with type elt = where
val used : Whereset.t Idmap.t ref
val defined : Whereset.t Idmap.t ref
41.1emThe two following functions fill the above tables for a given file.
val cross_implem : string → string → unit
val cross_interf : string → string → unit
cross-referencing lex and yacc description files
val cross_lex : string → string → unit
val cross_yacc : string → string → unit
42.1emCross references inside Caml files are kept in the following two
global tables, which keep the places where things are defined and
used, to produce the final indexes.
type where = { w_filename : string; w_loc : int }
module Whereset = Set.Make(struct type t = where let compare = compare end)
type entry_type =
∣ Value
∣ Constructor
∣ Field
∣ Label
∣ Type
∣ Exception
∣ Module
∣ ModuleType
∣ Class
∣ Method
∣ LexParseRule (∗ CAMLLEX entry points ∗)
∣ RegExpr (∗ CAMLLEX regular expressions ∗)
∣ YaccNonTerminal (∗ CAMLYACC non-terminal symbols ∗)
∣ YaccTerminal (∗ CAMLYACC terminal symbols, i.e. tokens ∗)
type index_entry = { e_name : string; e_type : entry_type }
module Idmap = Map.Make(struct type t = index_entry let compare = compare end)
let defined = ref Idmap.empty
let used = ref Idmap.empty
43.1emThe function add_global is a generic function to add an entry in one
table. add_def is used to add the definition of an identifier (so in the
table defined).
let add_global table k i =
try
let s = Idmap.find k !table in
table := Idmap.add k (Whereset.add i s) !table
with Not_found →
table := Idmap.add k (Whereset.singleton i) !table
let current_location loc =
{ w_filename = !current_file;
w_loc = !current_offset + loc.loc_start.pos_cnum }
let add_def loc t s =
if String.length s > 0 then
let e = { e_name = s; e_type = t } in
add_global defined e (current_location loc)
44.1emAnother table, locals, keeps the bound variables, in order to
distinguish them from global identifiers. Then the function add_uses
registers that an identifier is used (in the table used), taking care
of the fact that it is not a bound variable (in the table locals).
add_uses_q iters add_uses on a qualified identifier.
module Stringset = Set.Make(struct type t = string let compare = compare end)
let locals = ref Stringset.empty
let reset_cross f offs =
assert (Stringset.cardinal !locals = 0);
locals := Stringset.empty;
current_file := f;
current_offset := offs
let add_local s =
locals := Stringset.add s !locals
let is_uppercase = function 'A'
..'Z'
→ true ∣ _ → false
let add_uses loc t s =
if String.length s > 0 ∧
¬ (is_keyword s) ∧ ¬ (Stringset.mem s !locals)
then
let e = { e_name = s; e_type = t } in
add_global used e (current_location loc)
let add_uses_q loc t q =
let rec addmod = function
∣ Lident s → add_uses loc Module s
∣ Ldot (q,s) → addmod q; add_uses loc Module s
∣ Lapply (q1,q2) → addmod q1; addmod q2
in
match q with
∣ Lident s → add_uses loc t s
∣ Ldot (q,s) → addmod q; add_uses loc t s
∣ Lapply (q1,q2) → addmod q1; addmod q2
45.1emSome useful functions.
let iter_fst f = List.iter (fun x → f (fst x))
let iter_snd f = List.iter (fun x → f (snd x))
let option_iter f = function None → () ∣ Some x → f x
46.1emWhen traversing a pattern, we must collect all its identifiers, in order
to declare them as bound variables (or definitions behind a let
construction). That is the job of the function ids_of_a_pattern.
Then pattern_for_def declares all the identifiers of a pattern as
new definitions.
let ids_of_a_pattern p =
let r = ref [ ] in
let add id = r := id :: !r in
let rec pattern_d = function
∣ Ppat_any → ()
∣ Ppat_var id → add id
∣ Ppat_alias (p,id) → add id; pattern p
∣ Ppat_constant _ → ()
∣ Ppat_tuple pl → List.iter pattern pl
∣ Ppat_construct (_,po,_) → option_iter pattern po
∣ Ppat_record l → iter_snd pattern l
∣ Ppat_array pl → List.iter pattern pl
∣ Ppat_or (p1,p2) → pattern p1; pattern p2
∣ Ppat_constraint (p,_) → pattern p
∣ Ppat_variant (_,po) → option_iter pattern po
∣ Ppat_type _ → ()
and pattern p =
pattern_d p.ppat_desc
in
pattern p; !r
let pattern_for_def p =
let loc = p.ppat_loc in
let ids = ids_of_a_pattern p in
List.iter (add_def loc Value) ids
47.1emThe following function locally adds some given variables to the set of
bound variables, during the time of the application of a given function
on a given argument.
let bind_variables ids f x =
let save = !locals in
List.iter add_local ids;
f x;
locals := save
48.1emTraversing of Caml abstract syntax trees.
Each type t in those abstract
syntax trees is associated to a function tr_t which traverses it,
declaring the identifiers used and defined. Those types are defined
in the Caml module interface Paresetree.mli contained in the Caml source
distribution.
The following code is quite code, but systematic and easy to understand.
49.1emCore types.
let rec tr_core_type t =
tr_core_type_desc t.ptyp_loc t.ptyp_desc
and tr_core_type_desc loc = function
∣ Ptyp_any ∣ Ptyp_var _ →
()
∣ Ptyp_arrow (l,t1,t2) →
add_def loc Label l; tr_core_type t1; tr_core_type t2
∣ Ptyp_tuple tl →
List.iter tr_core_type tl
∣ Ptyp_constr (q,tl) →
add_uses_q loc Type q; List.iter tr_core_type tl
∣ Ptyp_object l →
List.iter tr_core_field_type l
∣ Ptyp_class (id,l,ll) →
add_uses_q loc Class id;
List.iter (add_def loc Label) ll;
List.iter tr_core_type l
∣ Ptyp_alias (ct,_) →
tr_core_type ct
∣ Ptyp_variant (l,_,_) →
List.iter tr_row_field l
∣ Ptyp_poly (_,t) →
tr_core_type t
and tr_row_field = function
∣ Rtag (_,_,ctl) → List.iter tr_core_type ctl
∣ Rinherit t → tr_core_type t
and tr_core_field_type ft =
tr_core_field_desc ft.pfield_loc ft.pfield_desc
and tr_core_field_desc loc = function
∣ Pfield (id,ct) →
add_uses loc Method id;
tr_core_type ct
∣ Pfield_var → ()
50.1emType expressions for the class language.
let tr_class_infos f p =
add_def p.pci_loc Class p.pci_name;
f p.pci_expr
51.1emValue expressions for the core language.
let bind_pattern f (p,e) =
bind_variables (ids_of_a_pattern p) f e
let bind_patterns f pl e =
let ids = List.flatten (List.map ids_of_a_pattern pl) in
bind_variables ids f e
let rec tr_expression e =
tr_expression_desc e.pexp_loc e.pexp_desc
and tr_expression_desc loc = function
∣ Pexp_ident q →
add_uses_q loc Value q
∣ Pexp_apply (e,lel) →
tr_expression e;
List.iter (fun (l,e) → add_uses loc Label l; tr_expression e) lel
∣ Pexp_ifthenelse (e1,e2,e3) →
tr_expression e1; tr_expression e2; option_iter tr_expression e3
∣ Pexp_sequence (e1,e2) →
tr_expression e1; tr_expression e2
∣ Pexp_while (e1,e2) →
tr_expression e1; tr_expression e2
∣ Pexp_tuple el →
List.iter tr_expression el
∣ Pexp_construct (q,e,_) →
add_uses_q loc Constructor q;
option_iter tr_expression e
∣ Pexp_function (l,eo,pel) →
add_def loc Label l;
option_iter tr_expression eo;
List.iter (bind_pattern tr_expression) pel
∣ Pexp_match (e,pel) →
tr_expression e; List.iter (bind_pattern tr_expression) pel
∣ Pexp_try (e,pel) →
tr_expression e; List.iter (bind_pattern tr_expression) pel
∣ Pexp_let (recf,pel,e) →
let pl = List.map fst pel in
if recf = Recursive then
iter_snd (bind_patterns tr_expression pl) pel
else
iter_snd tr_expression pel;
bind_patterns tr_expression pl e
∣ Pexp_record (l,e) →
iter_fst (add_uses_q loc Field) l; iter_snd tr_expression l;
option_iter tr_expression e
∣ Pexp_field (e,q) →
tr_expression e; add_uses_q loc Field q
∣ Pexp_setfield (e1,q,e2) →
tr_expression e1; add_uses_q loc Field q; tr_expression e2
∣ Pexp_array el →
List.iter tr_expression el
∣ Pexp_for (i,e1,e2,_,e) →
tr_expression e1; tr_expression e2; bind_variables [i] tr_expression e
∣ Pexp_constraint (e,t1,t2) →
tr_expression e; option_iter tr_core_type t1; option_iter tr_core_type t2
∣ Pexp_when (e1,e2) →
tr_expression e1; tr_expression e2
∣ Pexp_letmodule (x,m,e) →
tr_module_expr m; bind_variables [x] tr_expression e
∣ Pexp_constant _ →
()
∣ Pexp_send (e,id) →
add_uses loc Method id; tr_expression e
∣ Pexp_new id →
add_uses_q loc Class id
∣ Pexp_setinstvar (id,e) →
add_uses loc Value id; tr_expression e
∣ Pexp_override l →
iter_fst (add_uses loc Method) l; iter_snd tr_expression l
∣ Pexp_variant (_,eo) →
option_iter tr_expression eo
∣ Pexp_assert e →
tr_expression e
∣ Pexp_assertfalse →
()
∣ Pexp_lazy e →
tr_expression e
∣ Pexp_poly (e, t) →
tr_expression e; option_iter tr_core_type t
∣ Pexp_object cs →
tr_class_structure cs
52.1emValue descriptions.
and tr_value_description vd =
tr_core_type vd.pval_type
53.1emType declarations.
and tr_type_declaration td =
tr_type_kind td.ptype_loc td.ptype_kind;
option_iter tr_core_type td.ptype_manifest
and tr_type_kind loc = function
∣ Ptype_abstract → ()
∣ Ptype_variant (cl,_) →
iter_fst (add_def loc Constructor) cl;
iter_snd (List.iter tr_core_type) cl
∣ Ptype_record (fl,_) →
List.iter (fun (f,_,t) → add_def loc Field f; tr_core_type t) fl
and tr_exception_declaration ed =
List.iter tr_core_type ed
54.1emType expressions for the class language.
and tr_class_type c =
tr_class_type_desc c.pcty_loc c.pcty_desc
and tr_class_type_desc loc = function
∣ Pcty_constr (id,l) →
add_uses_q loc Class id;
List.iter tr_core_type l
∣ Pcty_signature cs →
tr_class_signature cs
∣ Pcty_fun (l,co,cl) →
add_def loc Label l;
tr_core_type co;
tr_class_type cl
and tr_class_signature (ct,l) =
tr_core_type ct;
List.iter tr_class_type_field l
and tr_class_type_field = function
∣ Pctf_inher ct →
tr_class_type ct
∣ Pctf_val (id,_,ct,loc) →
add_def loc Value id;
option_iter tr_core_type ct
∣ Pctf_virt (id,_,ct,loc) →
add_def loc Method id;
tr_core_type ct
∣ Pctf_meth (id,_,ct,loc) →
add_def loc Method id;
tr_core_type ct
∣ Pctf_cstr (ct1,ct2,_) →
tr_core_type ct1;
tr_core_type ct2
and tr_class_description x = tr_class_infos tr_class_type x
and tr_class_type_declaration x = tr_class_infos tr_class_type x
55.1emValue expressions for the class language.
and tr_class_expr ce = tr_class_expr_desc ce.pcl_loc ce.pcl_desc
and tr_class_expr_desc loc = function
∣ Pcl_constr (id,l) →
add_uses_q loc Class id;
List.iter tr_core_type l
∣ Pcl_structure cs →
tr_class_structure cs
∣ Pcl_fun (l,eo,p,ce) →
add_def loc Label l;
option_iter tr_expression eo;
bind_variables (ids_of_a_pattern p) tr_class_expr ce
∣ Pcl_apply (ce,l) →
tr_class_expr ce;
List.iter (fun (l,e) → add_uses loc Label l; tr_expression e) l
∣ Pcl_let (recf,pel,ce) →
let pl = List.map fst pel in
if recf = Recursive then
iter_snd (bind_patterns tr_expression pl) pel
else
iter_snd tr_expression pel;
bind_patterns tr_class_expr pl ce
∣ Pcl_constraint (ce,ct) →
tr_class_expr ce;
tr_class_type ct
and tr_class_structure (p,l) =
List.iter (fun f → bind_pattern tr_class_field (p,f)) l
and tr_class_field = function
∣ Pcf_inher (ce,_) →
tr_class_expr ce
∣ Pcf_val (id,_,e,loc) →
add_def loc Value id;
tr_expression e
∣ Pcf_virt(id,_,ct,loc) →
add_def loc Method id;
tr_core_type ct
∣ Pcf_meth (id,_,e,loc) →
add_def loc Method id;
tr_expression e
∣ Pcf_cstr (ct1,ct2,_) →
tr_core_type ct1;
tr_core_type ct2
∣ Pcf_let (recf,pel,_) →
let pl = List.map fst pel in
if recf = Recursive then
iter_snd (bind_patterns tr_expression pl) pel
else
iter_snd tr_expression pel
∣ Pcf_init e →
tr_expression e
and tr_class_declaration x = tr_class_infos tr_class_expr x
56.1emType expressions for the module language.
and tr_module_type mt =
tr_module_type_desc mt.pmty_loc mt.pmty_desc
and tr_module_type_desc loc = function
∣ Pmty_ident id →
add_uses_q loc ModuleType id
∣ Pmty_signature s →
tr_signature s
∣ Pmty_functor (id,mt1,mt2) →
tr_module_type mt1;
bind_variables [id] tr_module_type mt2
∣ Pmty_with (mt,cl) →
tr_module_type mt;
List.iter
(fun (id,c) → add_uses_q loc Type id; tr_with_constraint loc c) cl
and tr_signature s =
List.iter tr_signature_item s
and tr_signature_item i =
tr_signature_item_desc i.psig_loc i.psig_desc
and tr_signature_item_desc loc = function
∣ Psig_value (x,vd) →
add_def loc Value x; tr_value_description vd
∣ Psig_type l →
iter_fst (add_def loc Type) l; iter_snd tr_type_declaration l
∣ Psig_exception (id,ed) →
add_def loc Exception id; tr_exception_declaration ed
∣ Psig_module (id,mt) →
add_def loc Module id; tr_module_type mt
∣ Psig_recmodule l →
List.iter (fun (id,mt) → add_def loc Module id; tr_module_type mt) l
∣ Psig_modtype (id,mtd) →
add_def loc ModuleType id; tr_modtype_declaration mtd
∣ Psig_open q →
add_uses_q loc Module q
∣ Psig_include mt →
tr_module_type mt
∣ Psig_class l →
List.iter tr_class_description l
∣ Psig_class_type l →
List.iter tr_class_type_declaration l
and tr_modtype_declaration = function
∣ Pmodtype_abstract → ()
∣ Pmodtype_manifest mt → tr_module_type mt
and tr_with_constraint loc = function
∣ Pwith_type td → tr_type_declaration td
∣ Pwith_module id → add_uses_q loc Module id
57.1emValue expressions for the module language.
and tr_module_expr me =
tr_module_expr_desc me.pmod_loc me.pmod_desc
and tr_module_expr_desc loc = function
∣ Pmod_ident id →
add_uses_q loc Module id
∣ Pmod_structure s →
tr_structure s
∣ Pmod_functor (id,mt,me) →
tr_module_type mt;
bind_variables [id] tr_module_expr me
∣ Pmod_apply (me1,me2) →
tr_module_expr me1;
tr_module_expr me2
∣ Pmod_constraint (me,mt) →
tr_module_expr me;
tr_module_type mt
and tr_structure l =
List.iter tr_structure_item l
and tr_structure_item i =
tr_structure_item_desc i.pstr_loc i.pstr_desc
and tr_structure_item_desc loc = function
∣ Pstr_eval e →
tr_expression e
∣ Pstr_value (_,pel) →
iter_fst pattern_for_def pel; iter_snd tr_expression pel
∣ Pstr_primitive (id,vd) →
add_def loc Value id; tr_value_description vd
∣ Pstr_type l →
iter_fst (add_def loc Type) l; iter_snd tr_type_declaration l
∣ Pstr_exception (id,ed) →
add_def loc Exception id; tr_exception_declaration ed
∣ Pstr_module (id,me) →
add_def loc Module id; tr_module_expr me
∣ Pstr_recmodule l →
List.iter
(fun (id,mt,me) →
add_def loc Module id; tr_module_type mt; tr_module_expr me) l
∣ Pstr_modtype (id,mt) →
add_def loc ModuleType id; tr_module_type mt
∣ Pstr_open m →
add_uses_q loc Module m
∣ Pstr_class l →
List.iter tr_class_declaration l
∣ Pstr_class_type l →
List.iter tr_class_type_declaration l
∣ Pstr_exn_rebind (id,q) →
add_def loc Exception id;
add_uses_q loc Exception q
∣ Pstr_include me →
tr_module_expr me
58.1emGiven all that collecting functions, we can now define two functions
cross_implem and cross_interf which respectively compute the
cross-references in implementations and interfaces.
let zero = { pos_fname = ""; pos_lnum = 0; pos_bol = 0; pos_cnum = 9 }
let add_module m =
add_def { loc_start = zero; loc_end = zero; loc_ghost = false } Module m
let wrapper parsing_function traverse_function f m =
reset_cross f 0;
add_module m;
let c = open_in f in
let lexbuf = Lexing.from_channel c in
try
traverse_function (parsing_function lexbuf);
close_in c
with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
if ¬ !quiet then
eprintf " ** warning: syntax error while parsing %s\n" f;
close_in c
end
let cross_implem = wrapper Parse.implementation tr_structure
let cross_interf = wrapper Parse.interface tr_signature
59.1emcross-referencing lex and yacc description files
let input_string_inside_file ic loc =
seek_in ic loc.Lex_syntax.start_pos.pos_cnum;
let len =
loc.Lex_syntax.end_pos.pos_cnum − loc.Lex_syntax.start_pos.pos_cnum
in
let buf = String.create len in
try
really_input ic buf 0 len;
buf
with End_of_file → assert false
let lexer_function_inside_file ic loc =
seek_in ic loc.Lex_syntax.start_pos.pos_cnum;
let left =
ref (loc.Lex_syntax.end_pos.pos_cnum − loc.Lex_syntax.start_pos.pos_cnum)
in
fun buf len →
let m = input ic buf 0 (min !left len) in
for i=0 to pred m do
if String.get buf i = '$'
then String.set buf i ' '
done;
left := !left − m;
m
let cross_action_inside_file msg f m loc =
reset_cross f loc.Lex_syntax.start_pos.pos_cnum;
let c = open_in f in
let lexbuf = Lexing.from_function (lexer_function_inside_file c loc) in
try
tr_structure (Parse.implementation lexbuf);
close_in c
with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
if ¬ !quiet then begin
eprintf "File \"%s\", character %d\n"
f loc.Lex_syntax.start_pos.pos_cnum;
eprintf " ** warning: syntax error while parsing %s\n" msg
end;
close_in c
end
let cross_type_inside_file f m loc =
reset_cross f (loc.Lex_syntax.start_pos.pos_cnum − 7);
let c = open_in f in
let lexbuf =
Lexing.from_string ("type t=" ^ input_string_inside_file c loc) in
try
tr_structure (Parse.implementation lexbuf);
close_in c
with Syntaxerr.Error _ ∣ Syntaxerr.Escape_error ∣ Lexer.Error _ → begin
if ¬ !quiet then begin
eprintf "File \"%s\", character %d\n"
f loc.Lex_syntax.start_pos.pos_cnum;
eprintf " ** warning: syntax error while parsing type\n"
end;
close_in c
end
let transl_loc loc =
{ loc_start = loc.Lex_syntax.start_pos;
loc_end = loc.Lex_syntax.end_pos;
loc_ghost = false }
60.1emcross-referencing lex description files
let rec add_used_regexps f m r =
match r with
Lex_syntax.Ident (id,loc) →
add_uses (transl_loc loc) RegExpr id
∣ Lex_syntax.Sequence(r1,r2) →
add_used_regexps f m r1;
add_used_regexps f m r2
∣ Lex_syntax.Alternative(r1,r2) →
add_used_regexps f m r1;
add_used_regexps f m r2
∣ Lex_syntax.Repetition(r) → add_used_regexps f m r
∣ Lex_syntax.Epsilon
∣ Lex_syntax.Characters _ → ()
let traverse_lex_defs f m lexdefs =
(∗ Caution : header, actions and trailer must be traversed last,
since traversing an action changes the location offset ∗)
(∗ traverse named regexps ∗)
List.iter
(fun (id,loc,regexp) →
add_def (transl_loc loc) RegExpr id;
add_used_regexps f m regexp)
lexdefs.Lex_syntax.named_regexps;
traverse lexer rules
List.iter
(fun (id,loc,rules) →
add_def (transl_loc loc) LexParseRule id;
List.iter
(fun (r,_) → add_used_regexps f m r)
rules)
lexdefs.Lex_syntax.entrypoints;
(∗ now we can traverse actions ∗)
(∗ traverse header ∗)
cross_action_inside_file "header" f m lexdefs.Lex_syntax.header;
(∗ traverse actions ∗)
List.iter
(fun (id,loc,rules) →
List.iter
(fun (regexp,action) →
add_used_regexps f m regexp;
cross_action_inside_file "action" f m action)
rules)
lexdefs.Lex_syntax.entrypoints;
(∗ traverse trailer ∗)
cross_action_inside_file "trailer" f m lexdefs.Lex_syntax.trailer
let cross_lex f m =
reset_cross f 0;
add_module m;
let c = open_in f in
let lexbuf = Lexing.from_channel c in
try
let lexdefs = Lex_parser.lexer_definition Lex_lexer.main lexbuf in
traverse_lex_defs f m lexdefs;
close_in c
with Parsing.Parse_error ∣ Lex_lexer.Lexical_error _ → begin
if ¬ !quiet then
eprintf " ** warning: syntax error while parsing lex file %s\n" f;
close_in c
end
61.1emcross-referencing yacc description files
let traverse_yacc f m yacc_defs =
(∗ Caution : header, actions and trailer must be traversed last,
since traversing an action changes the location offset ∗)
(∗ traverse decls ∗)
let tokens =
List.fold_left
(fun acc decl →
match decl with
∣ Yacc_syntax.Typed_tokens(typ,idl) →
List.fold_left
(fun acc (id,loc) →
add_def (transl_loc loc) YaccTerminal id;
Stringset.add id acc)
acc
idl
∣ Yacc_syntax.Untyped_tokens(idl) →
List.fold_left
(fun acc (id,loc) →
add_def (transl_loc loc) YaccTerminal id;
Stringset.add id acc)
acc
idl
∣ Yacc_syntax.Non_terminals_type(typ,idl) →
List.iter
(fun (id,loc) →
add_uses (transl_loc loc) YaccNonTerminal id)
idl;
acc
∣ Yacc_syntax.Start_symbols(idl) →
List.iter
(fun (id,loc) →
add_uses (transl_loc loc) YaccNonTerminal id)
idl;
acc
∣ Yacc_syntax.Tokens_assoc(idl) →
List.iter
(fun (id,loc) →
add_uses (transl_loc loc) YaccTerminal id)
idl;
acc)
Stringset.empty
yacc_defs.Yacc_syntax.decls
in
(∗ traverse grammar rules ∗)
List.iter
(fun ((id,loc),rhss) →
add_def (transl_loc loc) YaccNonTerminal id;
List.iter
(fun (rhs,_) →
List.iter
(fun (id,loc) →
if Stringset.mem id tokens
then add_uses (transl_loc loc) YaccTerminal id
else add_uses (transl_loc loc) YaccNonTerminal id)
rhs)
rhss)
yacc_defs.Yacc_syntax.rules;
(∗ now let’s traverse types, actions, header, trailer ∗)
(∗ traverse header ∗)
cross_action_inside_file "header" f m yacc_defs.Yacc_syntax.header;
(∗ traverse types in decls ∗)
List.iter
(function
∣ Yacc_syntax.Typed_tokens(typ,idl) →
cross_type_inside_file f m typ
∣ Yacc_syntax.Non_terminals_type(typ,idl) →
cross_type_inside_file f m typ
∣ _ → ())
yacc_defs.Yacc_syntax.decls;
(∗ traverse actions ∗)
List.iter
(fun (_,rhss) →
List.iter
(fun (_,action) →
cross_action_inside_file "action" f m action)
rhss)
yacc_defs.Yacc_syntax.rules;
(∗ traverse trailer ∗)
cross_action_inside_file "trailer" f m yacc_defs.Yacc_syntax.trailer
let cross_yacc f m =
reset_cross f 0;
add_module m;
let c = open_in f in
let lexbuf = Lexing.from_channel c in
try
Yacc_lexer.reset_lexer f lexbuf;
let yacc_defs = Yacc_parser.yacc_definitions Yacc_lexer.main lexbuf in
traverse_yacc f m yacc_defs;
close_in c
with
∣ Parsing.Parse_error → begin
Yacc_syntax.issue_warning "syntax error";
close_in c
end
∣ Yacc_lexer.Lexical_error(msg,line,col) → begin
Yacc_syntax.issue_warning ("lexical error (" ^ msg ^ ")");
close_in c
end
62.1emThe following functions pretty-print the paragraphs of code and
documentation, respectively. The boolean argument indicates
whether the given paragraph is the last one for
pretty_print_code or the first one for pretty_print_doc.
val pretty_print_code : bool → string → unit
val pretty_print_doc : bool → bool × int × string → unit
63.1emThese three functions pretty-print subparagraphs of Caml code,
Camllex code and Camlyacc code respectively
val pretty_print_caml_subpar : string → unit
val pretty_print_lex_subpar : string → unit
val pretty_print_yacc_subpar : string → unit
64.1emThis function sets values in order to reset the lexer, so we could
call it on an another file.
val reset_pretty : unit → unit
val count_spaces : string → int
65.1em
{
open Printf
open Output
open Lexing
66.1emGlobal variables and functions used by the lexer.
comment_depth indicates how many opening-braces we saw, so we know how
many closing-braces we have to look for, in order to respect
caml’s-specifications concerning comments imbrication.
let comment_depth = ref 0
Accounts for braket-depth in order to imbricate them.
let bracket_depth = ref 0
Set a reference on the starting character when we see \verb.
let verb_delim = ref (Char.chr 0)
counts occurences of "
let yaccdoublepercentcounter = ref 0
This function returns the first char of a lexbuf.
let first_char lexbuf = lexeme_char lexbuf 0
The count_spaces function acccounts for spaces in order to respect
alignment (in the LATEX-outputed file) concerning left margins.
let count_spaces s =
let c = ref 0 in
for i = 0 to String.length s − 1 do
if s.[i] = '\t'
then
c := !c + (8 − (!c mod 8))
else
incr c
done;
!c
This boolean value is true if we enter in math mode, false otherwise.
let user_math_mode = ref false
user_math function does everything is needed to set the math mode
when it is called, particularly it checks/sets the user_math_mode
value as needed.
let user_math () =
if ¬ !user_math_mode then begin
user_math_mode := true;
enter_math ()
end else begin
user_math_mode := false;
leave_math ()
end
Checks if we are in maths mode and prints char c considering the case.
let check_user_math c =
if !user_math_mode then output_char c else output_escaped_char c
This function sets values in order to reset the lexer, so we could call it
on an another file.
let reset_pretty () =
reset_output ();
yaccdoublepercentcounter := 0;
user_math_mode := false
}
67.1emShortcuts for regular expressions.
let space = [' '
'\t'
]
let lowercase = ['a'
–'z'
'\223'
–'\246'
'\248'
–'\255'
'_'
]
let uppercase = ['A'
–'Z'
'\192'
–'\214'
'\216'
–'\222'
]
(∗ This is for the identifiers as specified in caml’s specifications. ∗)
let identchar =
['A'
–'Z'
'a'
–'z'
'_'
'\192'
–'\214'
'\216'
–'\246'
'\248'
–'\255'
'\''
'0'
–'9'
]
let identifier = (lowercase ∣ uppercase) identchar⋆
(∗ This one helps protecting special caracters. ∗)
let symbolchar =
['!'
'$'
'%'
'&'
'*'
'+'
'-'
'.'
'/'
':'
'<'
'='
'>'
'?'
'@'
'^'
'|'
'~'
]
let caml_token =
"[" ∣ "]" ∣ "[|" ∣ "|]" ∣ "[<" ∣ ">]" ∣ "{" ∣ "}" ∣ "{<" ∣ ">}" ∣ "[]"
∣ "(" ∣ ")" ∣ "or" ∣ "not" ∣ "||"
let symbol_token =
caml_token ∣ (symbolchar +)
let character =
"’" ( [^ '\\'
'\''
] ∣ '\\'
['\\'
'\''
'n'
't'
'b'
'r'
]
∣ '\\'
['0'
–'9'
] ['0'
–'9'
] ['0'
–'9'
] ) "’"
let decimal_literal = ['0'
–'9'
] +
let hex_literal = '0'
['x'
'X'
] ['0'
–'9'
'A'
–'F'
'a'
–'f'
] +
let oct_literal = '0'
['o'
'O'
] ['0'
–'7'
] +
let bin_literal = '0'
['b'
'B'
] ['0'
–'1'
] +
let float_literal =
['0'
–'9'
] + ('.'
['0'
–'9'
]⋆)? (['e'
'E'
] ['+'
'-'
]? ['0'
–'9'
] +)?
68.1emPretty-printing of code. Main entry points for Caml and Lex and
Yacc files, counts for spaces in order to respect alignment. The
following function pretty-prints some code and assumes that we are
at the beginning of a line.
rule pr_camlcode = parse
∣ space⋆ {
let n = count_spaces (lexeme lexbuf) in indentation n;
pr_camlcode_inside lexbuf; pr_camlcode lexbuf
}
∣ eof {
leave_math ()
}
and pr_lexcode = parse
∣ space⋆ {
let n = count_spaces (lexeme lexbuf) in indentation n;
pr_lexcode_inside lexbuf; pr_lexcode lexbuf
}
∣ eof {
leave_math ()
}
and pr_yacccode = parse
∣ space⋆ {
let n = count_spaces (lexeme lexbuf) in indentation n;
pr_yacccode_inside lexbuf; pr_yacccode lexbuf
}
∣ eof {
leave_math ()
}
69.1emThat function pretty-prints the Caml code anywhere else.
and pr_camlcode_inside = parse
∣ '\n'
{
end_line ()
}
∣ space +
{
output_char '~'
; pr_camlcode_inside lexbuf
}
∣ character
{
output_verbatim (lexeme lexbuf); pr_camlcode_inside lexbuf
}
∣ "’" identifier
{
let id = lexeme lexbuf in
output_type_variable (String.sub id 1 (pred (String.length id)));
pr_camlcode_inside lexbuf
}
∣ "(*r"
{
output_hfill (); output_bc (); comment_depth := 1;
pr_comment lexbuf; pr_camlcode_inside lexbuf
}
∣ "(*"
{
output_bc (); comment_depth := 1;
pr_comment lexbuf; pr_camlcode_inside lexbuf
}
∣ '"'
{
output_bs (); pr_code_string lexbuf; pr_camlcode_inside lexbuf
}
∣ symbol_token
{
output_symbol (lexeme lexbuf); pr_camlcode_inside lexbuf
}
∣ (identifier '.'
)⋆ identifier
{
output_ident (lexeme lexbuf); pr_camlcode_inside lexbuf
}
∣ eof
{
()
}
∣ decimal_literal ∣ hex_literal ∣ oct_literal ∣ bin_literal
{
output_integer (lexeme lexbuf); pr_camlcode_inside lexbuf
}
∣ float_literal
{
output_float (lexeme lexbuf); pr_camlcode_inside lexbuf
}
∣ _
{
output_escaped_char (first_char lexbuf); pr_camlcode_inside lexbuf
}
70.1emThat function pretty-prints the Lex code anywhere else.
and pr_lexcode_inside = parse
∣ '_'
{
output_string "\\ocwlexwc"; pr_lexcode_inside lexbuf
}
∣ '*'
{
enter_math ();
output_string "^\\star{}";
pr_lexcode_inside lexbuf
}
∣ '+'
{
enter_math ();
output_string "^{\\scriptscriptstyle +}";
pr_lexcode_inside lexbuf
}
∣ '-'
{
leave_math ();
output_string "--";
pr_lexcode_inside lexbuf
}
∣ '|'
{
enter_math (); output_string "\\mid{}"; pr_lexcode_inside lexbuf
}
∣ '\n'
{
end_line ()
}
∣ space +
{
output_char '~'
; pr_lexcode_inside lexbuf
}
∣ character
{
output_verbatim (lexeme lexbuf); pr_lexcode_inside lexbuf
}
∣ "(*" {
output_bc (); comment_depth := 1;
pr_comment lexbuf; pr_lexcode_inside lexbuf
}
∣ "(*r"
{
output_hfill (); output_bc (); comment_depth := 1;
pr_comment lexbuf; pr_lexcode_inside lexbuf
}
∣ '"'
{
output_bs (); pr_code_string lexbuf; pr_lexcode_inside lexbuf
}
∣ identifier
{
output_lex_ident (lexeme lexbuf); pr_lexcode_inside lexbuf
}
∣ eof {
()
}
∣ _
{
output_escaped_char (first_char lexbuf); pr_lexcode_inside lexbuf
}
71.1emThat function pretty-prints the Yacc code anywhere else.
and pr_yacccode_inside = parse
∣ "%%"
{
incr yaccdoublepercentcounter;
output_string
(if !yaccdoublepercentcounter = 1
then "\\ocwyaccrules"
else "\\ocwyacctrailer");
pr_yacccode_inside lexbuf
}
∣ "%{"
{
output_string "\\ocwyaccopercentbrace";
pr_yacccode_inside lexbuf
}
∣ "%}"
{
output_string "\\ocwyacccpercentbrace";
pr_yacccode_inside lexbuf
}
∣ ":"
{
output_string "\\ocwyacccolon";
pr_yacccode_inside lexbuf
}
∣ ";"
{
output_string "\\ocwyaccendrule";
pr_yacccode_inside lexbuf
}
∣ "|"
{
output_string "\\ocwyaccpipe";
pr_yacccode_inside lexbuf
}
∣ '\n'
{
end_line ();
}
∣ space +
{
output_char '~'
; pr_yacccode_inside lexbuf
}
∣ "/*r"
{
output_hfill (); output_byc (); pr_yacc_comment lexbuf;
pr_yacccode_inside lexbuf
}
∣ "/*"
{
output_byc (); pr_yacc_comment lexbuf; pr_yacccode_inside lexbuf
}
∣ '%'
? identifier
{
output_yacc_ident (lexeme lexbuf); pr_yacccode_inside lexbuf
}
∣ _
{
output_escaped_char (first_char lexbuf); pr_yacccode_inside lexbuf
}
∣ eof
{
()
}
72.1emComments.
and pr_comment = parse
∣ "(*"
{
output_bc ();
incr comment_depth;
pr_comment lexbuf
}
∣ "*)"
{
output_ec ();
decr comment_depth;
if !comment_depth > 0 then pr_comment lexbuf
}
∣ '"'
{
output_bs ();
pr_code_string lexbuf;
pr_comment lexbuf;
}
∣ '['
{
if !user_math_mode then
output_char '['
else begin
bracket_depth := 1;
begin_dcode (); escaped_code lexbuf; end_dcode ()
end;
pr_comment lexbuf
}
∣ eof
{
()
}
∣ "\\$"
{
output_string (lexeme lexbuf); pr_comment lexbuf
}
∣ '$'
{
user_math(); pr_comment lexbuf
}
∣ _
{
output_char (first_char lexbuf); pr_comment lexbuf
}
The C_like_comments are not inbricable
and pr_yacc_comment = parse
∣ "*/" {
output_eyc ();
}
∣ '\n'
space⋆ '*'
' '
{
output_string "\n "; pr_yacc_comment lexbuf
}
∣ '['
{
if !user_math_mode then
output_char '['
else begin
bracket_depth := 1;
begin_dcode (); escaped_code lexbuf; end_dcode ()
end;
pr_yacc_comment lexbuf
}
∣ eof {
()
}
∣ "\\$" {
output_string (lexeme lexbuf); pr_yacc_comment lexbuf
}
∣ '$'
{
user_math(); pr_yacc_comment lexbuf
}
∣ _ {
output_char (first_char lexbuf); pr_yacc_comment lexbuf
}
73.1emStrings in code.
and pr_code_string = parse
∣ '"'
{
output_es ()
}
∣ '\n'
{
end_line_string (); pr_code_string lexbuf
}
∣ ' '
{
output_vspace (); pr_code_string lexbuf
}
∣ '\\'
['"'
't'
'b'
'r'
]
{
output_escaped_char '\\'
;
output_char (lexeme_char lexbuf 1);
pr_code_string lexbuf
}
∣ '\\'
'\n'
{
output_escaped_char '\\'
; end_line_string ();
pr_code_string lexbuf
}
∣ '\\'
'\\'
{
output_escaped_char '\\'
; output_escaped_char '\\'
;
pr_code_string lexbuf
}
∣ eof {
()
}
∣ '-'
{
output_ascii_char 45; pr_code_string lexbuf
}
∣ _ {
output_escaped_char (first_char lexbuf); pr_code_string lexbuf
}
74.1emEscaped code.
and escaped_code = parse
∣ '['
{
output_char '['
; incr bracket_depth; escaped_code lexbuf
}
∣ ']'
{
decr bracket_depth;
if !bracket_depth > 0 then begin
output_char ']'
; escaped_code lexbuf
end else
if ¬ !user_math_mode then leave_math ()
}
∣ '"'
{
output_bs (); pr_code_string lexbuf; escaped_code lexbuf
}
∣ space +
{
output_char '~'
; escaped_code lexbuf
}
∣ character
{
output_verbatim (lexeme lexbuf); escaped_code lexbuf
}
∣ "’" identifier
{
let id = lexeme lexbuf in
output_type_variable (String.sub id 1 (pred (String.length id)));
escaped_code lexbuf
}
∣ symbol_token
{
output_symbol (lexeme lexbuf); escaped_code lexbuf
}
∣ identifier
{
output_ident (lexeme lexbuf); escaped_code lexbuf
}
∣ eof {
if ¬ !user_math_mode then leave_math ()
}
∣ decimal_literal ∣ hex_literal ∣ oct_literal ∣ bin_literal
{
output_integer (lexeme lexbuf); escaped_code lexbuf
}
∣ float_literal
{
output_float (lexeme lexbuf); escaped_code lexbuf
}
∣ _ {
output_escaped_char (first_char lexbuf); escaped_code lexbuf
}
75.1emDocumentation.
It is output ‘as is’, except for quotations.
and pr_doc = parse
∣ '['
{
if !user_math_mode then
output_char '['
else begin
bracket_depth := 1;
begin_dcode (); escaped_code lexbuf; end_dcode ()
end;
pr_doc lexbuf
}
∣ "\\$"
{
output_string (lexeme lexbuf); pr_doc lexbuf
}
∣ '$'
{
user_math(); pr_doc lexbuf
}
∣ "\\verb" _
{
verb_delim := lexeme_char lexbuf 5;
output_string (lexeme lexbuf);
pr_verb lexbuf; pr_doc lexbuf
}
∣ "\\begin{verbatim}"
{
output_string (lexeme lexbuf);
pr_verbatim lexbuf; pr_doc lexbuf
}
∣ eof
{
()
}
∣ _
{
output_char (first_char lexbuf); pr_doc lexbuf
}
and pr_doc_title = parse
∣ '['
{
if !user_math_mode then
output_char '['
else begin
bracket_depth := 1;
begin_dcode (); escaped_code lexbuf; end_dcode ()
end;
pr_doc_title lexbuf
}
∣ '.'
{
output_string ".}\\quad{}"
}
∣ eof
{
()
}
∣ _
{
output_char (first_char lexbuf); pr_doc_title lexbuf
}
and pr_verb = parse
∣ eof {
()
}
∣ _ {
let c = lexeme_char lexbuf 0 in
output_char c;
if c ≡ !verb_delim then () else pr_verb lexbuf
}
and pr_verbatim = parse
∣ "\\end{verbatim}"
{
output_string (lexeme lexbuf)
}
∣ eof {
()
}
∣ _ {
output_char (lexeme_char lexbuf 0); pr_verbatim lexbuf
}
{
76.1empretty-printing subparagraphs
let pretty_print_caml_subpar s =
pr_camlcode (Lexing.from_string s)
let pretty_print_lex_subpar s =
pr_lexcode (Lexing.from_string s)
let pretty_print_yacc_subpar s =
pr_yacccode (Lexing.from_string s)
77.1emThen we can introduce two functions pretty_print_code and
pretty_print_doc, which pretty-print respectively code and
documentation parts.
let pretty_print_code is_last_paragraph s =
pr_camlcode (Lexing.from_string s);
end_code_paragraph is_last_paragraph
let pretty_print_doc is_first_paragraph (big,n,s) =
begin_doc_paragraph is_first_paragraph n;
let lb = Lexing.from_string s in
if big then begin output_string "\\textbf{"; pr_doc_title lb end;
pr_doc lb;
end_doc_paragraph ()
78.1emThis module is the heart of the program. The only function is
produce_document, which takes a list of files and produces the
final LATEX document.
79.1emSource file structure.
A source file is splitted into paragraphs of code and
documentation. A new paragraph begins either when switching between
code and comment or, within code, when an empty line occurs.
A paragraph of documentation contains arbitrary text. A paragraph of
CAML code is arbitrary CAML source text. A paragraph of LEX/YACC
code is again a sequence of subparagraphs, which are either CAML
source (actions), CAMLLEX syntax or CAMLYACC syntax
type sub_paragraph =
∣ CamlCode of string
∣ LexCode of string
∣ YaccCode of string
type paragraph =
∣ Documentation of bool × int × string
∣ RawLaTeX of string
∣ Code of int × string
∣ LexYaccCode of int × (sub_paragraph list)
A web section is a numbered part of a source file, which contains a
sequence of paragraphs. The sec_beg field is the character
position of the beginning of the web section inside the whole file
type raw_section = {
sec_contents : paragraph list;
sec_beg : int }
Finally, the contents of a source file is a sequence of web
sections. The content_file field is the whole file name
(including dirname and extension) whereas the content_name field
is the corresponding module name
type content = {
content_file : string;
content_name : string;
content_contents : raw_section list }
80.1emA source file is either an implementation, an interface, a camllex
description, a camlyacc description, or any other file, which is
then considered as a LATEX file.
type file =
∣ Implem of content
∣ Interf of content
∣ Lex of content
∣ Yacc of content
∣ Other of string
81.1emOptions.
index indicates whether the index is to be produced; default value is true.
extern_defs indicates whether identifiers used but not defined should appear in the index; default value is false.
web indicates WEB style or not; default value is true.
add_latex_option passed an option to the ocamlweb LATEX
package.
val extern_defs : bool ref
val add_latex_option : string → unit
val index : bool ref
val web : bool ref
82.1emMain entry: production of the document from a list of files.
val produce_document : file list → unit
83.1em
type sub_paragraph =
∣ CamlCode of string
∣ LexCode of string
∣ YaccCode of string
type paragraph =
∣ Documentation of bool × int × string
∣ RawLaTeX of string
∣ Code of int × string
∣ LexYaccCode of int × (sub_paragraph list)
type raw_section = {
sec_contents : paragraph list;
sec_beg : int }
type content = {
content_file : string;
content_name : string;
content_contents : raw_section list }
type file =
∣ Implem of content
∣ Interf of content
∣ Lex of content
∣ Yacc of content
∣ Other of string
84.1emOptions of the engine.
let index = ref true
let add_latex_option s =
if !latex_options = "" then
latex_options := s
else
latex_options := !latex_options ^ "," ^ s
85.1emConstruction of the global index.
let index_file = function
∣ Implem i → cross_implem i.content_file i.content_name
∣ Interf i → cross_interf i.content_file i.content_name
∣ Yacc i → cross_yacc i.content_file i.content_name
∣ Lex i → cross_lex i.content_file i.content_name
∣ Other _ → ()
let build_index l = List.iter index_file l
86.1emThe locations tables.
module Smap = Map.Make(struct type t = string let compare = compare end)
let sec_locations = ref Smap.empty
let code_locations = ref Smap.empty
let add_loc table file ((_,s) as loc) =
let l = try Smap.find file !table with Not_found → [(0,s)] in
table := Smap.add file (loc :: l) !table
let add_par_loc =
let par_counter = ref 0 in
fun f p → match p with
∣ Code (l,_) →
incr par_counter;
add_loc code_locations f (l,!par_counter)
∣ LexYaccCode (l,_) →
incr par_counter;
add_loc code_locations f (l,!par_counter)
∣ Documentation _ → ()
∣ RawLaTeX _ → ()
let add_sec_loc =
let sec_counter = ref 0 in
fun f s →
incr sec_counter;
add_loc sec_locations f (s.sec_beg,!sec_counter);
List.iter (add_par_loc f) s.sec_contents
let add_file_loc it =
List.iter (add_sec_loc it.content_file) it.content_contents
let locations_for_a_file = function
∣ Implem i → add_file_loc i
∣ Interf i → add_file_loc i
∣ Lex i → add_file_loc i
∣ Yacc i → add_file_loc i
∣ Other _ → ()
let find_where w =
let rec lookup = function
∣ [ ] → raise Not_found
∣ (l,n) :: r → if w.w_loc ≥ l then ((w.w_filename,l),n) else lookup r
in
let table = if !web then !sec_locations else !code_locations in
lookup (Smap.find w.w_filename table)
87.1emPrinting of the index.
88.1emAlphabetic order for index entries.
To sort index entries, we define the following order relation
alpha_string. It puts symbols first (identifiers that do not begin
with a letter), and symbols are compared using Caml’s generic order
relation. For real identifiers, we first normalize them by translating
lowercase characters to uppercase ones and by removing all the accents,
and then we use Caml’s comparison.
let norm_char c = match Char.uppercase c with
∣ '\192'
..'\198'
→ 'A'
∣ '\199'
→ 'C'
∣ '\200'
..'\203'
→ 'E'
∣ '\204'
..'\207'
→ 'I'
∣ '\209'
→ 'N'
∣ '\210'
..'\214'
∣ '\216'
→ 'O'
∣ '\217'
..'\220'
→ 'U'
∣ '\221'
→ 'Y'
∣ c → c
let norm_string s =
let u = String.copy s in
for i = 0 to String.length s − 1 do
u.[i] ← norm_char s.[i]
done;
u
let alpha_string s1 s2 =
match what_is_first_char s1, what_is_first_char s2 with
∣ Symbol, Symbol → s1 < s2
∣ Symbol, _ → true
∣ _, Symbol → false
∣ _,_ → norm_string s1 < norm_string s2
let order_entry e1 e2 =
(alpha_string e1.e_name e2.e_name) ∨
(e1.e_name = e2.e_name ∧ e1.e_type < e2.e_type)
89.1emThe following function collects all the index entries and sort them
using alpha_string, returning a list.
module Idset = Set.Make(struct type t = index_entry let compare = compare end)
let all_entries () =
let s = Idmap.fold (fun x _ s → Idset.add x s) !used Idset.empty in
let s = Idmap.fold (fun x _ s → Idset.add x s) !defined s in
Sort.list order_entry (Idset.elements s)
90.1emWhen we are in LATEX style, an index entry only consists in two lists
of labels, which are treated by the LATEX macro \ocwrefindexentry
.
When we are in WEB style, we can do a bit better, replacing a list
like 1,2,3,4,7,8,10 by 1–4,7,8,10, as in usual LATEX indexes.
The following function intervals is used to group together the lists
of at least three consecutive integers.
let intervals l =
let rec group = function
∣ (acc, [ ]) → List.rev acc
∣ (Interval (s1,(_,n2)) :: acc, (f,n) :: rem) when n = succ n2 →
group (Interval (s1,(f,n)) :: acc, rem)
∣ ((Single _)::(Single (f1,n1))::acc, (f,n)::rem) when n = n1 + 2 →
group (Interval ((f1,n1),(f,n)) :: acc, rem)
∣ (acc, (f,n) :: rem) →
group ((Single (f,n)) :: acc, rem)
in
group ([ ],l)
let make_label_name (f,n) = f ^ ":" ^ (string_of_int n)
let label_list l =
List.map (fun x → make_label_name (fst x)) l
let elem_map f = function
∣ Single x → Single (f x)
∣ Interval (x,y) → Interval (f x, f y)
let web_list l =
let l = intervals l in
List.map (elem_map (fun x → make_label_name (fst x))) l
91.1emPrinting one index entry.
The function call (list_in_table id t) collects all the sections for
the identifier id in the table t, using the function find_where,
and sort the result thanks to the counter which was associated to each
new location (see section 86). It also removes the duplicates
labels.
let rec uniquize = function
∣ [ ] ∣ [_] as l → l
∣ x :: (y :: r as l) → if x = y then uniquize l else x :: (uniquize l)
let map_succeed_nf f l =
let rec map = function
∣ [ ] → [ ]
∣ x :: l → try (f x) :: (map l) with Not_found → map l
in
map l
let list_in_table id t =
try
let l = Whereset.elements (Idmap.find id t) in
let l = map_succeed_nf find_where l in
let l = Sort.list (fun x x′ → snd x < snd x′) l in
uniquize l
with Not_found →
[ ]
let entry_type_name = function
∣ Value ∣ Constructor → ""
∣ Field → "(field)"
∣ Label → "(label)"
∣ Type → "(type)"
∣ Exception → "(exn)"
∣ Module → "(module)"
∣ ModuleType → "(sig)"
∣ Class → "(class)"
∣ Method → "(method)"
∣ LexParseRule → "(camllex parsing rule)"
∣ RegExpr → "(camllex regexpr)"
∣ YaccNonTerminal → "(camlyacc non-terminal)"
∣ YaccTerminal → "(camlyacc token)"
let print_one_entry id =
let def = list_in_table id !defined in
if !extern_defs ∨ def ≠ [ ] then begin
let use = list_in_table id !used in
let s = id.e_name in
let t = entry_type_name id.e_type in
if !web then
output_index_entry s t (web_list def) (web_list use)
else
output_raw_index_entry s t (label_list def) (label_list use)
end
92.1emThen printing the index is just iterating print_one_entry on all the
index entries, given by (all_entries()).
let print_index () =
begin_index ();
List.iter print_one_entry (all_entries());
end_index ()
93.1emPretty-printing of the document.
let rec pretty_print_sub_paragraph = function
∣ CamlCode(s) →
pretty_print_caml_subpar s
∣ YaccCode(s) →
pretty_print_yacc_subpar s
∣ LexCode(s) →
pretty_print_lex_subpar s
let pretty_print_paragraph is_first_paragraph is_last_paragraph f = function
∣ Documentation (b,n,s) →
end_code ();
pretty_print_doc is_first_paragraph (b,n,s);
end_line() ∣ RawLaTeX s →
end_code ();
output_string s;
end_line()
∣ Code (l,s) →
if l > 0 then output_label (make_label_name (f,l));
begin_code_paragraph ();
begin_code ();
pretty_print_code is_last_paragraph s
∣ LexYaccCode (l,s) →
if l > 0 then output_label (make_label_name (f,l));
begin_code_paragraph ();
begin_code ();
List.iter pretty_print_sub_paragraph s;
end_code_paragraph is_last_paragraph
let pretty_print_section first f s =
if !web then begin_section ();
if first ∧ s.sec_beg > 0 then output_label (make_label_name (f,0));
output_label (make_label_name (f,s.sec_beg));
let rec loop is_first_paragraph = function
∣ [ ] →
()
∣ [ p ] →
pretty_print_paragraph is_first_paragraph true f p
∣ p :: rest →
pretty_print_paragraph is_first_paragraph false f p;
loop false rest
in
loop true s.sec_contents;
end_code ()
let pretty_print_sections f = function
∣ [ ] → ()
∣ s :: r →
pretty_print_section true f s;
List.iter (pretty_print_section false f) r
let pretty_print_content output_header content =
reset_pretty();
output_header content.content_name;
pretty_print_sections content.content_file content.content_contents
let pretty_print_file = function
∣ Implem i → pretty_print_content output_module i
∣ Interf i → pretty_print_content output_interface i
∣ Lex i → pretty_print_content output_lexmodule i
∣ Yacc i → pretty_print_content output_yaccmodule i
∣ Other f → output_file f
94.1emProduction of the document. We proceed in three steps:
let produce_document l =
List.iter locations_for_a_file l;
build_index l;
latex_header !latex_options;
List.iter pretty_print_file l;
if !index then print_index ();
latex_trailer ();
close_output ()
95.1emCaml files are represented by the record caml_file, which
contains their file names and their module names. The functions
module_name and make_caml_file are used to construct such
values.
type caml_file = { caml_filename : string; caml_module : string }
val module_name : string → string
val make_caml_file : string → caml_file
type file_type =
∣ File_impl of caml_file
∣ File_intf of caml_file
∣ File_lex of caml_file
∣ File_yacc of caml_file
∣ File_other of string
96.1emThe following function read_one_file reads a Caml file,
separating the sections, and separating the paragraphs inside the
sections. The boolean reference skip_header indicates whether the
header must be skipped. web_style records if web sections were
used anywhere in any file.
val skip_header : bool ref
val read_one_file : file_type → Web.file
97.1em
{
open Printf
open Lexing
open Output
open Web
open Pretty
98.1emGlobal variables and functions used by the lexer.
skip_header tells whether option --header
has been
selected by the user.
let skip_header = ref true
for displaying error message if any, current_file records the
name of the file currently read, and comment_or_string_start records
the starting position of the comment or the string currently being
read.
let current_file = ref ""
let comment_or_string_start = ref 0
brace_depth records the current depth of imbrication of braces
{..}
, to know in lex files whether we are in an action or
not. lexyacc_brace_start records the position of the starting brace
of the current action, to display an error message if this brace is
unclosed.
let in_lexyacc_action = ref false
let doublepercentcounter = ref 0
let brace_depth = ref 0
let lexyacc_brace_start = ref 0
web_style records if web sections were used anywhere in any file.
let web_style = ref false
global variables for temporarily recording data, for building
the Web.file structure.
let parbuf = Buffer.create 8192
let ignoring = ref false
let push_char c =
if ¬ !ignoring then Buffer.add_char parbuf c
let push_first_char lexbuf =
if ¬ !ignoring then Buffer.add_char parbuf (lexeme_char lexbuf 0)
let push_string s =
if ¬ !ignoring then Buffer.add_string parbuf s
let subparlist = ref ([ ] : sub_paragraph list)
let push_caml_subpar () =
if Buffer.length parbuf > 0 then begin
subparlist := (CamlCode (Buffer.contents parbuf)) :: !subparlist;
Buffer.clear parbuf
end
let push_lex_subpar () =
if Buffer.length parbuf > 0 then begin
subparlist := (LexCode (Buffer.contents parbuf)) :: !subparlist;
Buffer.clear parbuf
end
let push_yacc_subpar () =
if Buffer.length parbuf > 0 then begin
subparlist := (YaccCode (Buffer.contents parbuf)) :: !subparlist;
Buffer.clear parbuf
end
let parlist = ref ([ ] : paragraph list)
let code_beg = ref 0
let push_code () =
assert (List.length !subparlist = 0);
if Buffer.length parbuf > 0 then begin
parlist := (Code (!code_beg, Buffer.contents parbuf)) :: !parlist;
Buffer.clear parbuf
end
let push_lexyacccode () =
assert (Buffer.length parbuf = 0) ;
if List.length !subparlist > 0 then begin
parlist := (LexYaccCode (!code_beg, List.rev !subparlist)) :: !parlist;
subparlist := [ ]
end
let push_doc () =
if Buffer.length parbuf > 0 then begin
let doc =
Documentation (!big_section, !initial_spaces, Buffer.contents parbuf)
in
parlist := doc :: !parlist;
big_section := false;
Buffer.clear parbuf
end
let push_latex () =
if Buffer.length parbuf > 0 then begin
let doc =
RawLaTeX (Buffer.contents parbuf)
in
parlist := doc :: !parlist;
big_section := false;
Buffer.clear parbuf
end
let seclist = ref ([ ] : raw_section list)
let section_beg = ref 0
let push_section () =
if !parlist ≠ [ ] then begin
let s = { sec_contents = List.rev !parlist; sec_beg = !section_beg } in
seclist := s :: !seclist;
parlist := [ ]
end
let reset_lexer f =
current_file := f;
comment_or_string_start := 0;
section_beg := 0;
code_beg := 0;
parlist := [ ];
seclist := [ ];
in_lexyacc_action := false;
doublepercentcounter := 0
let backtrack lexbuf =
lexbuf.lex_curr_pos ← lexbuf.lex_start_pos
}
99.1emShortcuts for regular expressions.
let space = [' '
'\r'
'\t'
]
let space_or_nl = [' '
'\t'
'\r'
'\n'
]
let character =
"’" ( [^ '\\'
'\''
] ∣ '\\'
['\\'
'\''
'n'
't'
'b'
'r'
]
∣ '\\'
['0'
–'9'
] ['0'
–'9'
] ['0'
–'9'
] ) "’"
let up_to_end_of_comment =
[^ '*'
]⋆ '*'
(([^ '*'
')'
] [^ '*'
]⋆ '*'
) ∣ '*'
)⋆ ')'
100.1emEntry point to skip the headers. Returns when headers are skipped.
rule header = parse
∣ "(*"
{
comment_or_string_start := lexeme_start lexbuf;
skip_comment lexbuf;
skip_spaces_until_nl lexbuf;
header lexbuf
}
∣ "\n"
{
()
}
∣ space +
{
header lexbuf
}
∣ _
{
backtrack lexbuf
}
∣ eof
{
()
}
To skip a comment (used by header).
and skip_comment = parse
∣ "(*"
{
skip_comment lexbuf; skip_comment lexbuf
}
∣ "*)"
{
()
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocaml comment\n";
exit 1
}
∣ _
{
skip_comment lexbuf
}
101.1emSame as header but for OCamlYacc
and yacc_header = parse
∣ "/*"
{
comment_or_string_start := lexeme_start lexbuf;
skip_yacc_comment lexbuf;
skip_spaces_until_nl lexbuf;
yacc_header lexbuf
}
∣ "\n"
{
()
}
∣ space +
{
yacc_header lexbuf
}
∣ _
{
backtrack lexbuf
}
∣ eof
{
()
}
and skip_yacc_comment = parse
∣ "/*"
{
skip_yacc_comment lexbuf; skip_yacc_comment lexbuf
}
∣ "*/"
{
()
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocamlyacc comment\n";
exit 1
}
∣ _
{
skip_yacc_comment lexbuf
}
102.1emRecognizes a complete caml module body or interface, after header
has been skipped. After calling that entry, the whole text read is in
seclist.
and caml_implementation = parse
∣ _
{
backtrack lexbuf;
paragraph lexbuf;
caml_implementation lexbuf
}
∣ eof
{
push_section ()
}
recognizes a paragraph of caml code or documentation. After calling
that entry, the paragraph has been added to parlist.
and paragraph = parse
∣ space⋆ '\n'
{
paragraph lexbuf
}
∣ space⋆ ";;"
{
paragraph lexbuf
}
∣ space⋆ "(*" '*'
⋆ "*)" space⋆ '\n'
{
paragraph lexbuf
}
∣ space⋆ "(*"
{
comment_or_string_start := lexeme_start lexbuf;
let s = lexeme lexbuf in
initial_spaces := count_spaces (String.sub s 0 (String.length s − 2));
start_of_documentation lexbuf;
push_doc ()
}
∣ space⋆ ("(*c" ∣ _ )
{
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
caml_subparagraph lexbuf;
push_code()
}
∣ eof
{
()
}
recognizes a whole lex description file, after header has been
skipped. After calling that entry, the whole text read is in
seclist.
and lex_description = parse
∣ _
{
backtrack lexbuf;
lex_paragraph lexbuf ;
lex_description lexbuf
}
∣ eof
{
push_section ()
}
and yacc_description = parse
∣ _
{
backtrack lexbuf ;
yacc_paragraph lexbuf;
yacc_description lexbuf
}
∣ eof
{
push_section ()
}
Recognizes a paragraph of a lex description file. After calling
that entry, the paragraph has been added to parlist.
and lex_paragraph = parse
∣ space⋆ '\n'
{
lex_paragraph lexbuf
}
∣ space⋆ "(*" '*'
⋆ "*)" space⋆ '\n'
{
lex_paragraph lexbuf
}
∣ space⋆ ("(*c" ∣ _ )
{
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
lex_subparagraphs lexbuf ;
push_lexyacccode()
}
∣ space⋆ "(*"
{
comment_or_string_start := lexeme_start lexbuf;
start_of_documentation lexbuf;
push_doc ()
}
∣ eof
{
()
}
and yacc_paragraph = parse
∣ space⋆ '\n'
{
yacc_paragraph lexbuf
}
∣ space⋆ "/*" '*'
⋆ "*/" space⋆ '\n'
{
if ¬ !in_lexyacc_action
then yacc_paragraph lexbuf
else begin
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
yacc_subparagraphs lexbuf ;
push_lexyacccode()
end
}
∣ space⋆ "/*"
{
if ¬ !in_lexyacc_action
then begin
comment_or_string_start := lexeme_start lexbuf;
start_of_yacc_documentation lexbuf;
push_doc ()
end
else begin
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
yacc_subparagraphs lexbuf ;
push_lexyacccode()
end
}
∣ space⋆ "(*" '*'
⋆ "*)" space⋆ '\n'
{
if !in_lexyacc_action
then yacc_paragraph lexbuf
else begin
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
yacc_subparagraphs lexbuf ;
push_lexyacccode()
end
}
∣ space⋆ "(*"
{
if !in_lexyacc_action
then begin
comment_or_string_start := lexeme_start lexbuf;
start_of_documentation lexbuf;
push_doc ()
end
else begin
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
yacc_subparagraphs lexbuf ;
push_lexyacccode()
end
}
∣ space⋆ ("/*c" ∣ "(*c" ∣ _ )
{
code_beg := lexeme_start lexbuf;
backtrack lexbuf;
yacc_subparagraphs lexbuf ;
push_lexyacccode()
}
∣ eof
{
()
}
103.1emAt the beginning of the documentation part, just after the
"(*"
. If the first character is 's'
, then a new section is
started. After calling that entry, the parbuf buffer contains the
doc read.
and start_of_documentation = parse
∣ space_or_nl +
{
in_documentation lexbuf
}
∣ ('s'
∣ 'S'
) space_or_nl⋆
{
web_style := true; push_section ();
section_beg := lexeme_start lexbuf;
big_section := (lexeme_char lexbuf 0 ≡ 'S'
);
in_documentation lexbuf
}
∣ 'p'
up_to_end_of_comment
{
let s = lexeme lexbuf in
push_in_preamble (String.sub s 1 (String.length s − 3))
}
∣ 'i'
{
ignore lexbuf
}
∣ 'l'
{
in_documentation lexbuf; push_latex ()
}
∣ _
{
backtrack lexbuf;
in_documentation lexbuf
}
∣ eof
{
in_documentation lexbuf
}
and start_of_yacc_documentation = parse
∣ space_or_nl +
{
in_yacc_documentation lexbuf
}
∣ ('s'
∣ 'S'
) space_or_nl⋆
{
web_style := true; push_section ();
section_beg := lexeme_start lexbuf;
big_section := (lexeme_char lexbuf 0 ≡ 'S'
);
in_yacc_documentation lexbuf
}
∣ 'p'
up_to_end_of_comment
{
let s = lexeme lexbuf in
push_in_preamble (String.sub s 1 (String.length s − 3))
}
∣ 'i'
{
yacc_ignore lexbuf
}
∣ 'l'
{
in_yacc_documentation lexbuf; push_latex ()
}
∣ _
{
backtrack lexbuf;
in_yacc_documentation lexbuf
}
∣ eof
{
in_yacc_documentation lexbuf
}
104.1emInside the documentation part, anywhere after the "(*". After
calling that entry, the parbuf buffer contains the doc read.
and in_documentation = parse
∣ "(*"
{
push_string "(*";
in_documentation lexbuf;
push_string "*)";
in_documentation lexbuf
}
∣ "*)"
{
()
}
∣ '\n'
" * "
{
push_char '\n'
; in_documentation lexbuf
}
∣ '"'
{
push_char '"'
; in_string lexbuf; in_documentation lexbuf
}
∣ character
{
push_string (lexeme lexbuf); in_documentation lexbuf
}
∣ _
{
push_first_char lexbuf; in_documentation lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocaml comment\n";
exit 1
}
yacc comments are NOT nested
and in_yacc_documentation = parse
∣ "*/"
{
()
}
∣ '\n'
" * "
{
push_char '\n'
; in_yacc_documentation lexbuf
}
∣ '"'
{
push_char '"'
; in_string lexbuf; in_yacc_documentation lexbuf
}
∣ character
{
push_string (lexeme lexbuf); in_yacc_documentation lexbuf
}
∣ _
{
push_first_char lexbuf; in_yacc_documentation lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocamlyacc comment\n";
exit 1
}
105.1emRecognizes a subparagraph of caml code. After calling that entry,
the parbuf buffer contains the code read.
and caml_subparagraph = parse
∣ space⋆ '\n'
space⋆ '\n'
{
backtrack lexbuf
}
∣ ";;"
{
backtrack lexbuf
}
∣ eof
{
()
}
∣ "(*" ∣ "(*c"
{
comment_or_string_start := lexeme_start lexbuf;
push_string "(*";
comment lexbuf;
caml_subparagraph lexbuf
}
∣ "(*i"
{
comment_or_string_start := lexeme_start lexbuf;
ignore lexbuf; caml_subparagraph lexbuf
}
∣ '"'
{
comment_or_string_start := lexeme_start lexbuf;
push_char '"'
; in_string lexbuf; caml_subparagraph lexbuf
}
∣ '{'
{
incr brace_depth;
push_char '{'
;
caml_subparagraph lexbuf
}
∣ '}'
{
if !brace_depth = 0 then backtrack lexbuf
else
begin
decr brace_depth;
push_char '}'
;
caml_subparagraph lexbuf
end
}
∣ "%}"
{
if !brace_depth = 0 then backtrack lexbuf
else
begin
decr brace_depth;
push_string "%}";
caml_subparagraph lexbuf
end
}
∣ character
{
push_string (lexeme lexbuf); caml_subparagraph lexbuf
}
∣ _ {
push_first_char lexbuf; caml_subparagraph lexbuf
}
106.1emRecognizes a sequence of subparagraphs of lex description,
including CAML actions. After calling that entry, the subparagraphs
read are in subparlist.
and lex_subparagraphs = parse
∣ space⋆ '\n'
space⋆ '\n'
{
backtrack lexbuf
}
∣ ";;"
{
()
}
∣ eof
{
if !in_lexyacc_action
then
begin
eprintf "File \"%s\", character %d\n"
!current_file !lexyacc_brace_start;
eprintf "Unclosed brace\n" ;
exit 1
end
}
∣ '}'
{
if !in_lexyacc_action
then
begin
push_char '}'
;
in_lexyacc_action := false;
lex_subparagraph lexbuf;
push_lex_subpar();
lex_subparagraphs lexbuf
end
else
begin
eprintf "File \"%s\", character %d\n"
!current_file (lexeme_start lexbuf);
eprintf "Unexpected closing brace ";
exit 1
end
}
∣ _
{
backtrack lexbuf;
if !in_lexyacc_action
then
begin
caml_subparagraph lexbuf;
push_caml_subpar()
end
else
begin
lex_subparagraph lexbuf;
push_lex_subpar()
end;
lex_subparagraphs lexbuf
}
and yacc_subparagraphs = parse
∣ space⋆ '\n'
space⋆ '\n'
{
backtrack lexbuf
}
∣ "%%"
{
if !in_lexyacc_action then begin
push_string "%%";
caml_subparagraph lexbuf;
push_caml_subpar();
yacc_subparagraphs lexbuf
end
else begin
push_yacc_subpar();
push_string "%%";
push_yacc_subpar();
incr doublepercentcounter;
if !doublepercentcounter ≥ 2 then in_lexyacc_action := true
end
}
∣ ";;"
{
if !in_lexyacc_action then ()
else begin
push_string ";;";
yacc_subparagraph lexbuf;
push_yacc_subpar();
yacc_subparagraphs lexbuf
end
}
∣ eof
{
if !in_lexyacc_action ∧ !doublepercentcounter ≤ 1
then
begin
eprintf "File \"%s\", character %d\n"
!current_file !lexyacc_brace_start;
eprintf "Unclosed brace\n" ;
exit 1
end
}
∣ '}'
{
if !in_lexyacc_action
then
begin
push_char '}'
;
in_lexyacc_action := false;
yacc_subparagraph lexbuf;
push_yacc_subpar();
yacc_subparagraphs lexbuf
end
else
begin
eprintf "File \"%s\", character %d\n"
!current_file (lexeme_start lexbuf);
eprintf "Unexpected closing brace ";
exit 1
end
}
∣ "%}"
{
if !in_lexyacc_action
then
begin
push_string "%}";
in_lexyacc_action := false;
yacc_subparagraph lexbuf;
push_yacc_subpar();
yacc_subparagraphs lexbuf
end
else
begin
eprintf "File \"%s\", character %d\n"
!current_file (lexeme_start lexbuf);
eprintf "Unexpected closing brace ";
exit 1
end
}
∣ _
{
backtrack lexbuf;
if !in_lexyacc_action
then
begin
caml_subparagraph lexbuf;
push_caml_subpar()
end
else
begin
yacc_subparagraph lexbuf;
push_yacc_subpar()
end;
yacc_subparagraphs lexbuf
}
107.1emRecognizes a subparagraph of lex description. After
calling that entry, the subparagraph read is in parbuf.
and lex_subparagraph = parse
∣ space⋆ '\n'
space⋆ '\n'
{
backtrack lexbuf
}
∣ ";;" {
backtrack lexbuf
}
∣ eof {
()
}
∣ "(*" ∣ "(*c"
{
comment_or_string_start := lexeme_start lexbuf;
push_string "(*";
comment lexbuf;
lex_subparagraph lexbuf
}
∣ space⋆ "(*i"
{
comment_or_string_start := lexeme_start lexbuf;
ignore lexbuf; lex_subparagraph lexbuf
}
∣ '"'
{
comment_or_string_start := lexeme_start lexbuf;
push_char '"'
; in_string lexbuf; lex_subparagraph lexbuf
}
∣ '{'
{
lexyacc_brace_start := lexeme_start lexbuf;
push_char '{'
;
in_lexyacc_action := true
}
∣ character
{
push_string (lexeme lexbuf); lex_subparagraph lexbuf
}
∣ _
{
push_first_char lexbuf; lex_subparagraph lexbuf
}
and yacc_subparagraph = parse
∣ space⋆ '\n'
space⋆ '\n'
{
backtrack lexbuf
}
∣ "%%" {
backtrack lexbuf
}
∣ ";;" {
backtrack lexbuf
}
∣ eof {
()
}
∣ "/*" ∣ "/*c"
{
comment_or_string_start := lexeme_start lexbuf;
push_string "/*";
yacc_comment lexbuf;
yacc_subparagraph lexbuf
}
∣ space⋆ "/*i"
{
comment_or_string_start := lexeme_start lexbuf;
yacc_ignore lexbuf; yacc_subparagraph lexbuf
}
∣ '"'
{
comment_or_string_start := lexeme_start lexbuf;
push_char '"'
; in_string lexbuf; yacc_subparagraph lexbuf
}
∣ "%{"
{
lexyacc_brace_start := lexeme_start lexbuf;
push_string "%{";
in_lexyacc_action := true
}
∣ '{'
{
lexyacc_brace_start := lexeme_start lexbuf;
push_char '{'
;
in_lexyacc_action := true
}
∣ '<'
{
lexyacc_brace_start := lexeme_start lexbuf;
push_char '<'
;
push_yacc_subpar ();
yacc_type lexbuf;
yacc_subparagraph lexbuf
}
∣ character
{
push_string (lexeme lexbuf); yacc_subparagraph lexbuf
}
∣ _
{
push_first_char lexbuf; yacc_subparagraph lexbuf
}
and yacc_type = parse
∣ "->"
{
push_string "->"; yacc_type lexbuf
}
∣ '>'
{
push_caml_subpar(); push_char '>'
}
∣ _
{
push_first_char lexbuf; yacc_type lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !lexyacc_brace_start;
eprintf "Unclosed ’<’";
exit 1
}
108.1emTo skip spaces until a newline.
and skip_spaces_until_nl = parse
∣ space⋆ '\n'
? {
()
}
∣ eof {
()
}
∣ _ {
backtrack lexbuf
}
109.1emTo read a comment inside a piece of code.
and comment = parse
∣ "(*" ∣ "(*c"
{
push_string "(*"; comment lexbuf; comment lexbuf
}
∣ "*)"
{
push_string "*)"
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocaml comment\n";
exit 1
}
∣ _
{
push_first_char lexbuf; comment lexbuf
}
and yacc_comment = parse
∣ "*/"
{
push_string "*/"
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocamlyacc comment\n";
exit 1
}
∣ _
{
push_first_char lexbuf; yacc_comment lexbuf
}
110.1emIgnored parts, between "(*i" and "i*)". Note that such comments
are not nested.
and ignore = parse
∣ "i*)"
{
skip_spaces_until_nl lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocamlweb comment\n";
exit 1
}
∣ _
{
ignore lexbuf
}
and yacc_ignore = parse
∣ "i*/"
{
skip_spaces_until_nl lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocamlweb comment\n";
exit 1
}
∣ _
{
yacc_ignore lexbuf
}
111.1emStrings in code.
and in_string = parse
∣ '"'
{
push_char '"'
}
∣ '\\'
['\\'
'"'
'n'
't'
'b'
'r'
]
{
push_string (lexeme lexbuf); in_string lexbuf
}
∣ eof
{
eprintf "File \"%s\", character %d\n"
!current_file !comment_or_string_start;
eprintf "Unterminated ocaml string\n";
exit 1
}
∣ _
{
push_first_char lexbuf; in_string lexbuf
}
{
112.1emCaml files.
type caml_file = { caml_filename : string; caml_module : string }
let module_name f = String.capitalize (Filename.basename f)
let make_caml_file f =
{ caml_filename = f;
caml_module = module_name (Filename.chop_extension f) }
type file_type =
∣ File_impl of caml_file
∣ File_intf of caml_file
∣ File_lex of caml_file
∣ File_yacc of caml_file
∣ File_other of string
113.1emReading Caml files.
let raw_read_file header entry f =
reset_lexer f;
let c = open_in f in
let buf = Lexing.from_channel c in
if !skip_header then header buf;
entry buf;
close_in c;
List.rev !seclist
let read header entry m =
{ content_file = m.caml_filename;
content_name = m.caml_module;
content_contents = raw_read_file header entry m.caml_filename; }
let read_one_file = function
∣ File_impl m → Implem (read header caml_implementation m)
∣ File_intf m → Interf (read header caml_implementation m)
∣ File_lex m → Lex (read header lex_description m)
∣ File_yacc m → Yacc (read yacc_header yacc_description m)
∣ File_other f → Other f
114.1emUsage. Printed on error output.
let usage () =
prerr_endline "";
prerr_endline "Usage: ocamlweb <options and files>";
prerr_endline " -o <file> write output in file <file>";
prerr_endline " --dvi output the DVI";
prerr_endline " --ps output the PostScript";
prerr_endline " --html output the HTML";
prerr_endline " --hevea-option <opt>";
prerr_endline " pass an option to hevea (HTML output)";
prerr_endline " -s (short) no titles for files";
prerr_endline " --noweb use manual LaTeX sectioning, not WEB";
prerr_endline " --header do not skip the headers of Caml file";
prerr_endline " --no-preamble suppress LaTeX header and trailer";
prerr_endline " --no-index do not output the index";
prerr_endline " --extern-defs keep external definitions in the index";
prerr_endline " --impl <file> consider <file> as a .ml file";
prerr_endline " --intf <file> consider <file> as a .mli file";
prerr_endline " --tex <file> consider <file> as a .tex file";
prerr_endline " --latex-option <opt>";
prerr_endline " pass an option to the LaTeX package ocamlweb.sty";
prerr_endline " --class-options <opt>";
prerr_endline " set the document class options (defaults to ‘12pt’)";
prerr_endline " --old-fullpage uses LaTeX package fullpage with no option";
prerr_endline " -p <string> insert something in LaTeX preamble";
prerr_endline " --files <file> read file names to process in <file>";
prerr_endline " --quiet quiet mode";
prerr_endline " --no-greek disable use of greek letters for type variables";
prerr_endline "";
prerr_endline
"On-line documentation at http://www.lri.fr/~filliatr/ocamlweb/\n";
exit 1
115.1emLicense informations. Printed when using the option
--warranty
.
let copying () =
prerr_endline "This program is free software; you can redistribute it and/or modifyit under the terms of the GNU Library General Public License version 2, aspublished by the Free Software Foundation.This program is distributed in the hope that it will be useful,but WITHOUT ANY WARRANTY; without even the implied warranty ofMERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.See the GNU Library General Public License version 2 for more details(enclosed in the file LGPL).";
flush stderr
116.1emBanner. Always printed. Notice that it is printed on error
output, so that when the output of ocamlweb is redirected this header
is not (unless both standard and error outputs are redirected, of
course).
let banner () =
eprintf "This is ocamlweb version %s, compiled on %s\n"
Version.version Version.date;
eprintf
"Copyright (c) 1999-2000 Jean-Christophe Filliâtre and Claude Marché\n";
eprintf
"This is free software with ABSOLUTELY NO WARRANTY (use option -warranty)\n";
flush stderr
117.1emSeparation of files. Files given on the command line are
separated according to their type, which is determined by their suffix.
Implementations and interfaces have respective suffixes .ml
and .mli
and LATEX files have suffix .tex
.
let check_if_file_exists f =
if ¬ (Sys.file_exists f) then begin
eprintf "\nocamlweb: %s: no such file\n" f;
exit 1
end
let what_file f =
check_if_file_exists f;
if check_suffix f ".ml" then
File_impl (make_caml_file f)
else if check_suffix f ".mli" then
File_intf (make_caml_file f)
else if check_suffix f ".mll" then
File_lex (make_caml_file f)
else if check_suffix f ".mly" then
File_yacc (make_caml_file f)
else if check_suffix f ".tex" then
File_other f
else begin
eprintf "\nocamlweb: don’t know what to do with %s\n" f;
exit 1
end
118.1emReading file names from a file.
File names may be given
in a file instead of being given on the command
line. (files_from_file f) returns the list of file names contained
in the file named f. These file names must be separated by spaces,
tabulations or newlines.
let files_from_file f =
let files_from_channel ch =
let buf = Buffer.create 80 in
let l = ref [ ] in
try
while true do
match input_char ch with
∣ ' '
∣ '\t'
∣ '\n'
→
if Buffer.length buf > 0 then l := (Buffer.contents buf) :: !l;
Buffer.clear buf
∣ c →
Buffer.add_char buf c
done; [ ]
with End_of_file →
List.rev !l
in
try
check_if_file_exists f;
let ch = open_in f in
let l = files_from_channel ch in
close_in ch;l
with Sys_error s → begin
eprintf "\nocamlweb: cannot read from file %s (%s)\n" f s;
exit 1
end
119.1emParsing of the command line. Output file, if specified, is kept
in output_file.
let output_file = ref ""
let dvi = ref false
let ps = ref false
let html = ref false
let hevea_options = ref ([ ] : string list)
let parse () =
let files = ref [ ] in
let add_file f = files := f :: !files in
let rec parse_rec = function
∣ [ ] → ()
∣ ("-header" ∣ "--header") :: rem →
skip_header := false; parse_rec rem
∣ ("-noweb" ∣ "--noweb" ∣ "-no-web" ∣ "--no-web") :: rem →
web := false; parse_rec rem
∣ ("-web" ∣ "--web") :: rem →
web := true; parse_rec rem
∣ ("-nopreamble" ∣ "--nopreamble" ∣ "--no-preamble") :: rem →
set_no_preamble true; parse_rec rem
∣ ("-p" ∣ "--preamble") :: s :: rem →
push_in_preamble s; parse_rec rem
∣ ("-p" ∣ "--preamble") :: [ ] →
usage ()
∣ ("-noindex" ∣ "--noindex" ∣ "--no-index") :: rem →
index := false; parse_rec rem
∣ ("-o" ∣ "--output") :: f :: rem →
output_file := f; parse_rec rem
∣ ("-o" ∣ "--output") :: [ ] →
usage ()
∣ ("-s" ∣ "--short") :: rem →
short := true; parse_rec rem
∣ ("-dvi" ∣ "--dvi") :: rem →
dvi := true; parse_rec rem
∣ ("-ps" ∣ "--ps") :: rem →
ps := true; parse_rec rem
∣ ("-html" ∣ "--html") :: rem →
html := true; parse_rec rem
∣ ("-hevea-option" ∣ "--hevea-option") :: [ ] →
usage ()
∣ ("-hevea-option" ∣ "--hevea-option") :: s :: rem →
hevea_options := s :: !hevea_options; parse_rec rem
∣ ("-extern-defs" ∣ "--extern-defs") :: rem →
extern_defs := true; parse_rec rem
∣ ("-q" ∣ "-quiet" ∣ "--quiet") :: rem →
quiet := true; parse_rec rem
∣ ("--nogreek" ∣ "--no-greek") :: rem →
use_greek_letters := false; parse_rec rem
∣ ("-h" ∣ "-help" ∣ "-?" ∣ "--help") :: rem →
banner (); usage ()
∣ ("-v" ∣ "-version" ∣ "--version") :: _ →
banner (); exit 0
∣ ("-warranty" ∣ "--warranty") :: _ →
copying (); exit 0
∣ "--class-options" :: s :: rem →
class_options := s; parse_rec rem
∣ "--class-options" :: [ ] →
usage ()
∣ "--latex-option" :: s :: rem →
add_latex_option s; parse_rec rem
∣ "--latex-option" :: [ ] →
usage ()
∣ "--old-fullpage" :: rem →
fullpage_headings := false; parse_rec rem
∣ ("-impl" ∣ "--impl") :: f :: rem →
check_if_file_exists f;
let n =
if Filename.check_suffix f ".mll" ∨ Filename.check_suffix f ".mly"
then Filename.chop_extension f else f
in
let m = File_impl { caml_filename = f; caml_module = module_name n } in
add_file m; parse_rec rem
∣ ("-impl" ∣ "--impl") :: [ ] →
usage ()
∣ ("-intf" ∣ "--intf") :: f :: rem →
check_if_file_exists f;
let i = File_intf { caml_filename = f; caml_module = module_name f } in
add_file i; parse_rec rem
∣ ("-intf" ∣ "--intf") :: [ ] →
usage ()
∣ ("-tex" ∣ "--tex") :: f :: rem →
add_file (File_other f); parse_rec rem
∣ ("-tex" ∣ "--tex") :: [ ] →
usage ()
∣ ("-files" ∣ "--files") :: f :: rem →
List.iter (fun f → add_file (what_file f)) (files_from_file f);
parse_rec rem
∣ ("-files" ∣ "--files") :: [ ] →
usage ()
∣ f :: rem →
add_file (what_file f); parse_rec rem
in
parse_rec (List.tl (Array.to_list Sys.argv));
List.rev !files
120.1emThe following function produces the output. The default output is
the LATEX document: in that case, we just call Web.produce_document.
If option -dvi
, -ps
or -html
is invoked, then
we make calls to latex
, dvips
and/or hevea
accordingly.
let locally dir f x =
let cwd = Sys.getcwd () in
try
Sys.chdir dir; let y = f x in Sys.chdir cwd; y
with e →
Sys.chdir cwd; raise e
let clean_temp_files basefile =
let remove f = try Sys.remove f with _ → () in
remove (basefile ^ ".tex");
remove (basefile ^ ".log");
remove (basefile ^ ".aux");
remove (basefile ^ ".dvi");
remove (basefile ^ ".ps");
remove (basefile ^ ".haux");
remove (basefile ^ ".html")
let clean_and_exit basefile res = clean_temp_files basefile; exit res
let cat file =
let c = open_in file in
try
while true do print_char (input_char c) done
with End_of_file →
close_in c
let copy src dst =
let cin = open_in src
and cout = open_out dst in
try
while true do Pervasives.output_char cout (input_char cin) done
with End_of_file →
close_in cin; close_out cout
let produce_output fl =
if ¬ (!dvi ∨ !ps ∨ !html) then begin
if !output_file ≠ "" then set_output_to_file !output_file;
produce_document fl
end else begin
let texfile = temp_file "ocamlweb" ".tex" in
let basefile = chop_suffix texfile ".tex" in
set_output_to_file texfile;
produce_document fl;
let command =
let file = basename texfile in
let file =
if !quiet then sprintf "’\\nonstopmode\\input{%s}’" file else file
in
sprintf "(latex %s && latex %s) 1>&2 %s" file file
(if !quiet then "> /dev/null" else "")
in
let res = locally (dirname texfile) Sys.command command in
if res ≠ 0 then begin
eprintf "Couldn’t run LaTeX successfully\n";
clean_and_exit basefile res
end;
let dvifile = basefile ^ ".dvi" in
if !dvi then begin
if !output_file ≠ "" then
(∗ we cannot use Sys.rename accross file systems ∗)
copy dvifile !output_file
else
cat dvifile
end;
if !ps then begin
let psfile =
if !output_file ≠ "" then !output_file else basefile ^ ".ps"
in
let command =
sprintf "dvips %s -o %s %s" dvifile psfile
(if !quiet then "> /dev/null 2>&1" else "")
in
let res = Sys.command command in
if res ≠ 0 then begin
eprintf "Couldn’t run dvips successfully\n";
clean_and_exit basefile res
end;
if !output_file = "" then cat psfile
end;
if !html then begin
let htmlfile =
if !output_file ≠ "" then !output_file else basefile ^ ".html"
in
let options = String.concat " " (List.rev !hevea_options) in
let command =
sprintf "hevea %s ocamlweb.sty %s -o %s %s" options texfile htmlfile
(if !quiet then "> /dev/null 2>&1" else "")
in
let res = Sys.command command in
if res ≠ 0 then begin
eprintf "Couldn’t run hevea successfully\n";
clean_and_exit basefile res
end;
if !output_file = "" then cat htmlfile
end;
clean_temp_files basefile
end
121.1emMain program. Print the banner, parse the command line,
read the files and then call produce_document from module Web.
let main () =
let files = parse() in
if List.length files > 0 then begin
let l = List.map read_one_file files in
if !web_style then begin
if ¬ !web ∧ ¬ !quiet then begin
eprintf
"Warning: web sections encountered while in noweb style, ignored.\n";
flush stderr
end
end else
web := false;
if ¬ !web then add_latex_option "noweb";
produce_output l
end
let _ = Printexc.catch main ()
This document was translated from LATEX by HEVEA.