type ichar = char * int

type regexp =
  | Epsilon
  | Character of ichar
  | Union of regexp * regexp
  | Concat of regexp * regexp
  | Star of regexp

let count = ref 0
let rec index = function
  | Epsilon -> Epsilon
  | Character (c, _) -> incr count; Character (c, !count)
  | Union (r1, r2) -> Union (index r1, index r2)
  | Concat (r1, r2) -> Concat (index r1, index r2)
  | Star r -> Star (index r)

let rec null = function
  | Epsilon | Star _ -> true
  | Character _ -> false
  | Union (r1, r2) -> null r1 || null r2
  | Concat (r1, r2) -> null r1 && null r2

module Cset = Set.Make(struct type t = ichar let compare = compare end)

let rec first = function
  | Epsilon ->
      Cset.empty
  | Character c ->
      Cset.singleton c
  | Union (r1, r2) ->
      Cset.union (first r1) (first r2)
  | Concat (r1, r2) ->
      if null r1 then Cset.union (first r1) (first r2) else first r1
  | Star r ->
      first r

let rec last = function
  | Epsilon ->
      Cset.empty
  | Character c ->
      Cset.singleton c
  | Union (r1, r2) ->
      Cset.union (last r1) (last r2)
  | Concat (r1, r2) ->
      if null r2 then Cset.union (last r1) (last r2) else last r2
  | Star r ->
      last r

let rec follow c = function
  | Epsilon | Character _ ->
      Cset.empty
  | Union (r1, r2) ->
      Cset.union (follow c r1) (follow c r2)
  | Concat (r1, r2) ->
      let s = Cset.union (follow c r1) (follow c r2) in
      if Cset.mem c (last r1) then Cset.union s (first r2) else s
  | Star r ->
      let s = follow c r in
      if Cset.mem c (last r) then Cset.union s (first r) else s

module Cmap = Map.Make(Char)
module Smap = Map.Make(Cset)

type state = Cset.t

type autom = {
  start : state;
  trans : state Cmap.t Smap.t
}

let eof = ('#', -1)

(* état résultat de la transition depuis q par c *)
let next_state r q c =
  Cset.fold
    (fun ((c',_) as ci) q' ->
       if c' = c then Cset.union q' (follow ci r) else q')
    q Cset.empty

let make_dfa r =
  let r = Concat (r, Character eof) in
  (* transitions en cours de construction *)
  let trans = ref Smap.empty in
  (* la fonction transitions construit toutes les transitions de l'état q,
     si c'est la première fois que q est visité *)
  let rec transitions q =
    if not (Smap.mem q !trans) then begin
      trans := Smap.add q Cmap.empty !trans;
      Cset.iter
        (fun (c,_) ->
           let t = Smap.find q !trans in
           if not (Cmap.mem c t) then begin
             let q' = next_state r q c in
             trans := Smap.add q (Cmap.add c q' t) !trans;
             transitions q'
           end)
        q
    end
  in
  (* on part de l'état initial q0 *)
  let q0 = first r in
  transitions q0;
  { start = q0; trans = !trans }

(* visualisation avec dot *)

open Format

let fprint_state fmt q =
  Cset.iter (fun (c,i) ->
    if c = '#' then fprintf fmt "# " else fprintf fmt "%c%i " c i) q

let fprint_transition fmt q c q' =
  fprintf fmt "\"%a\" -> \"%a\" [label=\"%c\"];@\n"
    fprint_state q
    fprint_state q'
    c

let fprint_autom fmt a =
  fprintf fmt "digraph A {@\n";
  fprintf fmt "  @[\"%a\" [ shape = \"rect\"];@\n" fprint_state a.start;
  Smap.iter
    (fun q t -> Cmap.iter (fun c q' -> fprint_transition fmt q c q') t)
    a.trans;
  fprintf fmt "@]@\n}@."

let save_autom file a =
  let ch = open_out file in
  fprintf (formatter_of_out_channel ch) "%a" fprint_autom a;
  close_out ch

(* Test avec (a|b)*a(a|b)  *)
let r1 = Concat (Star (Union (Character ('a', 1), Character ('b', 1))),
                Concat (Character ('a', 2),
                        Union (Character ('a', 3), Character ('b', 2))))
let a = make_dfa r1
let () = save_autom "autom.dot" a

(* Reconnaissance d'un mot *)

let accepting q = Cset.mem eof q

let recognize a s =
  let n = String.length s in
  let rec loop q i =
    if i = n then
      accepting q
    else
      let c = s.[i] in
      let t = Smap.find q a.trans in
      loop (Cmap.find c t) (succ i)
  in
  try loop a.start 0 with Not_found -> false

(* tests positifs *)
let () = assert (recognize a "aa")
let () = assert (recognize a "ab")
let () = assert (recognize a "abababaab")
let () = assert (recognize a "babababab")
let () = assert (recognize a (String.make 1000 'b' ^ "ab"))

(* tests négatifs *)
let () = assert (not (recognize a ""))
let () = assert (not (recognize a "a"))
let () = assert (not (recognize a "b"))
let () = assert (not (recognize a "ba"))
let () = assert (not (recognize a "aba"))
let () = assert (not (recognize a "abababaaba"))


(* Test : un nombre pair de b = (a* + ba*b)*  *)
let r2 = Star (Union (Star (Character ('a', 1)),
                     Concat (Character ('b', 1),
                             Concat (Star (Character ('a',2)),
                                     Character ('b', 2)))))
let a = make_dfa r2
let () = save_autom "autom2.dot" a

(* positifs *)
let () = assert (recognize a "")
let () = assert (recognize a "bb")
let () = assert (recognize a "aaa")
let () = assert (recognize a "aaabbaaababaaa")
let () = assert (recognize a "bbbbbbbbbbbbbb")
let () = assert (recognize a "bbbbabbbbabbbabbb")

(* négatifs *)
let () = assert (not (recognize a "b"))
let () = assert (not (recognize a "ba"))
let () = assert (not (recognize a "ab"))
let () = assert (not (recognize a "aaabbaaaaabaaa"))
let () = assert (not (recognize a "bbbbbbbbbbbbb"))
let () = assert (not (recognize a "bbbbabbbbabbbabbbb"))


(* génération d'un analyseur lexical *)

let fprint_autom_ml fmt a =
  (* on numérote tous les états *)
  let num =
    let n = ref 0 in
    Smap.fold (fun s _ num -> incr n; Smap.add s !n num) a.trans Smap.empty in
  let print_trans c s =
    if c <> '#' then
      fprintf fmt "| '%c' -> state%d b@\n" c (Smap.find s num) in
  let first = ref true in
  let print_state s tr =
    fprintf fmt "@[<hov 2>";
    if !first then begin first := false; fprintf fmt "let rec " end
    else fprintf fmt "and ";
    fprintf fmt "state%d b =@\n" (Smap.find s num);
    if accepting s then fprintf fmt "b.last <- b.current;@\n";
    fprintf fmt "match next_char b with@\n";
    Cmap.iter print_trans tr;
    fprintf fmt "| _ -> failwith \"lexical error\"";
    fprintf fmt "@]@\n"
  in
  Smap.iter print_state a.trans;
  fprintf fmt "@\nlet start = state%d@\n" (Smap.find a.start num)

let generate file a =
  let ch = open_out file in
  let fmt = formatter_of_out_channel ch in
  fprintf fmt "@[
type buffer = { text: string; mutable current: int; mutable last: int }
let next_char b =
  if b.current = String.length b.text then raise End_of_file;
  let c = b.text.[b.current] in
  b.current <- b.current + 1;
  c
";
  fprintf fmt "%a@]@." fprint_autom_ml a;
  close_out ch

(* mots alternant a et b *)
let r3 =
  Concat (Union (Character ('b', 1), Epsilon),
  Concat (Star (Concat (Character ('a', 1), Character ('b', 2))),
          Union (Character ('a', 2), Epsilon)))
let a = make_dfa r3
let () = save_autom "autom3.dot" a
let () = generate "a.ml" a

This document was generated using caml2html