type ichar = char * int

type regexp = 
  | Epsilon
  | Character of ichar
  | Union of regexp * regexp
  | Concat of regexp * regexp
  | Star of regexp

let count = ref 0
let rec index = function
  | Epsilon -> Epsilon
  | Character (c, _) -> incr count; Character (c, !count)
  | Union (r1, r2) -> Union (index r1, index r2)
  | Concat (r1, r2) -> Concat (index r1, index r2)
  | Star r -> Star (index r)

let rec null = function
  | Epsilon | Star _ -> true
  | Character _ -> false
  | Union (r1, r2) -> null r1 || null r2
  | Concat (r1, r2) -> null r1 && null r2

module Cset = Set.Make(struct type t = ichar let compare = compare end)

let rec first = function
  | Epsilon -> 
      Cset.empty
  | Character c -> 
      Cset.singleton c
  | Union (r1, r2) -> 
      Cset.union (first r1) (first r2)
  | Concat (r1, r2) ->
      if null r1 then Cset.union (first r1) (first r2) else first r1
  | Star r ->
      first r

let rec last = function
  | Epsilon -> 
      Cset.empty
  | Character c -> 
      Cset.singleton c
  | Union (r1, r2) ->
      Cset.union (last r1) (last r2)
  | Concat (r1, r2) ->
      if null r2 then Cset.union (last r1) (last r2) else last r2
  | Star r ->
      last r

let rec follow c = function
  | Epsilon | Character _ -> 
      Cset.empty
  | Union (r1, r2) ->
      Cset.union (follow c r1) (follow c r2)
  | Concat (r1, r2) ->
      let s = Cset.union (follow c r1) (follow c r2) in
      if Cset.mem c (last r1) then Cset.union s (first r2) else s
  | Star r ->
      let s = follow c r in
      if Cset.mem c (last r) then Cset.union s (first r) else s

module Cmap = Map.Make(Char)
module Smap = Map.Make(Cset)

type state = Cset.t

type autom = {
  start : state;
  trans : state Cmap.t Smap.t
}

let eof = ('#', -1)

(* état résultat de la transition depuis q par c *)
let next_state r q c = 
  Cset.fold 
    (fun ((c',_) as ci) q' -> 
       if c' = c then Cset.union q' (follow ci r) else q')
    q Cset.empty

let make_dfa r =
  let r = Concat (r, Character eof) in
  (* transitions en cours de construction *)
  let trans = ref Smap.empty in
  (* la fonction transitions construit toutes les transitions de l'état q,
     si c'est la première fois que q est visité *)
  let rec transitions q =
    if not (Smap.mem q !trans) then begin
      trans := Smap.add q Cmap.empty !trans;
      Cset.iter
        (fun (c,_) ->
           let t = Smap.find q !trans in
           if not (Cmap.mem c t) then begin
             let q' = next_state r q c in
             trans := Smap.add q (Cmap.add c q' t) !trans;
             transitions q'
           end)
        q
    end
  in
  (* on part de l'état initial q0 *)
  let q0 = first r in
  transitions q0;
  { start = q0; trans = !trans }

let accepting q = Cset.mem eof q

let recognize a s =
  let n = String.length s in
  let rec loop q i =
    if i = n then
      accepting q
    else          
      let c = s.[i] in
      let t = Smap.find q a.trans in
      loop (Cmap.find c t) (succ i)
  in
  try loop a.start 0 with Not_found -> false

(*  (a|b)*a(a|b)  *)
let r = Concat (Star (Union (Character ('a', 1), Character ('b', 1))),
                Concat (Character ('a', 2),
                        Union (Character ('a', 3), Character ('b', 2))))
let a = make_dfa r

(* tests positifs *)
let _ = recognize a "aa"
let _ = recognize a "ab"
let _ = recognize a "abababaab"
let _ = recognize a "babababab"
let _ = recognize a (String.make 1000 'b' ^ "ab")

(* tests négatifs *)
let _ = recognize a ""
let _ = recognize a "a"
let _ = recognize a "b"
let _ = recognize a "ba"
let _ = recognize a "aba"
let _ = recognize a "abababaaba"


(* un nombre pair de b (a* + ba*b)*  *)
let r = Star (Union (Star (Character ('a', 1)),
                     Concat (Character ('b', 1),
                             Concat (Star (Character ('a',2)),
                                     Character ('b', 2)))))
let a = make_dfa r

(* positifs *)
let _ = recognize a ""
let _ = recognize a "bb"
let _ = recognize a "aaa"
let _ = recognize a "aaabbaaababaaa"
let _ = recognize a "bbbbbbbbbbbbbb"
let _ = recognize a "bbbbabbbbabbbabbb"

(* négatifs *)
let _ = recognize a "b"
let _ = recognize a "ba"
let _ = recognize a "ab"
let _ = recognize a "aaabbaaaaabaaa"
let _ = recognize a "bbbbbbbbbbbbb"
let _ = recognize a "bbbbabbbbabbbabbbb"


This document was generated using caml2html