type ichar = char * int type regexp = Epsilon Character of ichar Union of regexp * regexp Concat of regexp * regexp Star of regexp let count = ref 0 let rec index = function Epsilon -> Epsilon Character (c, _) -> incr count; Character (c, !count) Union (r1, r2) -> Union (index r1, index r2) Concat (r1, r2) -> Concat (index r1, index r2) Star r -> Star (index r) let rec null = function Epsilon Star _ -> true Character _ -> false Union (r1, r2) -> null r1 || null r2 Concat (r1, r2) -> null r1 && null r2 module Cset = Set.Make(struct type t = ichar let compare = compare end) let rec first = function Epsilon -> Cset.empty Character c -> Cset.singleton c Union (r1, r2) -> Cset.union (first r1) (first r2) Concat (r1, r2) -> if null r1 then Cset.union (first r1) (first r2) else first r1 Star r -> first r let rec last = function Epsilon -> Cset.empty Character c -> Cset.singleton c Union (r1, r2) -> Cset.union (last r1) (last r2) Concat (r1, r2) -> if null r2 then Cset.union (last r1) (last r2) else last r2 Star r -> last r let rec follow c = function Epsilon Character _ -> Cset.empty Union (r1, r2) -> Cset.union (follow c r1) (follow c r2) Concat (r1, r2) -> let s = Cset.union (follow c r1) (follow c r2) in if Cset.mem c (last r1) then Cset.union s (first r2) else s Star r -> let s = follow c r in if Cset.mem c (last r) then Cset.union s (first r) else s module Cmap = Map.Make(Char) module Smap = Map.Make(Cset) type state = Cset.t type autom = { start : state; trans : state Cmap.t Smap.t } let eof = ('#', -1) (* état résultat de la transition depuis q par c *) let next_state r q c = Cset.fold (fun ((c',_) as ci) q' -> if c' = c then Cset.union q' (follow ci r) else q') q Cset.empty let make_dfa r = let r = Concat (r, Character eof) in (* transitions en cours de construction *) let trans = ref Smap.empty in (* la fonction transitions construit toutes les transitions de l'état q, si c'est la première fois que q est visité *) let rec transitions q = if not (Smap.mem q !trans) then begin trans := Smap.add q Cmap.empty !trans; Cset.iter (fun (c,_) -> let t = Smap.find q !trans in if not (Cmap.mem c t) then begin let q' = next_state r q c in trans := Smap.add q (Cmap.add c q' t) !trans; transitions q' end) q end in (* on part de l'état initial q0 *) let q0 = first r in transitions q0; { start = q0; trans = !trans } (* visualisation avec dot *) open Format let fprint_state fmt q = Cset.iter (fun (c,i) -> if c = '#' then fprintf fmt "# " else fprintf fmt "%c%i " c i) q let fprint_transition fmt q c q' = fprintf fmt "\"%a\" -> \"%a\" [label=\"%c\"];@\n" fprint_state q fprint_state q' c let fprint_autom fmt a = fprintf fmt "digraph A {@\n"; fprintf fmt " @[\"%a\" [ shape = \"rect\"];@\n" fprint_state a.start; Smap.iter (fun q t -> Cmap.iter (fun c q' -> fprint_transition fmt q c q') t) a.trans; fprintf fmt "@]@\n}@." let save_autom file a = let ch = open_out file in fprintf (formatter_of_out_channel ch) "%a" fprint_autom a; close_out ch (* Test avec (a|b)*a(a|b) *) let r1 = Concat (Star (Union (Character ('a', 1), Character ('b', 1))), Concat (Character ('a', 2), Union (Character ('a', 3), Character ('b', 2)))) let a = make_dfa r1 let () = save_autom "autom.dot" a (* Reconnaissance d'un mot *) let accepting q = Cset.mem eof q let recognize a s = let n = String.length s in let rec loop q i = if i = n then accepting q else let c = s.[i] in let t = Smap.find q a.trans in loop (Cmap.find c t) (succ i) in try loop a.start 0 with Not_found -> false (* tests positifs *) let () = assert (recognize a "aa") let () = assert (recognize a "ab") let () = assert (recognize a "abababaab") let () = assert (recognize a "babababab") let () = assert (recognize a (String.make 1000 'b' ^ "ab")) (* tests négatifs *) let () = assert (not (recognize a "")) let () = assert (not (recognize a "a")) let () = assert (not (recognize a "b")) let () = assert (not (recognize a "ba")) let () = assert (not (recognize a "aba")) let () = assert (not (recognize a "abababaaba")) (* Test : un nombre pair de b = (a* + ba*b)* *) let r2 = Star (Union (Star (Character ('a', 1)), Concat (Character ('b', 1), Concat (Star (Character ('a',2)), Character ('b', 2))))) let a = make_dfa r2 let () = save_autom "autom2.dot" a (* positifs *) let () = assert (recognize a "") let () = assert (recognize a "bb") let () = assert (recognize a "aaa") let () = assert (recognize a "aaabbaaababaaa") let () = assert (recognize a "bbbbbbbbbbbbbb") let () = assert (recognize a "bbbbabbbbabbbabbb") (* négatifs *) let () = assert (not (recognize a "b")) let () = assert (not (recognize a "ba")) let () = assert (not (recognize a "ab")) let () = assert (not (recognize a "aaabbaaaaabaaa")) let () = assert (not (recognize a "bbbbbbbbbbbbb")) let () = assert (not (recognize a "bbbbabbbbabbbabbbb")) (* génération d'un analyseur lexical *) let fprint_autom_ml fmt a = (* on numérote tous les états *) let num = let n = ref 0 in Smap.fold (fun s _ num -> incr n; Smap.add s !n num) a.trans Smap.empty in let print_trans c s = if c <> '#' then fprintf fmt "| '%c' -> state%d b@\n" c (Smap.find s num) in let first = ref true in let print_state s tr = fprintf fmt "@[<hov 2>"; if !first then begin first := false; fprintf fmt "let rec " end else fprintf fmt "and "; fprintf fmt "state%d b =@\n" (Smap.find s num); if accepting s then fprintf fmt "b.last <- b.current;@\n"; fprintf fmt "match next_char b with@\n"; Cmap.iter print_trans tr; fprintf fmt "| _ -> failwith \"lexical error\""; fprintf fmt "@]@\n" in Smap.iter print_state a.trans; fprintf fmt "@\nlet start = state%d@\n" (Smap.find a.start num) let generate file a = let ch = open_out file in let fmt = formatter_of_out_channel ch in fprintf fmt "@[ type buffer = { text: string; mutable current: int; mutable last: int } let next_char b = if b.current = String.length b.text then raise End_of_file; let c = b.text.[b.current] in b.current <- b.current + 1; c "; fprintf fmt "%a@]@." fprint_autom_ml a; close_out ch (* mots alternant a et b *) let r3 = Concat (Union (Character ('b', 1), Epsilon), Concat (Star (Concat (Character ('a', 1), Character ('b', 2))), Union (Character ('a', 2), Epsilon))) let a = make_dfa r3 let () = save_autom "autom3.dot" a let () = generate "a.ml" a
This document was generated using caml2html