(**************************************************************************)
(*                                                                        *)
(*  Copyright (C) Jean-Christophe Filliatre                               *)
(*                                                                        *)
(*  This software is free software; you can redistribute it and/or        *)
(*  modify it under the terms of the GNU Library General Public           *)
(*  License version 2.1, with the special exception on linking            *)
(*  described in file LICENSE.                                            *)
(*                                                                        *)
(*  This software is distributed in the hope that it will be useful,      *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.                  *)
(*                                                                        *)
(**************************************************************************)

(* Suffix trees *)

(* Following Ukkonen's algorithm as described in Gusfield's book "Algorithms 
   on Strings, Trees and Sequences" *)

module type Alphabet = sig
  type t 
  val dummy : t
  val equal : t -> t -> bool
  val compare : t -> t -> int
  val print :  Format.formatter -> t -> unit
  type s
  val length : s -> int
  val get : s -> int -> t
end

module type Branching = sig
  type key
  type 'a t 
  val create : unit -> 'a t
  val add : 'a t -> key -> 'a -> unit
  val find : 'a t -> key -> 'a
  val iter : (key -> 'a -> unit) -> 'a t -> unit
end

let debug = false

module Make(A : Alphabet)(B : Branching with type key = A.t) = struct

  type node = {
    mutable label_start : int;
    mutable label_end : int;
    mutable father : node;
    mutable node_type : node_type;
    mutable suffix_link : node;
    path_position : int
  }

  and node_type = Leaf of int | Branch of node B.t

  type t = { tree_string : A.s; tree_root : node; }

  open Format

  let print fmt t =
    let m = A.length t.tree_string in
    let rec print_node n depth =
      if depth > 0 then begin
        for i = 1 to depth - 1 do printf "|"; done;
        fprintf fmt "+";
        for i = n.label_start to n.label_end do
          A.print fmt (if i == m+1 then A.dummy else A.get t.tree_string (i-1))
        done;
        if debug then
          fprintf fmt "  \t\t\t(%d,%d)" n.label_start n.label_end;
        fprintf fmt "@\n"
      end;
      match n.node_type with 
        | Leaf _ -> ()
        | Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
    in
    print_node t.tree_root 0
      
  (* a dummy node that will never appear in returned suffix trees *)
  let rec dummy_node = 
    { label_start = 0; label_end = 0; father = dummy_node; 
      node_type = Leaf (-1); suffix_link = dummy_node; path_position = 0 }

  let create_node ?(suffix_link=dummy_node) ~father ls le nt pp = 
    { label_start = ls; label_end = le; father = father; 
      node_type = nt; suffix_link = suffix_link; path_position = pp }

  type rule = Rule1 | Rule2 | Rule3

  type position = { mutable pos_node : node; mutable pos_in_edge : int }

  type extension_rule_2 = New_son | Split

  (* Ukkonen's algorithm *)
  let create s = 
    let m = A.length s in
    (* we do not build [s$]; instead we override [get] to cover [1..m+1] *)
    let get i = 
      assert (1 <= i && i <= m+1);
      if i <= m then A.get s (i-1) else A.dummy
    in
    (* the root tree and the first node *)
    let root_sons = B.create () in
    let rec root = 
      { label_start = 0; label_end = 0; father = root; 
        node_type = Branch root_sons; suffix_link = root; path_position = 0 } 
    in
    let tree = { tree_string = s; tree_root = root } in
    let n = create_node ~suffix_link:root ~father:root 1 (m+1) (Leaf 1) 1 in
    B.add root_sons (get 1) n;
    (* label end: during construction label end of a leaf is [!e] *)
    let e = ref 0 in
    let label_end n = match n.node_type with
      | Leaf _ -> !e
      | Branch _ -> n.label_end
    in
    (* DEBUG *)
    let print fmt t =
      let m = A.length t.tree_string in
      let rec print_node n depth =
        let e = label_end n in
        if depth > 0 then begin
          for i = 1 to depth - 1 do printf "|"; done;
          fprintf fmt "+";
          for i = n.label_start to e do
            A.print fmt 
              (if i == m+1 then A.dummy else A.get t.tree_string (i-1))
          done;
          if debug then
            fprintf fmt "  \t\t\t(%d,%d)" n.label_start e;
          fprintf fmt "@\n"
        end;
        match n.node_type with 
          | Leaf _ -> ()
          | Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
      in
      print_node t.tree_root 0
    in
    (* END DEBUG *)
    let suffix_less = ref dummy_node in
    let set_suffix_link n = 
      if !suffix_less != dummy_node then begin
        !suffix_less.suffix_link <- n;
        suffix_less := dummy_node
      end
    in
    let pos = { pos_node = root; pos_in_edge = 0 } in
    let label_length n = label_end n - n.label_start + 1 in
    let last_char_in_edge () = 
      if debug then printf "last_char_in_edge = %b@." 
        (pos.pos_in_edge = label_length pos.pos_node - 1);
      pos.pos_in_edge = label_length pos.pos_node - 1 
    in
    (* select the right branch *)
    let find_son n c = match n.node_type with
      | Leaf _ -> dummy_node
      | Branch b -> (try B.find b c with Not_found -> dummy_node)
    in
    (* tracing a string down the tree (updating [pos]) *)
    let trace_single_edge (gs,ge) ~skip =
      let cont_node = find_son pos.pos_node (get gs) in
      if cont_node == dummy_node then begin
        pos.pos_in_edge <- label_length pos.pos_node - 1;
        0, true
      end else begin
        pos.pos_node <- cont_node;
        let length = label_length pos.pos_node in
        let str_len = ge - gs + 1 in
        if skip then begin
          if length <= str_len then begin
            pos.pos_in_edge <- length - 1;
            length, not (length < str_len)
          end else begin
            pos.pos_in_edge <- str_len - 1;
            str_len, true
          end
        end else begin
          let length = if str_len < length then str_len else length in
          pos.pos_in_edge <- 1;
          let rec loop chars_found =
            if pos.pos_in_edge < length then begin
              if not (A.equal 
                        (get (pos.pos_node.label_start + pos.pos_in_edge))
                        (get (gs + pos.pos_in_edge))) 
              then begin
                pos.pos_in_edge <- pos.pos_in_edge - 1;
                chars_found, true
              end else begin
                pos.pos_in_edge <- pos.pos_in_edge + 1;
                loop (succ chars_found)
              end
            end else begin
              pos.pos_in_edge <- pos.pos_in_edge - 1;
              chars_found, not (chars_found < str_len)
            end
          in
          loop 1
        end
      end
    in
    let trace_string gamma ~skip =
      let rec trace chars_found ((gs,ge) as g) =
        pos.pos_in_edge <- 0;
        let edge_chars_found, search_done = trace_single_edge g ~skip in
        let chars_found = chars_found + edge_chars_found in
        if not search_done then 
          trace chars_found (gs + edge_chars_found, ge)
        else
          chars_found
      in
      trace 0 gamma
    in
    (* following the suffix link *)
    let follow_suffix_link () =
      if pos.pos_node != root then begin
        if pos.pos_node.suffix_link == dummy_node || not (last_char_in_edge ())
        then begin
          if pos.pos_node.father == root then
            pos.pos_node <- root
          else begin
            let s = pos.pos_node.label_start in 
            let gamma = (s, s + pos.pos_in_edge) in
            pos.pos_node <- pos.pos_node.father.suffix_link;
            ignore (trace_string gamma ~skip:true)
          end
        end else begin
          pos.pos_node <- pos.pos_node.suffix_link;
          pos.pos_in_edge <- label_length pos.pos_node - 1
        end
      end
    in
    (* node creation (Rule 2) *)
    let apply_extension_rule_2 ls le path_pos edge_pos = function
      | New_son ->
          if debug then printf "rule 2: new leaf (%d,%d)@." ls le;
          let leaf = 
            create_node ls le ~father:pos.pos_node (Leaf path_pos) path_pos
          in
          begin match pos.pos_node.node_type with
            | Branch b -> B.add b (get ls) leaf; leaf
            | Leaf _ -> assert false
          end
      | Split ->
          if debug then printf "rule 2: split (%d,%d)@." ls le;
          let node = pos.pos_node in
          let b = B.create () in
          let intl = 
            create_node node.label_start (node.label_start + edge_pos) 
              ~father:node.father (Branch b) node.path_position
          in
          node.label_start <- node.label_start + edge_pos + 1;
          let leaf = create_node ~father:intl ls le (Leaf path_pos) path_pos in
          B.add b (get node.label_start) node;
          B.add b (get ls) leaf;
          node.father <- intl;
          begin match intl.father.node_type with
            | Branch b -> B.add b (get intl.label_start) intl; intl
            | Leaf _ -> assert false
          end
    in
    (* Single Extension Algorithm (SEA) *)
    let sea j0 i_1 after_rule_3 =
      if debug then begin
        printf "%a@." print tree;
        printf "extension: %d  phase+1: %d" j0 i_1;
        if after_rule_3 then 
          printf "   starting at (%d,%d | %d)\n" pos.pos_node.label_start 
            (label_end pos.pos_node) pos.pos_in_edge
        else
          printf "   followed from (%d,%d | %d)\n" pos.pos_node.label_start
            (label_end pos.pos_node) pos.pos_in_edge
      end;
      let j = ref j0 in
      if not after_rule_3 then follow_suffix_link ();
      let chars_found = 
        if pos.pos_node == root then begin
          trace_string (!j,i_1) ~skip:false 
        end else begin
          j := i_1;
          if last_char_in_edge () then begin
            let tmp = find_son pos.pos_node (get i_1) in
            if tmp != dummy_node then begin
              pos.pos_node <- tmp;
              pos.pos_in_edge <- 0;
              1
            end else
              0
          end else 
            if 
              A.equal (get (pos.pos_node.label_start + pos.pos_in_edge + 1)) 
                (get i_1) 
            then begin    
              pos.pos_in_edge <- pos.pos_in_edge + 1;
              1
            end else 
              0
        end
      in
      if debug then printf "chars_found = %d@." chars_found;
      if chars_found = i_1 - !j + 1 then begin 
        (* Rule 3 applies *)
        if debug then printf "rule 3 (%d,%d)@." !j i_1;
        set_suffix_link pos.pos_node.father;
        Rule3
      end else if last_char_in_edge () || pos.pos_node == root then 
        match pos.pos_node.node_type with
          | Branch b ->
              ignore 
                (apply_extension_rule_2 (!j + chars_found) i_1 j0 0 New_son);
              set_suffix_link pos.pos_node;
              Rule2
          | Leaf _ ->
              Rule1
      else begin
        let tmp = 
          apply_extension_rule_2 (!j+chars_found) i_1 j0 pos.pos_in_edge Split
        in
        if !suffix_less != dummy_node then !suffix_less.suffix_link <- tmp;
        if label_length tmp = 1 && tmp.father == root then begin
          tmp.suffix_link <- root;
          suffix_less := dummy_node
        end else
          suffix_less := tmp;
        pos.pos_node <- tmp;
        Rule2
      end
    in
    (* Ukkonen main loop *)
    let extension = ref 2 in
    let last_rule_is_3 = ref false in
    for i = 2 to m do (* phase [i+1] *)
      e := i+1;
      let rec spa () = 
        if !extension <= i+1 then begin
          let rule = sea !extension (i+1) !last_rule_is_3 in
          last_rule_is_3 := (rule = Rule3);
          if not !last_rule_is_3 then begin incr extension; spa () end
        end
      in
      spa ()
    done;
    let rec set_leaf_end n = match n.node_type with
      | Leaf _ -> n.label_end <- m+1
      | Branch b -> B.iter (fun _ n -> set_leaf_end n) b
    in
    set_leaf_end root;
    tree

  let find t s =
    let m = A.length t.tree_string in
    let p = A.length s in
    let get i = 
      assert (1 <= i && i <= m+1);
      if i <= m then A.get t.tree_string (i-1) else A.dummy
    in
    let find_son n c = match n.node_type with
      | Leaf _ -> raise Not_found
      | Branch b -> B.find b c
    in
    let rec descend n j =
      let rec descend_edge j k =
        if j < p && k <= n.label_end && A.equal (get k) (A.get s j) then
          descend_edge (succ j) (succ k)
        else if j = p then
          { pos_node = n; pos_in_edge = k }
        else if k > n.label_end then
          descend (find_son n (A.get s j)) j
        else
          raise Not_found
      in
      descend_edge j n.label_start
    in
    let n0 = match t.tree_root.node_type with
      | Branch b -> B.find b (if p = 0 then A.dummy else A.get s 0) 
      | Leaf _ -> assert false
    in
    descend n0 0

  let substring t s =
    if A.length s = 0 then 
      0 
    else
      let p = find t s in
      p.pos_node.path_position - 1

  let leaves f p =
    let rec iter n = match n.node_type with
      | Leaf j -> f (j - 1)
      | Branch b -> B.iter (fun _ n -> iter n) b
    in
    iter p.pos_node

end

  
(* some usual branching implementations *)

module Bmap(X : Map.OrderedType) : Branching with type key = X.t = 
struct
  type key = X.t
  module M = Map.Make(X)
  type 'a t = 'a M.t ref
  let create () = ref M.empty
  let add m k v = m := M.add k v !m
  let find m k = M.find k !m
  let iter f m = M.iter f !m
end

module Barray(A : sig val size : int end) : Branching with type key = int = 
struct
  type key = int
  type 'a t = 'a option array
  let create () = Array.create A.size None
  let add m k v = m.(k) <- Some v
  let find m k = match m.(k) with Some v -> v | None -> raise Not_found
  let iter f m =
    Array.iteri (fun k v -> match v with None -> () | Some v -> f k v) m
end

module Blist(X : sig type t val equal : t -> t -> bool end) 
  : Branching with type key = X.t = 
struct
  type key = X.t
  type 'a t = (key * 'a) list ref
  let create () = ref []
  let add m k v = 
    let rec replace = function
      | [] -> raise Not_found
      | (a,_ as p) :: l -> if X.equal k a then (a,v) :: l else p :: replace l
    in
    m := try replace !m with Not_found -> (k,v) :: !m
  let find m k = 
    let rec lookup = function
      | [] -> raise Not_found 
      | (a,b) :: l -> if X.equal k a then b else lookup l
    in
    lookup !m
  let iter f m = List.iter (fun (a,b) -> f a b) !m
end

module Bhash(A : sig val size : int end)(X : Hashtbl.HashedType) 
  : Branching with type key = X.t = 
struct
  type key = X.t
  module H = Hashtbl.Make(X)
  type 'a t = 'a H.t
  let create () = H.create A.size
  let add m k v = H.replace m k v
  let find m k = H.find m k
  let iter = H.iter
end

module CharString = struct
  type t = char
  let dummy = Char.chr 0
  let equal x y = x == y
  let compare x y = Char.code x - Char.code y
  let hash c = Char.code c
  let print fmt c = Format.fprintf fmt "%c" c
  type s = string
  let length = String.length
  let get = String.unsafe_get
end

module IntArray = struct
  type t = int
  let dummy = -1
  let equal x y = x == y
  let compare = compare
  let hash c = Char.code c
  let print fmt c = Format.fprintf fmt "%d" c
  type s = int array
  let length = Array.length
  let get = Array.unsafe_get
end

(* manually defunctorized code for type string *)
module Ukkonen = struct

  (*module B = Bhash(struct let size = 17 end)(CharString)*)
  module B = Bmap(CharString)

  type node = {
    mutable label_start : int;
    mutable label_end : int;
    mutable father : node;
    mutable node_type : node_type;
    mutable suffix_link : node;
    path_position : int;
  }

  and node_type = Leaf of int | Branch of node B.t

  type t = { tree_string : string; tree_root : node; }

  open Format

  let print fmt t =
    let m = String.length t.tree_string in
    let rec print_node n depth =
      if depth > 0 then begin
        for i = 1 to depth - 1 do printf "|"; done;
        fprintf fmt "+";
        for i = n.label_start to n.label_end do
          fprintf fmt "%c"
            (if i == m+1 then '$' else String.unsafe_get t.tree_string (i-1))
        done;
        if debug then
          fprintf fmt "  \t\t\t(%d,%d)" n.label_start n.label_end;
        fprintf fmt "@\n"
      end;
      match n.node_type with 
        | Leaf _ -> ()
        | Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
    in
    print_node t.tree_root 0
      
  (* a dummy node that will never appear in returned suffix trees *)
  let rec dummy_node = 
    { label_start = 0; label_end = 0; father = dummy_node; 
      node_type = Leaf (-1); suffix_link = dummy_node; path_position = 0 }

  let create_node ?(suffix_link=dummy_node) ~father ls le nt pp = 
    { label_start = ls; label_end = le; father = father; 
      node_type = nt; suffix_link = suffix_link; path_position = pp }

  type rule = Rule1 | Rule2 | Rule3

  type position = { mutable pos_node : node; mutable pos_in_edge : int }

  type extension_rule_2 = New_son | Split

  (* Ukkonen's algorithm *)
  let create s = 
    let m = String.length s in
    let s0 = String.make (m+2) '\000' in
    String.blit s 0 s0 1 m;
    (* we do not build [s$]; instead we override [get] to cover [1..m+1] *)
    let get i = 
      assert (1 <= i && i <= m+1);
      String.unsafe_get s0 i
    in
    (* the root tree and the first node *)
    let root_sons = B.create () in
    let rec root = 
      { label_start = 0; label_end = 0; father = root; 
        node_type = Branch root_sons; suffix_link = root; path_position = 0 } 
    in
    let tree = { tree_string = s; tree_root = root } in
    let n = create_node ~suffix_link:root ~father:root 1 (m+1) (Leaf 1) 1 in
    B.add root_sons (get 1) n;
    (* label end: during construction label end of a leaf is [!e] *)
    let e = ref 0 in
    let label_end n = match n.node_type with
      | Leaf _ -> !e
      | Branch _ -> n.label_end
    in
    (* DEBUG *)
    let print fmt t =
      let m = String.length t.tree_string in
      let rec print_node n depth =
        let e = label_end n in
        if depth > 0 then begin
          for i = 1 to depth - 1 do printf "|"; done;
          fprintf fmt "+";
          for i = n.label_start to e do
            fprintf fmt "%c"
              (if i == m+1 then '$' else String.unsafe_get t.tree_string (i-1))
          done;
          if debug then
            fprintf fmt "  \t\t\t(%d,%d)" n.label_start e;
          fprintf fmt "@\n"
        end;
        match n.node_type with 
          | Leaf _ -> ()
          | Branch b -> B.iter (fun _ n -> print_node n (depth+1)) b
      in
      print_node t.tree_root 0
    in
    (* END DEBUG *)
    let suffix_less = ref dummy_node in
    let set_suffix_link n = 
      if !suffix_less != dummy_node then begin
        !suffix_less.suffix_link <- n;
        suffix_less := dummy_node
      end
    in
    let pos = { pos_node = root; pos_in_edge = 0 } in
    let label_length n = label_end n - n.label_start + 1 in
    let last_char_in_edge () = 
      if debug then printf "last_char_in_edge = %b@." 
        (pos.pos_in_edge = label_length pos.pos_node - 1);
      pos.pos_in_edge = label_length pos.pos_node - 1 
    in
    (* select the right branch *)
    let find_son n c = match n.node_type with
      | Leaf _ -> dummy_node
      | Branch b -> (try B.find b c with Not_found -> dummy_node)
    in
    (* tracing a string down the tree (updating [pos]) *)
    let trace_single_edge (gs,ge) ~skip =
      let cont_node = find_son pos.pos_node s0.[gs] in
      if cont_node == dummy_node then begin
        pos.pos_in_edge <- label_length pos.pos_node - 1;
        0, true
      end else begin
        pos.pos_node <- cont_node;
        let length = label_length pos.pos_node in
        let str_len = ge - gs + 1 in
        if skip then begin
          if length <= str_len then begin
            pos.pos_in_edge <- length - 1;
            length, not (length < str_len)
          end else begin
            pos.pos_in_edge <- str_len - 1;
            str_len, true
          end
        end else begin
          let length = if str_len < length then str_len else length in
          pos.pos_in_edge <- 1;
          let rec loop chars_found =
            if pos.pos_in_edge < length then begin
              if not (s0.[pos.pos_node.label_start + pos.pos_in_edge] ==
                        s0.[gs + pos.pos_in_edge]) 
              then begin
                pos.pos_in_edge <- pos.pos_in_edge - 1;
                chars_found, true
              end else begin
                pos.pos_in_edge <- pos.pos_in_edge + 1;
                loop (succ chars_found)
              end
            end else begin
              pos.pos_in_edge <- pos.pos_in_edge - 1;
              chars_found, not (chars_found < str_len)
            end
          in
          loop 1
        end
      end
    in
    let trace_string gamma ~skip =
      let rec trace chars_found ((gs,ge) as g) =
        pos.pos_in_edge <- 0;
        let edge_chars_found, search_done = trace_single_edge g ~skip in
        let chars_found = chars_found + edge_chars_found in
        if not search_done then 
          trace chars_found (gs + edge_chars_found, ge)
        else
          chars_found
      in
      trace 0 gamma
    in
    (* following the suffix link *)
    let follow_suffix_link () =
      if pos.pos_node != root then begin
        if pos.pos_node.suffix_link == dummy_node || not (last_char_in_edge ())
        then begin
          if pos.pos_node.father == root then
            pos.pos_node <- root
          else begin
            let s = pos.pos_node.label_start in 
            let gamma = (s, s + pos.pos_in_edge) in
            pos.pos_node <- pos.pos_node.father.suffix_link;
            ignore (trace_string gamma ~skip:true)
          end
        end else begin
          pos.pos_node <- pos.pos_node.suffix_link;
          pos.pos_in_edge <- label_length pos.pos_node - 1
        end
      end
    in
    (* node creation (Rule 2) *)
    let apply_extension_rule_2 ls le path_pos edge_pos = function
      | New_son ->
          if debug then printf "rule 2: new leaf (%d,%d)@." ls le;
          let leaf = 
            create_node ls le ~father:pos.pos_node (Leaf path_pos) path_pos 
          in
          begin match pos.pos_node.node_type with
            | Branch b -> B.add b s0.[ls] leaf; leaf
            | Leaf _ -> assert false
          end
      | Split ->
          if debug then printf "rule 2: split (%d,%d)@." ls le;
          let node = pos.pos_node in
          let b = B.create () in
          let intl = 
            create_node node.label_start (node.label_start + edge_pos) 
              ~father:node.father (Branch b) node.path_position
          in
          node.label_start <- node.label_start + edge_pos + 1;
          let leaf = create_node ~father:intl ls le (Leaf path_pos) path_pos in
          B.add b s0.[node.label_start] node;
          B.add b s0.[ls] leaf;
          node.father <- intl;
          begin match intl.father.node_type with
            | Branch b -> B.add b s0.[intl.label_start] intl; intl
            | Leaf _ -> assert false
          end
    in
    (* Single Extension Algorithm (SEA) *)
    let sea j0 i_1 after_rule_3 =
      if debug then begin
        printf "%a@." print tree;
        printf "extension: %d  phase+1: %d" j0 i_1;
        if after_rule_3 then 
          printf "   starting at (%d,%d | %d)\n" pos.pos_node.label_start 
            (label_end pos.pos_node) pos.pos_in_edge
        else
          printf "   followed from (%d,%d | %d)\n" pos.pos_node.label_start
            (label_end pos.pos_node) pos.pos_in_edge
      end;
      let j = ref j0 in
      if not after_rule_3 then follow_suffix_link ();
      let chars_found = 
        if pos.pos_node == root then begin
          trace_string (!j,i_1) ~skip:false 
        end else begin
          j := i_1;
          if last_char_in_edge () then begin
            let tmp = find_son pos.pos_node s0.[i_1] in
            if tmp != dummy_node then begin
              pos.pos_node <- tmp;
              pos.pos_in_edge <- 0;
              1
            end else
              0
          end else 
            if 
              s0.[pos.pos_node.label_start + pos.pos_in_edge + 1] ==
                s0.[i_1]
            then begin    
              pos.pos_in_edge <- pos.pos_in_edge + 1;
              1
            end else 
              0
        end
      in
      if debug then printf "chars_found = %d@." chars_found;
      if chars_found = i_1 - !j + 1 then begin 
        (* Rule 3 applies *)
        if debug then printf "rule 3 (%d,%d)@." !j i_1;
        set_suffix_link pos.pos_node.father;
        Rule3
      end else if last_char_in_edge () || pos.pos_node == root then 
        match pos.pos_node.node_type with
          | Branch b ->
              ignore 
                (apply_extension_rule_2 (!j + chars_found) i_1 j0 0 New_son);
              set_suffix_link pos.pos_node;
              Rule2
          | Leaf _ ->
              Rule1
      else begin
        let tmp = 
          apply_extension_rule_2 (!j+chars_found) i_1 j0 pos.pos_in_edge Split
        in
        if !suffix_less != dummy_node then !suffix_less.suffix_link <- tmp;
        if label_length tmp = 1 && tmp.father == root then begin
          tmp.suffix_link <- root;
          suffix_less := dummy_node
        end else
          suffix_less := tmp;
        pos.pos_node <- tmp;
        Rule2
      end
    in
    (* Ukkonen main loop *)
    let extension = ref 2 in
    let last_rule_is_3 = ref false in
    for i = 2 to m do (* phase [i+1] *)
      e := i+1;
      let rec spa () = 
        if !extension <= i+1 then begin
          let rule = sea !extension (i+1) !last_rule_is_3 in
          last_rule_is_3 := (rule = Rule3);
          if not !last_rule_is_3 then begin incr extension; spa () end
        end
      in
      spa ()
    done;
    let rec set_leaf_end n = match n.node_type with
      | Leaf _ -> n.label_end <- m+1
      | Branch b -> B.iter (fun _ n -> set_leaf_end n) b
    in
    set_leaf_end root;
    tree

  let dummy_char = Char.chr 0

  let find t s =
    let m = String.length t.tree_string in
    let p = String.length s in
    let get i = 
      assert (1 <= i && i <= m+1);
      if i <= m then String.unsafe_get t.tree_string (i-1) else dummy_char
    in
    let find_son n c = match n.node_type with
      | Leaf _ -> raise Not_found
      | Branch b -> B.find b c
    in
    let rec descend n j =
      let rec descend_edge j k =
        if j < p && k <= n.label_end && (get k) == (String.unsafe_get s j) then
          descend_edge (succ j) (succ k)
        else if j = p then
          { pos_node = n; pos_in_edge = k }
        else if k > n.label_end then
          descend (find_son n (String.unsafe_get s j)) j
        else
          raise Not_found
      in
      descend_edge j n.label_start
    in
    let n0 = match t.tree_root.node_type with
      | Branch b -> 
          B.find b (if p = 0 then dummy_char else String.unsafe_get s 0) 
      | Leaf _ -> assert false
    in
    descend n0 0

  let substring t s =
    if String.length s = 0 then 
      0 
    else
      let p = find t s in
      p.pos_node.path_position - 1

  let leaves f p =
    let rec iter n = match n.node_type with
      | Leaf j -> f (j - 1)
      | Branch b -> B.iter (fun _ n -> iter n) b
    in
    iter p.pos_node

  (* ne fonctionne que si [B = Bmap] car il faut que [iter] soit lexic. *)
  let suffix_array t = 
    let m = String.length t.tree_string in
    let rec iter n = match n.node_type with
      | Leaf j -> printf "%s\n" (String.sub t.tree_string (j-1) (m-j+1))
      | Branch b -> B.iter (fun _ n -> iter n) b
    in
    iter t.tree_root
    

end


This document was generated using caml2html