(**************************************************************************) (* *) (* Copyright (C) Jean-Christophe Filliatre *) (* *) (* This software is free software; you can redistribute it and/or *) (* modify it under the terms of the GNU Library General Public *) (* License version 2.1, with the special exception on linking *) (* described in file LICENSE. *) (* *) (* This software is distributed in the hope that it will be useful, *) (* but WITHOUT ANY WARRANTY; without even the implied warranty of *) (* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) (* *) (**************************************************************************) (*s This module implements Ukkonen's algorithm for suffix trees. The module [Ukkonen] provides an implementation for Ocaml type [string]. A generic implementation is also provided, as a functor parameterized by both the alphabet (the types for characters and strings) and the data structure used to branch in the suffix trees. *) module Ukkonen : sig type t val create : string -> t val substring : t -> string -> int type position val find : t -> string -> position val leaves : (int -> unit) -> position -> unit val print : Format.formatter -> t -> unit val suffix_array : t -> unit end (*s Alphabet *) module type Alphabet = sig (* characters *) type t (* [dummy] is a character that is assumed not to appear in any string *) val dummy : t val equal : t -> t -> bool val compare : t -> t -> int val print : Format.formatter -> t -> unit (* strings over this alphabet *) type s val length : s -> int val get : s -> int -> t end (*s Branching *) module type Branching = sig type key type 'a t val create : unit -> 'a t val add : 'a t -> key -> 'a -> unit val find : 'a t -> key -> 'a val iter : (key -> 'a -> unit) -> 'a t -> unit end (*s Generic Suffix Trees implementation *) module Make(A : Alphabet)(B : Branching with type key = A.t) : sig (** the type of suffix trees *) type t (** [create s] build the suffix tree for string [s] *) val create : A.s -> t (** [print fmt t] displays the suffix tree [t] on the formatter [fmt] *) val print : Format.formatter -> t -> unit (** [substring t s] returns the position of substring [s] in the string denotated by suffix tree [t] if any; raises [Not_found] otherwise *) val substring : t -> A.s -> int (** position within a suffix tree *) type position (** [find t s] descends the suffix tree [t] according to string [s] and returns the corresponding position in [t] if any; raises [Not_found] otherwise *) val find : t -> A.s -> position (** [leaves f p] iterates function [f] overs all suffixes below position [p]; a suffix is passed as the index of its first character *) val leaves : (int -> unit) -> position -> unit end (*s Some usual branching implementations *) (* Maps from Ocaml's standard library *) module Bmap(X : Map.OrderedType) : Branching with type key = X.t (* Arrays. To be used only when characters are integers. The alphabet size must be provided *) module Barray(A : sig val size : int end) : Branching with type key = int (* Association lists *) module Blist(X : sig type t val equal : t -> t -> bool end) : Branching with type key = X.t (* Hash tables. The provided value [A.size] is a hint to initialize hash tables length.*) module Bhash(A : sig val size : int end)(X : Hashtbl.HashedType) : Branching with type key = X.t (*s The usual alphabet for types [char] and [string] *) module CharString : Alphabet with type t = char and type s = string
This document was generated using caml2html