diff --git a/clib/cString.ml b/clib/cString.ml index aae672192d09..db52345ef642 100644 --- a/clib/cString.ml +++ b/clib/cString.ml @@ -143,6 +143,8 @@ let ordinal n = in string_of_int n ^ s +(* this and edit_distance copied from ocaml stdlib *) + let uchar_array_of_utf_8_string s = let slen = length s in (* is an upper bound on Uchar.t count *) let uchars = Array.make slen Uchar.max in @@ -209,6 +211,53 @@ let edit_distance ?(limit = Stdlib.Int.max_int) s0 s1 = let d = loop row_minus2 row_minus1 row 1 len0 limit s0 s1 in if d > limit then limit else d +(* modified from edit_distance to match a substring *) +let edit_distance_substring ?(limit = Stdlib.Int.max_int) ~pattern:s0 s1 = + let[@inline] minimum a b c = Stdlib.Int.min a (Stdlib.Int.min b c) in + (* XXX we should be able to cache s0 + instead recomputing every time we do the check *) + let s0, len0 = uchar_array_of_utf_8_string s0 in + let s1, len1 = uchar_array_of_utf_8_string s1 in + if len0 - len1 >= limit then limit else + let rec loop row_minus2 row_minus1 row i len0 limit s0 s1 = + if i > len0 then Array.fold_left Stdlib.Int.min Stdlib.Int.max_int row_minus1 else + let len1 = Array.length row - 1 in + let row_min = ref Stdlib.Int.max_int in + row.(0) <- i; + (* stdlib has some limit-based shortcuts here but naively + copying them doesn't work for substring matching *) + for j = 1 to len1 do + let cost = if Uchar.equal s0.(i-1) s1.(j-1) then 0 else 1 in + let min = minimum + (row_minus1.(j-1) + cost) (* substitute *) + (row_minus1.(j) + 1) (* delete *) + (row.(j-1) + 1) (* insert *) + in + let min = + if (i > 1 && j > 1 && + Uchar.equal s0.(i-1) s1.(j-2) && + Uchar.equal s0.(i-2) s1.(j-1)) + then Stdlib.Int.min min (row_minus2.(j-2) + cost) (* transpose *) + else min + in + row.(j) <- min; + row_min := Stdlib.Int.min !row_min min; + done; + if !row_min >= limit then (* can no longer decrease *) limit else + loop row_minus1 row row_minus2 (i + 1) len0 limit s0 s1 + in + let ignore = + (* Value used to make the values around the diagonal stripe ignored + by the min computations when we have a limit. *) + (* XXX why is max_int + 1 not a problem? *) + limit + 1 + in + let row_minus2 = Array.make (len1 + 1) ignore in + (* for substring matching row_minus1 starts with constant 0 instead of stdlib (fun x -> x) *) + let row_minus1 = Array.make (len1 + 1) 0 in + let row = Array.make (len1 + 1) ignore in + loop row_minus2 row_minus1 row 1 len0 limit s0 s1 + (* string parsing *) module Self = diff --git a/clib/cString.mli b/clib/cString.mli index 23f06a72a904..60b7d7ddbbcd 100644 --- a/clib/cString.mli +++ b/clib/cString.mli @@ -83,6 +83,11 @@ val edit_distance : ?limit:int -> string -> string -> int copied from ocaml 5.4 *) +val edit_distance_substring : ?limit:int -> pattern:string -> string -> int +(** [edit_distance_substring ?limit ~pattern s1] + is the minimum [edit_distance ?limit pattern s2] + where [s2] is a substring of [s1]. *) + (** {6 Generic operations} **) module Set : CSet.ExtS with type elt = t diff --git a/doc/changelog/08-vernac-commands-and-options/20660-search-fuzzy-Added.rst b/doc/changelog/08-vernac-commands-and-options/20660-search-fuzzy-Added.rst new file mode 100644 index 000000000000..71d628555663 --- /dev/null +++ b/doc/changelog/08-vernac-commands-and-options/20660-search-fuzzy-Added.rst @@ -0,0 +1,5 @@ +- **Added:** + approximate matching in :cmd:`Search`, controlled by new option :opt:`Fuzzy Search`. + Currently only when searching by name (:n:`Search @string`) + (`#20660 `_, + by Gaƫtan Gilbert). diff --git a/doc/sphinx/proof-engine/vernacular-commands.rst b/doc/sphinx/proof-engine/vernacular-commands.rst index c3b86e977eab..d3c50b9a61a9 100644 --- a/doc/sphinx/proof-engine/vernacular-commands.rst +++ b/doc/sphinx/proof-engine/vernacular-commands.rst @@ -162,6 +162,14 @@ described elsewhere put it between single quotes or explicitly provide a scope. See :ref:`this example `. + .. opt:: Fuzzy Search + + Objects whose name contains a string differing by at most the + value of this option (default 2) are also included in the + search output. Set to 0 to search for only exact matches. To + avoid overly verbose results exact matching is always used + when the searched string has length 4 or less. + :n:`hyp:` The provided pattern or reference is matched against any subterm of an hypothesis of the type of the objects. See :ref:`this diff --git a/test-suite/output/Search.out b/test-suite/output/Search.out index b88bb362732e..8704b54029ce 100644 --- a/test-suite/output/Search.out +++ b/test-suite/output/Search.out @@ -318,6 +318,13 @@ andb_true_intro: forall [b1 b2 : bool], b1 = true /\ b2 = true -> (b1 && b2)%bool = true andb_prop: forall a b : bool, (a && b)%bool = true -> a = true /\ b = true andb_prop: forall a b : bool, (a && b)%bool = true -> a = true /\ b = true +and_comm: forall A B : Prop, A /\ B <-> B /\ A +eq_id_comm_r: + forall [A : Type] (f : A -> A) (Hf : forall a : A, f a = a) (a : A), + f_equal f (Hf a) = Hf (f a) +eq_id_comm_l: + forall [A : Type] (f : A -> A) (Hf : forall a : A, a = f a) (a : A), + f_equal f (Hf a) = Hf (f a) h: n <> newdef n h': newdef n <> n h: n <> newdef n @@ -326,13 +333,13 @@ h: n <> newdef n h: n <> newdef n h: n <> newdef n h': newdef n <> n -File "./output/Search.v", line 24, characters 2-23: +File "./output/Search.v", line 32, characters 2-23: The command has indeed failed with message: [Focus] No such goal. -File "./output/Search.v", line 25, characters 2-25: +File "./output/Search.v", line 33, characters 2-25: The command has indeed failed with message: Query commands only support the single numbered goal selector. -File "./output/Search.v", line 26, characters 2-25: +File "./output/Search.v", line 34, characters 2-25: The command has indeed failed with message: Query commands only support the single numbered goal selector. h: P n diff --git a/test-suite/output/Search.v b/test-suite/output/Search.v index 7e405244df40..06ef533f691d 100644 --- a/test-suite/output/Search.v +++ b/test-suite/output/Search.v @@ -7,6 +7,14 @@ Search (@eq _ _ true). Search (@eq _ _ _) true -false. (* andb_prop *) Search (@eq _ _ _) true -false "prop" -"intro". (* andb_prop *) +Search "add_comm". +(* currently set to return results whose name contains a substring + with edit distance <=2 from the pattern: + + - and_comm (mutate the first "d") + - eq_id_comm_r and eq_id_comm_l (mutate the "a" and second "d") +*) + Definition newdef := fun x:nat => x. Goal forall n:nat, n <> newdef n -> newdef n <> n -> False. diff --git a/vernac/search.ml b/vernac/search.ml index fcc9f9db12b9..72cc695d0310 100644 --- a/vernac/search.ml +++ b/vernac/search.ml @@ -210,6 +210,17 @@ let module_filter : _ -> filter_function = fun mods ref kind env sigma typ -> let name_of_reference ref = Id.to_string (Nametab.basename_of_global ref) +let { Goptions.get = search_limit } = + Goptions.declare_int_option_and_ref ~key:["Fuzzy"; "Search"] ~value:2 () + +let string_contains_upto ?limit ~pattern s = + let limit = search_limit() in + (* for small patterns, allowing edits produces bad results *) + if String.length pattern <= 4 || limit = 0 then String.string_contains ~where:s ~what:pattern + else + let d = CString.edit_distance_substring ~limit:(limit+1) ~pattern s in + d <= limit + let search_filter : _ -> filter_function = fun query gr kind env sigma typ -> match query with | GlobSearchSubPattern (where,head,pat) -> let open Context.Rel.Declaration in @@ -227,8 +238,7 @@ let search_filter : _ -> filter_function = fun query gr kind env sigma typ -> ma if head then Constr_matching.is_matching_head else Constr_matching.is_matching_appsubterm ~closed:false in f env sigma pat (EConstr.of_constr typ)) typl -| GlobSearchString s -> - String.string_contains ~where:(name_of_reference gr) ~what:s +| GlobSearchString s -> string_contains_upto ~pattern:s (name_of_reference gr) | GlobSearchKind k -> (match kind with None -> false | Some k' -> k = k') | GlobSearchFilter f -> f gr