Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add occurrence count in the json output for search engine #1076

Merged
merged 8 commits into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
used for sidebar (@panglesd, #1145)
- Allow referencing of polymorphic constructors in polymorphic variant type
aliases (@panglesd, #1115)
- Added a `--occurrences` argument to the `compile-index` command to output the
number of occurrences of each entry of the index in the json output
(@panglesd, #1076).

### Changed

Expand Down
14 changes: 10 additions & 4 deletions doc/driver.mld
Original file line number Diff line number Diff line change
Expand Up @@ -774,12 +774,16 @@ Some more details about the json format:
{- ["display"], which is a json object. It contains two fields:
{ul
{- ["url"], a string. It is the URL to the entry in the documentation, relative to the base of the documentation}
{- ["html"], also a string. It is the html odoc uses to display the entry in the search results.}}}}}}
{- ["html"], also a string. It is the html odoc uses to display the entry in the search results.}}}
{- Additionally, the ["occurrences"] field exists if and only if the [--occurrences] flag was given to the [odoc compile-index] command. When it exists, it contains a json object, with two fields:
{ul
{- ["direct"], an integer. It is the number of direct uses of the entry. For instance, [open M] and [Make(M)] contain direct uses of [M], while [1 + M.v] contains an indirect use of [M] and a direct use of [M.v].}
{- ["indirect"], an integer containing the number of indirect uses of the entry.}}}}}}

Search engines written in OCaml can also call the [Odoc_model.Fold.unit] and [Odoc_model.Fold.page] functions, in conjunction with [Odoc_search.Entry.entry_of_item], in order to get an OCaml value of each element to be indexed.

{[
let index_generate ?(ignore_output = false) () =
let index_generate ?(ignore_output = false) occurrence_file =
let open Cmd in
let files =
OS.Dir.contents (Fpath.v ".")
Expand All @@ -792,7 +796,7 @@ let index_generate ?(ignore_output = false) () =
let index_map = Fpath.v "index.map" in
let () = Bos.OS.File.write_lines index_map files |> get_ok in
let cmd =
odoc % "compile-index" % "-o" % "html/index.json" % "--file-list"
odoc % "compile-index" % "-o" % "html/index.json" % "--occurrences" % p occurrence_file % "--file-list"
% p index_map
in
let lines = run cmd in
Expand Down Expand Up @@ -851,7 +855,9 @@ The following code executes all of the above, and we're done!
{[
let compiled = compile_all () in
let linked = link_all compiled in
let () = index_generate () in
let occurrence_file = Fpath.v "occurrences-odoc_and_deps.odoc" in
let _ = count_occurrences occurrence_file in
let () = index_generate occurrence_file in
let _ = js_index () in
let _ = count_occurrences (Fpath.v "occurrences-from-odoc.odoc") in
generate_all linked
Expand Down
4 changes: 4 additions & 0 deletions src/occurrences/dune
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
; Library exposing the occurrence table (Table) and the helpers that fill and
; merge it (Odoc_occurrences). It depends only on odoc_model.
(library
(name odoc_occurrences)
(public_name odoc.occurrences)
(libraries odoc_model))
24 changes: 24 additions & 0 deletions src/occurrences/odoc_occurrences.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
module Table = Table

(* [of_impl ~include_hidden unit htbl] walks the source info of the
   implementation [unit] and adds to [htbl] one occurrence for every module,
   value, module type or type annotation whose documentation path is
   resolved. Hidden paths are only counted when [include_hidden] is [true]. *)
let of_impl ~include_hidden unit htbl =
  (* Kept as a let-bound function so that the coercion stays polymorphic: it
     is applied to module, value, module type and type paths alike. *)
  let record path =
    let module Resolved = Odoc_model.Paths.Path.Resolved in
    let path = (path :> Resolved.t) in
    let id = Resolved.identifier path in
    if Resolved.is_hidden path && not include_hidden then ()
    else Table.add htbl id
  in
  let open Odoc_model.Lang in
  let count_annotation (annotation, _location) =
    match annotation with
    | Source_info.Module { documentation = Some (`Resolved p); _ } ->
        record p
    | Source_info.Value { documentation = Some (`Resolved p); _ } ->
        record p
    | Source_info.ModuleType { documentation = Some (`Resolved p); _ } ->
        record p
    | Source_info.Type { documentation = Some (`Resolved p); _ } ->
        record p
    (* Unresolved paths and every other kind of annotation are not counted. *)
    | _ -> ()
  in
  List.iter count_annotation unit.Implementation.source_info

(* [aggregate ~tbl ~data] merges the counts of [data] into [tbl].

   Only the direct count of each entry is replayed: [Table.add] already
   credits every ancestor of the identifier with the same quantity of
   indirect occurrences, so the indirect counts are rebuilt along the way. *)
let aggregate ~tbl ~data =
  let merge_entry id (item : Table.item) =
    Table.add tbl id ~quantity:item.Table.direct
  in
  Table.iter merge_entry data
9 changes: 9 additions & 0 deletions src/occurrences/odoc_occurrences.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
open Odoc_model.Lang

module Table = Table

val of_impl : include_hidden:bool -> Implementation.t -> Table.t -> unit
(** [of_impl ~include_hidden impl tbl] adds to [tbl] one occurrence for every
    module, value, module type and type annotation found in the source info
    of [impl] whose documentation path is resolved. Occurrences of hidden
    paths are only counted when [include_hidden] is [true]. *)

val aggregate : tbl:Table.t -> data:Table.t -> unit
(** [aggregate ~tbl ~data] adds the occurrence counts stored in [data] to
    [tbl]. *)
95 changes: 95 additions & 0 deletions src/occurrences/table.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
(* Hash tables keyed by odoc identifiers. *)
module H = Hashtbl.Make (Odoc_model.Paths.Identifier)

(* An occurrence table is a tree of hash tables: the entry of an identifier
   carries its own counts and, in [sub], the table holding the entries of the
   identifiers that [add] files under it (those having it as parent). *)
type t = internal_item H.t
and internal_item = { direct : int; indirect : int; sub : t }
type key = Odoc_model.Paths.Identifier.t

(* Public view of an entry: the two counters, without the sub-table.
   Being defined last, this type is the default owner of the [direct] and
   [indirect] labels in the rest of the file. *)
type item = { direct : int; indirect : int }

(* Forget the sub-table of an internal entry. *)
let internal_to_item : internal_item -> item =
fun { direct; indirect; _ } -> { direct; indirect }

(* Fresh internal entry: both counters at zero and an empty sub-table. *)
let v_item () = { direct = 0; indirect = 0; sub = H.create 0 }

(* Fresh empty occurrence table. *)
let v () = H.create 0

(* [add ?quantity tbl id] records [quantity] (default [1]) direct occurrences
   of [id] in [tbl], and the same quantity of indirect occurrences for each
   of the identifiers met when following the parents of [id] up to the top.

   Raises [Assert_failure] when [id], or one of its ancestors, is a kind of
   identifier listed in the last branch of the match below (pages, source
   locations, labels, functor results, ...). *)
let add ?(quantity = 1) tbl id =
(* [add ?kind id] bumps the counter selected by [kind] in the entry of [id]
   and returns the sub-table of that entry. The outer call uses [`Direct];
   the recursive calls on parents rely on the [`Indirect] default. *)
let rec add ?(kind = `Indirect) id =
(* Bump the entry of [id] in [htbl], creating it when missing, and return its
   sub-table. Note that this [id] parameter shadows the one of [add]. *)
let incr htbl id =
let { direct; indirect; sub } =
try H.find htbl id with Not_found -> v_item ()
in
let direct, indirect =
match kind with
| `Direct -> (direct + quantity, indirect)
| `Indirect -> (direct, indirect + quantity)
in
H.replace htbl id { direct; indirect; sub };
sub
in
(* Count the parent first (indirectly); the entry of [id] then lives in the
   sub-table returned for the parent. *)
let do_ parent =
let htbl = add (parent :> key) in
incr htbl id
in
match id.iv with
| `InstanceVariable (parent, _) -> do_ parent
| `Parameter (parent, _) -> do_ parent
| `Module (parent, _) -> do_ parent
| `ModuleType (parent, _) -> do_ parent
| `Method (parent, _) -> do_ parent
| `Field (parent, _) -> do_ parent
| `Extension (parent, _) -> do_ parent
| `Type (parent, _) -> do_ parent
(* Core types have no parent: they are stored in the top-level table. *)
| `CoreType _ -> incr tbl id
| `Constructor (parent, _) -> do_ parent
| `Exception (parent, _) -> do_ parent
| `ExtensionDecl (parent, _, _) -> do_ parent
| `Class (parent, _) -> do_ parent
| `Value (parent, _) -> do_ parent
| `ClassType (parent, _) -> do_ parent
(* Roots have no parent either: they are stored in the top-level table. *)
| `Root _ -> incr tbl id
| `SourcePage _ | `Page _ | `LeafPage _ | `SourceLocation _
| `CoreException _ | `Label _ | `SourceLocationMod _ | `Result _
| `AssetFile _ | `SourceDir _ | `SourceLocationInternal _ ->
(* NOTE(review): presumably callers never pass these kinds of identifiers;
   confirm, since they abort the program here. *)
assert false
in
(* The sub-table returned for [id] itself is not needed. *)
let _htbl = add ~kind:`Direct id in
()

(* [get_internal t id] is the internal entry recorded for [id] in [t], if any.

   The lookup mirrors the layout built by [add]: the entry of an identifier
   that has a parent lives in the sub-table of its parent's entry, while
   [`Root] and [`CoreType] identifiers live in the top-level table. Kinds of
   identifiers that [add] refuses are never present, hence [None]. *)
let rec get_internal t (id : key) =
  (* Total version of [H.find] for [id]. *)
  let lookup htbl =
    match H.find htbl id with
    | entry -> Some entry
    | exception Not_found -> None
  in
  (* Find the parent's entry first, then look [id] up in its sub-table. *)
  let in_parent parent =
    match get_internal t (parent :> key) with
    | None -> None
    | Some { sub; _ } -> lookup sub
  in
  match id.iv with
  | `InstanceVariable (parent, _) -> in_parent parent
  | `Parameter (parent, _) -> in_parent parent
  | `Module (parent, _) -> in_parent parent
  | `ModuleType (parent, _) -> in_parent parent
  | `Method (parent, _) -> in_parent parent
  | `Field (parent, _) -> in_parent parent
  | `Extension (parent, _) -> in_parent parent
  | `ExtensionDecl (parent, _, _) -> in_parent parent
  | `Type (parent, _) -> in_parent parent
  | `Constructor (parent, _) -> in_parent parent
  | `Exception (parent, _) -> in_parent parent
  | `Class (parent, _) -> in_parent parent
  | `Value (parent, _) -> in_parent parent
  | `ClassType (parent, _) -> in_parent parent
  (* [add] stores core types at top level, next to roots; they used to be
     unreachable from here even after having been counted. *)
  | `Root _ | `CoreType _ -> lookup t
  | `SourcePage _ | `Page _ | `LeafPage _ | `SourceLocation _
  | `CoreException _ | `Label _ | `SourceLocationMod _ | `Result _
  | `AssetFile _ | `SourceDir _ | `SourceLocationInternal _ ->
      None

(* [get t id] is the direct and indirect counts recorded for [id] in [t], or
   [None] when no entry is found. *)
let get t id =
  match get_internal t id with
  | None -> None
  | Some entry -> Some (internal_to_item entry)

(* [iter f tbl] applies [f] to every entry of [tbl], nested entries included.
   The entries found in the sub-table of an entry are visited before that
   entry itself; [f] only sees the public counters, not the sub-table. *)
let rec iter f tbl =
  let visit id ({ sub; _ } as entry) =
    iter f sub;
    f id (internal_to_item entry)
  in
  H.iter visit tbl
11 changes: 11 additions & 0 deletions src/occurrences/table.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
(** Mutable tables counting the occurrences of identifiers. *)

type t
(** An occurrence table. *)

type item = { direct : int; indirect : int }
(** Counts attached to an identifier: [direct] is increased when the
    identifier itself is added, [indirect] when an identifier located below
    it (reached through the chain of parents) is added. *)

type key = Odoc_model.Paths.Identifier.t
(** Identifiers are the keys of the table. *)

val v : unit -> t
(** [v ()] is a new, empty table. *)

val add : ?quantity:int -> t -> key -> unit
(** [add ?quantity t id] adds [quantity] (default [1]) direct occurrences to
    [id] and as many indirect occurrences to each of its ancestors.

    @raise Assert_failure
      if [id] or one of its ancestors is of a kind the table does not handle
      (pages, source locations, labels, functor results, ...). *)

val iter : (key -> item -> unit) -> t -> unit
(** [iter f t] calls [f id item] for every entry of [t], at any depth. The
    entries stored below an identifier are visited before the entry of that
    identifier. *)

val get : t -> key -> item option
(** [get t id] is the counts recorded for [id] in [t], or [None] when no entry
    is found. *)
15 changes: 11 additions & 4 deletions src/odoc/bin/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,7 @@ module Indexing = struct
| None, `Marshall -> Ok (Fs.File.of_string "index.odoc-index")

let index dst json warnings_options page_roots lib_roots inputs_in_file inputs
=
occurrences =
let marshall = if json then `JSON else `Marshall in
output_file ~dst marshall >>= fun output ->
(if
Expand All @@ -534,8 +534,8 @@ module Indexing = struct
then Error (`Msg "Paths given to all -P and -L options must be disjoint")
else Ok ())
>>= fun () ->
Indexing.compile marshall ~output ~warnings_options ~lib_roots ~page_roots
~inputs_in_file ~odocls:inputs
Indexing.compile marshall ~output ~warnings_options ~occurrences ~lib_roots
~page_roots ~inputs_in_file ~odocls:inputs
let cmd =
let dst =
let doc =
Expand All @@ -546,6 +546,13 @@ module Indexing = struct
Arg.(
value & opt (some string) None & info ~docs ~docv:"PATH" ~doc [ "o" ])
in
let occurrences =
let doc = "Occurrence file." in
Arg.(
value
& opt (some convert_fpath) None
& info ~docs ~docv:"PATH" ~doc [ "occurrences" ])
in
let inputs_in_file =
let doc =
"Input text file containing a line-separated list of paths to .odocl \
Expand Down Expand Up @@ -587,7 +594,7 @@ module Indexing = struct
Term.(
const handle_error
$ (const index $ dst $ json $ warnings_options $ page_roots $ lib_roots
$ inputs_in_file $ inputs))
$ inputs_in_file $ inputs $ occurrences))

let info ~docs =
let doc =
Expand Down
1 change: 1 addition & 0 deletions src/odoc/dune
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
odoc_model
odoc_json_index
odoc_xref2
odoc_occurrences
tyxml
unix)
(instrumentation
Expand Down
20 changes: 15 additions & 5 deletions src/odoc/indexing.ml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ let parse_input_files input =
(Ok []) input
>>= fun files -> Ok (List.concat files)

let compile_to_json ~output ~warnings_options files =
let compile_to_json ~output ~warnings_options ~occurrences files =
let output_channel =
Fs.Directory.mkdir_p (Fs.File.dirname output);
open_out_bin (Fs.File.to_string output)
Expand All @@ -57,7 +57,7 @@ let compile_to_json ~output ~warnings_options files =
(fun acc file ->
match
handle_file
~unit:(print Json_search.unit acc)
~unit:(print (Json_search.unit ?occurrences) acc)
~page:(print Json_search.page acc)
~occ:(print Json_search.index acc)
file
Expand Down Expand Up @@ -110,13 +110,23 @@ let compile_to_marshall ~output ~warnings_options sidebar files =
result |> Error.handle_warnings ~warnings_options >>= fun () ->
Ok (Odoc_file.save_index output (sidebar, final_index))

(* [read_occurrences file] reads the occurrence table marshalled in [file].

   The channel is closed on every path, including when unmarshalling fails;
   the exception is then re-raised unchanged. Raises [Sys_error] if [file]
   cannot be opened, and [End_of_file] or [Failure] if its content is not a
   complete marshalled value.

   NOTE(review): [Marshal] is not type-safe. [file] is presumably the output
   of the occurrence-counting command of a compatible odoc version; nothing
   here checks that, and a file with another content may crash the program
   at a later point. *)
let read_occurrences file =
  let ic = open_in_bin file in
  match (Marshal.from_channel ic : Odoc_occurrences.Table.t) with
  | htbl ->
      close_in ic;
      htbl
  | exception exn ->
      (* Do not let a failure to close mask the original error. *)
      close_in_noerr ic;
      raise exn

open Odoc_model.Lang.Sidebar

let compile out_format ~output ~warnings_options ~lib_roots ~page_roots
~inputs_in_file ~odocls =
let compile out_format ~output ~warnings_options ~occurrences ~lib_roots
~page_roots ~inputs_in_file ~odocls =
let current_dir = Fs.File.dirname output in
parse_input_files inputs_in_file >>= fun files ->
let files = List.rev_append odocls files in
let occurrences =
match occurrences with
| None -> None
| Some occurrences -> Some (read_occurrences (Fpath.to_string occurrences))
in
let resolver =
Resolver.create ~important_digests:false ~directories:[]
~roots:
Expand Down Expand Up @@ -175,5 +185,5 @@ let compile out_format ~output ~warnings_options ~lib_roots ~page_roots
in
let content = { pages; libraries } in
match out_format with
| `JSON -> compile_to_json ~output ~warnings_options files
| `JSON -> compile_to_json ~output ~warnings_options ~occurrences files
| `Marshall -> compile_to_marshall ~output ~warnings_options content files
1 change: 1 addition & 0 deletions src/odoc/indexing.mli
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ val compile :
[ `JSON | `Marshall ] ->
output:Fs.file ->
warnings_options:Odoc_model.Error.warnings_options ->
occurrences:Fs.file option ->
lib_roots:(string * Fs.directory) list ->
page_roots:(string * Fs.directory) list ->
inputs_in_file:Fs.file list ->
Expand Down
Loading
Loading