From 2330b4f61546a845e66f9a814de08f6d93d47ed3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?misha=20=F0=9F=90=A6=E2=80=8D=F0=9F=94=A5?= <146671001+ovnanova@users.noreply.github.com> Date: Thu, 3 Oct 2024 19:32:22 -0700 Subject: [PATCH] i don't even know --- lib/hexpds/car/process.ex | 127 +++++++++++++++ lib/hexpds/car/writer.ex | 35 +++++ lib/hexpds/cid.ex | 60 ++++++++ lib/hexpds/mst.ex | 76 +++++---- lib/hexpds/mst/leaf.ex | 42 ++--- lib/hexpds/mst/node.ex | 19 +++ lib/hexpds/mst/util.ex | 315 +++++++++++++------------------------- lib/main.ex | 15 +- test/hexpds_mst_test.exs | 2 +- 9 files changed, 422 insertions(+), 269 deletions(-) create mode 100644 lib/hexpds/car/process.ex create mode 100644 lib/hexpds/car/writer.ex create mode 100644 lib/hexpds/mst/node.ex diff --git a/lib/hexpds/car/process.ex b/lib/hexpds/car/process.ex new file mode 100644 index 0000000..ab4b81f --- /dev/null +++ b/lib/hexpds/car/process.ex @@ -0,0 +1,127 @@ +defmodule Hexpds.Car.Writer.Process do + @moduledoc """ + GenServer process responsible for managing the CAR stream. + """ + + use GenServer + + alias Hexpds.CID + + @doc """ + Starts the CAR writer GenServer. + + ## Options + + - `:file_path` - The path to the CAR file to write to. + + ## Examples + + iex> {:ok, pid} = Hexpds.Car.Writer.Process.start_link(file_path: "output.car") + """ + def start_link(opts) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @doc """ + Adds a block to the CAR stream. + + ## Parameters + + - `cid`: The CID of the data block. + - `serialized_entries`: The binary data to write. + + ## Returns + + - `:ok` on success. + - `{:error, reason}` on failure. + + ## Examples + + iex> Hexpds.Car.Writer.Process.put(pid, cid, serialized_data) + :ok + """ + @spec put(pid(), CID.t(), binary()) :: :ok | {:error, term()} + def put(pid, %CID{} = cid, serialized_entries) when is_binary(serialized_entries) do + GenServer.call(pid, {:put, cid, serialized_entries}) + end + + @doc """ + Finalizes the CAR stream + + ## Parameters + + - `pid`: The PID of the CAR writer GenServer. + + ## Returns + + - `:ok` on success. + - `{:error, reason}` on failure. + + ## Examples + + iex> Hexpds.Car.Writer.Process.finalize(pid) + :ok + """ + @spec finalize(pid()) :: :ok | {:error, term()} + def finalize(pid) do + GenServer.call(pid, :finalize) + end + + ## Server Callbacks + + @impl true + def init(opts) do + file_path = Keyword.fetch!(opts, :file_path) + + case File.open(file_path, [:write, :binary]) do + {:ok, file} -> + {:ok, %{file: file}} + + {:error, reason} -> + {:stop, {:cannot_open_file, reason}} + end + end + + @impl true + def handle_call({:put, cid, serialized_entries}, _from, state) do + case write_block(state.file, cid, serialized_entries) do + :ok -> + {:reply, :ok, state} + + {:error, reason} -> + {:reply, {:error, reason}, state} + end + end + + def handle_call(:finalize, _from, state) do + case File.close(state.file) do + :ok -> + {:reply, :ok, state} + + {:error, reason} -> + {:reply, {:error, reason}, state} + end + end + + @impl true + def terminate(_reason, state) do + File.close(state.file) + :ok + end + + ## Helper Functions + + defp write_block(file, %CID{} = cid, data) do + # TODO + + cid_string = Hexpds.CID.to_string(cid) + data_length = byte_size(data) + + write_data = "#{cid_string}\n#{data_length}\n#{data}" + + case IO.binwrite(file, write_data) do + :ok -> :ok + {:error, reason} -> {:error, reason} + end + end +end diff --git a/lib/hexpds/car/writer.ex b/lib/hexpds/car/writer.ex new file mode 100644 index 0000000..3c7a9aa --- /dev/null +++ b/lib/hexpds/car/writer.ex @@ -0,0 +1,35 @@ +defmodule Hexpds.Car.Writer do + @moduledoc """ + Module for writing data blocks to a CAR stream. + """ + + alias Hexpds.CID + + @doc """ + Writes a block to the CAR stream. + + ## Parameters + + - `car`: The CAR writer process or handle. + - `cid`: The CID of the data block. + - `serialized_entries`: The binary data to write. + + ## Returns + + - `:ok` on success. + - `{:error, reason}` on failure. + + ## Examples + + iex> Hexpds.Car.Writer.put(car_writer, cid, serialized_data) + :ok + """ + @spec put(car :: pid() | atom(), cid :: CID.t(), serialized_entries :: binary()) :: :ok | {:error, term()} + def put(car, %CID{} = cid, serialized_entries) when is_binary(serialized_entries) do + # TODO + GenServer.call(car, {:put, cid, serialized_entries}) + rescue + e -> + {:error, "Failed to write to CAR stream: #{inspect(e)}"} + end +end diff --git a/lib/hexpds/cid.ex b/lib/hexpds/cid.ex index a874032..63ecdc1 100644 --- a/lib/hexpds/cid.ex +++ b/lib/hexpds/cid.ex @@ -299,6 +299,7 @@ defmodule Hexpds.CID do do_encode(cid, encoding_id) end + @spec encode!(Hexpds.CID.t()) :: binary() @doc """ Encodes a CID as a Multibase encoded string. @@ -364,6 +365,7 @@ defmodule Hexpds.CID do do_encode_buffer(cid) end + @spec encode_buffer!(Hexpds.CID.t()) :: binary() @doc """ Encodes a CID as a raw buffer to be encoded with Multibase. @@ -772,7 +774,65 @@ defmodule Hexpds.CID do end end + @doc """ + Creates a new CID by hashing the given data using the specified hash algorithm and codec. + + ## Parameters + + - `hash_algorithm`: The hash algorithm to use (e.g., `:sha2_256`). + - `data`: The binary data to hash. + - `codec`: The codec to use for the CID (e.g., `"dag-cbor"`). + + ## Returns + + - `{:ok, CID.t()}` on success. + - `{:error, reason}` on failure. + """ + @spec create_cid(atom(), binary(), Multicodec.multi_codec()) :: {:ok, t()} | {:error, term()} + def create_cid(hash_algorithm, data, codec \\ @v0_codec) when is_atom(hash_algorithm) and is_binary(data) do + with {:ok, multihash} <- Multihash.encode(hash_algorithm, data), + {:ok, cid} <- cid(multihash, codec, @current_version) do + {:ok, cid} + else + {:error, reason} -> {:error, reason} + end + end + + @doc """ + Alias for `create_cid/3`. + """ + @spec new(atom(), binary()) :: {:ok, t()} | {:error, term()} + def new(hash_algorithm, data) do + create_cid(hash_algorithm, data) + end + + defimpl String.Chars, for: CID do def to_string(cid), do: CID.encode!(cid, :base32_lower) end + + @spec to_string(CID.t()) :: String.t() + def to_string(%CID{} = cid) do + encode!(cid, :base32_lower) + end + + @doc """ + Parses a CID string into a CID struct. + + Returns `{:ok, CID.t()}` on success, or `{:error, reason}` on failure. + """ + @spec from_string(String.t()) :: {:ok, t()} | {:error, term()} + def from_string(cid_string) when is_binary(cid_string) do + decode_cid(cid_string) + end + + @doc """ + Parses a CID string into a CID struct. + + Raises an exception if the CID string is invalid. + """ + @spec from_string!(String.t()) :: t() + def from_string!(cid_string) do + decode_cid!(cid_string) + end end diff --git a/lib/hexpds/mst.ex b/lib/hexpds/mst.ex index ff1f882..8d6011b 100644 --- a/lib/hexpds/mst.ex +++ b/lib/hexpds/mst.ex @@ -4,10 +4,9 @@ defmodule Hexpds.MST do """ alias Hexpds.CID - alias Hexpds.DagCBOR.Internal alias Hexpds.MST.Storage alias Hexpds.MST.Util - alias Hexpds.MST.Leaf + alias Hexpds.MST.Node @max_entries 32 # Tune this @@ -112,9 +111,9 @@ defmodule Hexpds.MST do end def get_entries(%__MODULE__{pointer: %CID{} = cid, storage: storage} = mst) do - with {:ok, data} <- Storage.read_obj(cid), - {:ok, entries} <- Util.deserialize_node_data(data, storage, mst) do - {:ok, entries} + with {:ok, data} <- Storage.read_obj(storage, cid), + {:ok, entries} <- Util.deserialize_node_data(data, storage) do + {:ok, entries} else {:error, reason} -> {:error, reason} _ -> {:error, "No entries or CID provided"} @@ -159,10 +158,9 @@ defmodule Hexpds.MST do end def get_pointer(%__MODULE__{entries: entries, storage: storage} = mst) do - with {:ok, serialized_entries} <- Util.serialize_node_data(entries, storage), - {:ok, new_cid} <- CID.cid(serialized_entries, "dag-cbor", 1), - :ok <- Storage.put_block(storage, new_cid, serialized_entries) do - {:ok, new_cid} + with {:ok, new_cid} <- CID.create_cid(:sha2_256, entries, "dag-cbor"), + :ok <- Storage.put_block(storage, new_cid, entries) do + {:ok, new_cid} else {:error, reason} -> {:error, reason} _ -> {:error, "Failed to get pointer"} @@ -344,18 +342,18 @@ defmodule Hexpds.MST do # Private Helper Functions defp create_leaf(key, %CID{} = value) do - {:ok, %Leaf{key: key, value: value}} + {:ok, Hexpds.MST.Leaf.new(key, value)} end - defp insert_entry(%__MODULE__{entries: entries, layer: layer} = mst, %Leaf{} = new_leaf, key_zeros, layer) do + defp insert_entry(%__MODULE__{entries: entries, layer: layer} = mst, %Node{type: :leaf} = new_leaf, key_zeros, layer) do # Find the index to insert the new leaf to keep entries sorted index = find_gt_or_equal_leaf_index(mst, new_leaf.key) case Enum.at(entries, index) do - %Leaf{key: existing_key} when existing_key == new_leaf.key -> + %Node{type: :leaf, key: existing_key} when existing_key == new_leaf.key -> {:error, "There is already a value at key: #{new_leaf.key}"} - %Leaf{} -> + %Node{type: :leaf} -> updated_entries = List.insert_at(entries, index, new_leaf) if length(updated_entries) > @max_entries do @@ -374,7 +372,7 @@ defmodule Hexpds.MST do new_tree(mst, updated_entries) end - %__MODULE__{} -> + %Node{type: :internal} -> # Handle subtree insertion if necessary {:error, "Subtree insertion not implemented yet"} @@ -387,6 +385,7 @@ defmodule Hexpds.MST do {:error, "Incompatible layer for insertion"} end + defp get_layer(%__MODULE__{layer: layer}) when not is_nil(layer), do: {:ok, layer} defp get_layer(mst) do @@ -403,7 +402,7 @@ defmodule Hexpds.MST do end end - defp extract_value(%Leaf{key: key, value: value}, search_key) when key == search_key do + defp extract_value(%Node{type: :leaf, key: key, value: value}, search_key) when key == search_key do {:ok, value} end @@ -413,8 +412,8 @@ defmodule Hexpds.MST do defp extract_value(_, _), do: {:error, "Invalid entry type"} - defp update_entry(mst, %Leaf{key: key}, key, %CID{} = new_value) do - new_leaf = %Leaf{key: key, value: new_value} + defp update_entry(mst, %Node{type: :leaf, key: key}, key, %CID{} = new_value) do + new_leaf = %Node{type: :leaf, key: key, value: new_value, pointer: nil} {:ok, updated_entries} = replace_entry(mst, key, new_leaf) {:ok, new_tree(mst, updated_entries)} end @@ -424,7 +423,7 @@ defmodule Hexpds.MST do defp replace_entry(%__MODULE__{entries: entries} = mst, key, new_leaf) do updated_entries = Enum.map(entries, fn - %Leaf{key: ^key} -> new_leaf + %Node{type: :leaf, key: ^key} -> new_leaf other -> other end) @@ -443,9 +442,9 @@ defmodule Hexpds.MST do end end - defp remove_entry(%__MODULE__{entries: entries} = mst, %Leaf{key: key}, key) do + defp remove_entry(%__MODULE__{entries: entries} = mst, %Node{type: :leaf, key: key}, key) do updated_entries = Enum.reject(entries, fn - %Leaf{key: ^key} -> true + %Node{type: :leaf, key: ^key} -> true _ -> false end) @@ -484,7 +483,7 @@ defmodule Hexpds.MST do defp traverse_entries([], acc, _after_key), do: Enum.reverse(acc) - defp traverse_entries([%Leaf{} = leaf | rest], acc, after_key) do + defp traverse_entries([%Node{type: :leaf} = leaf | rest], acc, after_key) do if after_key == nil or leaf.key > after_key do traverse_entries(rest, [leaf | acc], after_key) else @@ -516,14 +515,24 @@ defmodule Hexpds.MST do end end - defp write_node(mst, car) do - with {:ok, serialized_entries} <- Util.serialize_node_data(mst.entries, mst.storage), - {:ok, cid} <- CID.cid(serialized_entries, "dag-cbor", 1), - :ok <- Hexpds.Car.Writer.put(car, cid, serialized_entries) do - {:ok, :written} + def write_node(%__MODULE__{entries: entries, storage: storage} = mst, car) do + # First, write child nodes + Enum.each(entries, fn + %__MODULE__{} = subtree -> + write_node(subtree, car) + %Node{type: :internal, pointer: pointer} -> + subtree = load(storage, pointer) + write_node(subtree, car) + _ -> :ok + end) + + # Then, write the current node + with {:ok, serialized_entries} <- Util.serialize_node_data(entries, storage), + {:ok, cid} <- CID.create_cid(:sha2_256, serialized_entries, "dag-cbor"), + :ok <- Hexpds.Car.Writer.put(car, cid, serialized_entries) do + :ok else {:error, reason} -> {:error, reason} - _ -> {:error, "Failed to write node to CAR stream"} end end @@ -531,7 +540,7 @@ defmodule Hexpds.MST do with {:ok, entries} <- get_entries(mst) do index = Enum.find_index(entries, fn - %Leaf{key: leaf_key} -> leaf_key >= key + %Node{type: :leaf, key: leaf_key} -> leaf_key >= key _ -> false end) @@ -563,12 +572,12 @@ defmodule Hexpds.MST do end end - defp get_entry_key(%Leaf{key: key}), do: key - defp get_entry_key(%__MODULE__{pointer: %CID{} = _cid}), do: "" + defp get_entry_key(%Node{type: :leaf, key: key}), do: key + defp get_entry_key(%Node{type: :internal, pointer: _pointer}), do: "" defp create_subtree(entries, storage) do # Create a new MST node with the given entries - with {:ok, subtree} <- new_tree(storage, entries), + with {:ok, subtree} <- new_tree(mst_from_storage(storage), entries), {:ok, subtree_cid} <- get_pointer(subtree), {:ok, serialized_entries} <- Util.serialize_node_data(entries, storage), :ok <- Storage.put_block(storage, subtree_cid, serialized_entries) do @@ -578,4 +587,9 @@ defmodule Hexpds.MST do _ -> {:error, "Failed to create subtree"} end end + + defp mst_from_storage(storage) do + # Helper function to create an MST instance from storage PID + %__MODULE__{storage: storage} + end end diff --git a/lib/hexpds/mst/leaf.ex b/lib/hexpds/mst/leaf.ex index b64a9fb..a8f59b8 100644 --- a/lib/hexpds/mst/leaf.ex +++ b/lib/hexpds/mst/leaf.ex @@ -3,34 +3,36 @@ defmodule Hexpds.MST.Leaf do Represents a leaf node in the MST """ + alias Hexpds.MST.Node alias Hexpds.CID - defstruct [:key, :value] - - @type t :: %__MODULE__{ + @type t :: %Node{ + type: :leaf, key: String.t(), - value: CID.t() + value: CID.t(), + pointer: nil } @doc """ - Checks if the given entry is a leaf - """ - @spec is_leaf(t()) :: true - def is_leaf(_leaf), do: true + Creates a new leaf node. - @doc """ - Checks if the given entry is a tree - """ - @spec is_tree(t()) :: false - def is_tree(_leaf), do: false + ## Parameters - @doc """ - Compares two leaf nodes for equality + - `key`: The key for the leaf. + - `value`: The CID of the value. + + ## Examples + + iex> Hexpds.MST.Leaf.new("key1", %CID{}) + %Hexpds.MST.Node{type: :leaf, key: "key1", value: %CID{}, pointer: nil} """ - @spec equals(t(), t()) :: boolean() - def equals(%__MODULE__{key: key1, value: value1}, %__MODULE__{key: key2, value: value2}) do - key1 == key2 and value1.multihash == value2.multihash + @spec new(String.t(), CID.t()) :: t() + def new(key, %CID{} = value) when is_binary(key) do + %Node{ + type: :leaf, + key: key, + value: value, + pointer: nil + } end - - def equals(_, _), do: false end diff --git a/lib/hexpds/mst/node.ex b/lib/hexpds/mst/node.ex new file mode 100644 index 0000000..cf9953b --- /dev/null +++ b/lib/hexpds/mst/node.ex @@ -0,0 +1,19 @@ +defmodule Hexpds.MST.Node do + @moduledoc """ + Type definitions for both internal MST nodes and leaf nodes + """ + + alias Hexpds.CID + + defstruct type: :internal, # `:internal` or `:leaf` + key: nil, # Only for leaf nodes + value: nil, # CID only for leaf nodes + pointer: nil # CID for internal nodes + + @type t :: %__MODULE__{ + type: :internal | :leaf, + key: String.t() | nil, + value: CID.t() | nil, + pointer: CID.t() | nil + } +end diff --git a/lib/hexpds/mst/util.ex b/lib/hexpds/mst/util.ex index a836b92..ad98cdb 100644 --- a/lib/hexpds/mst/util.ex +++ b/lib/hexpds/mst/util.ex @@ -1,31 +1,21 @@ defmodule Hexpds.MST.Util do @moduledoc """ - Utility function module for MST operations + Utility functions for MST operations. """ alias Hexpds.DagCBOR.Internal alias Hexpds.CID - alias Hexpds.MST + alias Hexpds.MST.Node require Logger @doc """ - Serializes MST node entries into a CBOR binary with prefix compression - - ## Parameters - - - `entries`: List of MST node entries (`%MST{}` or `%MST.Leaf{}` structs) - - `storage`: PID of the storage module - - ## Returns - - - `{:ok, binary}` on success - - `{:error, reason}` on failure + Serializes MST node entries into a CBOR binary with prefix compression. """ - @spec serialize_node_data([MST.node_entry()], pid()) :: {:ok, binary()} | {:error, term()} - def serialize_node_data(entries, storage) do + @spec serialize_node_data([Node.t()], pid()) :: {:ok, binary()} | {:error, term()} + def serialize_node_data(entries, _storage) do keys = Enum.map(entries, fn - %MST.Leaf{key: key} -> key - %MST{} -> "" # Subtrees use the common prefix + %Node{type: :leaf, key: key} -> key + %Node{type: :internal} -> "" end) common_prefix = longest_common_prefix(keys) @@ -33,182 +23,128 @@ defmodule Hexpds.MST.Util do serialized_entries = Enum.map(entries, fn - %MST{} = subtree -> + %Node{type: :internal, pointer: pointer} -> %{ "p" => prefix_length, "k" => nil, "v" => nil, - "t" => CID.encode!(subtree.pointer, :base32_lower) # Assuming `pointer` holds the CID + "t" => CID.to_string(pointer) } - %MST.Leaf{key: key, value: %CID{} = value} -> + %Node{type: :leaf, key: key, value: %CID{} = value} -> suffix = remove_prefix(key, common_prefix) %{ "p" => prefix_length, "k" => suffix, - "v" => CID.encode!(value, :base32_lower), + "v" => CID.to_string(value), "t" => nil } end) - # Construct the data map data_map = %{ "prefix" => common_prefix, "entries" => serialized_entries } - # Encode the data map to JSON string - case Jason.encode(data_map) do - {:ok, json_string} -> - # Call the Rust NIF to encode JSON to DAG-CBOR - case Internal.encode_dag_cbor(json_string) do - {:ok, cbor_binary} -> - {:ok, cbor_binary} - - {:error, reason} -> - {:error, "Failed to encode DAG-CBOR via NIF: #{reason}"} - end - - {:error, reason} -> - {:error, "Failed to encode data map to JSON: #{reason}"} + with {:ok, json_string} <- Jason.encode(data_map), + {:ok, cbor_binary} <- Internal.encode_dag_cbor(json_string) do + {:ok, cbor_binary} + else + {:error, reason} -> {:error, reason} end end @doc """ - Deserializes a CBOR binary into MST node entries with prefix decompression - - ## Parameters - - - `cbor`: Binary data in DAG-CBOR format - - `storage`: PID of the storage module - - `mst`: The current MST node (used for context) - - ## Returns - - - `{:ok, entries}` on success - - `{:error, reason}` on failure + Deserializes a CBOR binary into MST node entries with prefix decompression. """ - @spec deserialize_node_data(binary(), pid(), MST.t()) :: {:ok, [MST.node_entry()]} | {:error, term()} - def deserialize_node_data(cbor, storage, mst) do - # Call the Rust NIF to decode DAG-CBOR to JSON string - case Internal.decode_dag_cbor(cbor) do - {:ok, json_string} -> - # Parse the JSON string to a map - case Jason.decode(json_string) do - {:ok, %{"prefix" => common_prefix, "entries" => serialized_entries}} -> - # Reconstruct entries - reconstructed_entries = - Enum.map(serialized_entries, fn entry -> - cond do - Map.has_key?(entry, "t") and is_binary(entry["t"]) -> - # Subtree entry - case CID.decode(entry["t"]) do - {:ok, cid_struct} -> - # Load the subtree MST node - case MST.load(storage, cid_struct) do - %MST{} = subtree -> - subtree - - _ -> - Logger.error("Failed to load subtree with CID: #{entry["t"]}") - nil - end - - {:error, reason} -> - Logger.error("Failed to decode CID from string: #{entry["t"]}, reason: #{reason}") - nil - end - - Map.has_key?(entry, "k") and Map.has_key?(entry, "v") -> - # Leaf entry - full_key = common_prefix <> entry["k"] - - case CID.decode(entry["v"]) do - {:ok, value_cid} -> - %MST.Leaf{key: full_key, value: value_cid} - - {:error, reason} -> - Logger.error("Failed to decode value CID for key #{full_key}: #{reason}") - nil - end - - true -> - Logger.error("Invalid entry format: #{inspect(entry)}") - nil - end - end) - |> Enum.reject(&is_nil/1) - - {:ok, reconstructed_entries} - - {:error, reason} -> - {:error, "Failed to parse JSON string: #{reason}"} - - _ -> - {:error, "Malformed JSON data from DAG-CBOR decoding"} - end - - {:error, reason} -> - {:error, "Failed to decode DAG-CBOR via NIF: #{reason}"} + @spec deserialize_node_data(binary(), pid()) :: {:ok, [Node.t()]} | {:error, term()} + def deserialize_node_data(cbor, _storage) do + with {:ok, json_string} <- Internal.decode_dag_cbor(cbor), + {:ok, %{"prefix" => common_prefix, "entries" => serialized_entries}} <- Jason.decode(json_string) do + reconstructed_entries = + Enum.map(serialized_entries, fn entry -> + cond do + is_binary(entry["t"]) -> + with {:ok, cid_struct} <- CID.from_string(entry["t"]) do + %Node{type: :internal, pointer: cid_struct} + else + {:error, reason} -> + Logger.error("Failed to decode CID: #{reason}") + nil + end + + is_binary(entry["k"]) and is_binary(entry["v"]) -> + full_key = common_prefix <> entry["k"] + with {:ok, value_cid} <- CID.from_string(entry["v"]) do + %Node{type: :leaf, key: full_key, value: value_cid} + else + {:error, reason} -> + Logger.error("Failed to decode value CID: #{reason}") + nil + end + + true -> + Logger.error("Invalid entry format: #{inspect(entry)}") + nil + end + end) + |> Enum.reject(&is_nil/1) + + {:ok, reconstructed_entries} + else + {:error, reason} -> {:error, reason} end end @doc """ - Determines the longest common prefix among a list of strings - - ## Parameters - - - `keys`: List of strings - - ## Returns + Determines the appropriate layer based on the entries. + """ + @spec layer_for_entries([Node.t()]) :: {:ok, non_neg_integer()} | {:error, term()} + def layer_for_entries(entries) when is_list(entries) do + case entries do + [] -> {:error, :no_entries} + _ -> + leading_zeros_list = + entries + |> Enum.map(fn + %Node{type: :leaf, key: key} -> key + %Node{type: :internal, pointer: pointer} -> CID.to_string(pointer) + end) + |> Enum.map(&leading_zeros_on_hash/1) + |> Enum.map(fn + {:ok, count} -> count + {:error, _} -> 0 + end) + + {:ok, Enum.min(leading_zeros_list)} + end + end - - `String.t()`: The longest common prefix + @doc """ + Determines the longest common prefix among a list of strings. """ @spec longest_common_prefix([String.t()]) :: String.t() def longest_common_prefix([]), do: "" def longest_common_prefix([first | rest]) do - Enum.reduce(rest, first, fn str, acc -> - common_prefix(acc, str) - end) + Enum.reduce(rest, first, &common_prefix/2) end @doc """ - Finds the common prefix between two strings - - ## Parameters - - - `str1`: First string - - `str2`: Second string - - ## Returns - - - `String.t()`: The common prefix + Finds the common prefix between two strings. """ @spec common_prefix(String.t(), String.t()) :: String.t() def common_prefix(str1, str2) do - do_common_prefix(String.graphemes(str1), String.graphemes(str2), []) - |> Enum.reverse() + str1 + |> String.graphemes() + |> Enum.zip(String.graphemes(str2)) + |> Enum.take_while(fn {c1, c2} -> c1 == c2 end) + |> Enum.map(&elem(&1, 0)) |> Enum.join() end - defp do_common_prefix([h1 | t1], [h2 | t2], acc) when h1 == h2 do - do_common_prefix(t1, t2, [h1 | acc]) - end - - defp do_common_prefix(_, _, acc), do: acc - @doc """ - Removes the common prefix from a string - - ## Parameters - - - `str`: The original string - - `prefix`: The prefix to remove - - ## Returns - - - `String.t()`: The string after removing the prefix + Removes the common prefix from a string. """ @spec remove_prefix(String.t(), String.t()) :: String.t() def remove_prefix(str, prefix) do @@ -216,83 +152,40 @@ defmodule Hexpds.MST.Util do end @doc """ - Ensures that the provided MST key is valid - - ## Parameters - - - `key`: The key to validate - - ## Returns - - - `:ok` if the key is valid - - `{:error, reason}` if the key is invalid + Ensures that the provided MST key is valid. """ @spec ensure_valid_mst_key(String.t()) :: :ok | {:error, String.t()} - def ensure_valid_mst_key(key) when is_binary(key) and byte_size(key) > 0 do - # Example validation: keys must start with "key" followed by digits - if Regex.match?(~r/^key\d+$/, key) do - :ok - else - {:error, "Invalid MST key format: #{key}. Keys must match the pattern /^key\\d+$/."} - end - end - - def ensure_valid_mst_key(_key), do: {:error, "MST key must be a non-empty string."} + def ensure_valid_mst_key(key) when is_binary(key) and byte_size(key) > 0, do: :ok + def ensure_valid_mst_key(_), do: {:error, "MST key must be a non-empty string."} @doc """ - Calculates the number of leading zeros in the SHA-256 hash of the given key - - ## Parameters - - - `key`: The key to hash - - ## Returns - - - `{:ok, count}` where `count` is the number of leading zero bits - - `{:error, reason}` if hashing fails + Calculates the number of leading zeros in the SHA-256 hash of the given key. """ @spec leading_zeros_on_hash(String.t()) :: {:ok, non_neg_integer()} | {:error, String.t()} def leading_zeros_on_hash(key) when is_binary(key) do - # Compute SHA-256 hash - hash = :crypto.hash(:sha256, key) - - # Convert hash to bitstring - bitstring = :binary.bin_to_list(hash) - |> Enum.map(&Integer.to_string(&1, 2) |> String.pad_leading(8, "0")) - |> Enum.join() - - # Count leading zeros - leading_zeros = String.length(bitstring) - String.length(String.trim_leading(bitstring, "0")) - - {:ok, leading_zeros} - rescue - e -> - {:error, "Failed to compute leading zeros: #{inspect(e)}"} + try do + <> = :crypto.hash(:sha256, key) + leading_zeros = count_leading_zeros(hash) + {:ok, leading_zeros} + rescue + e -> + {:error, "Failed to compute leading zeros: #{inspect(e)}"} + end end - def leading_zeros_on_hash(_key), do: {:error, "Key must be a binary (string)."} + defp count_leading_zeros(<<0::1, rest::bitstring>>), do: 1 + count_leading_zeros(rest) + defp count_leading_zeros(_), do: 0 @doc """ - Generates a CID for the given entries - - ## Parameters - - - `entries`: List of MST node entries - - `storage`: PID of the storage module - - ## Returns - - - `{:ok, CID.t()}` on success - - `{:error, reason}` on failure + Generates a CID for the given entries. """ - @spec cid_for_entries([MST.node_entry()], pid()) :: {:ok, CID.t()} | {:error, term()} - def cid_for_entries(entries, storage) do - with {:ok, serialized} <- serialize_node_data(entries, storage), - {:ok, cid} <- CID.cid(serialized, "dag-cbor", 1) do + @spec cid_for_entries([Node.t()], pid()) :: {:ok, CID.t()} | {:error, term()} + def cid_for_entries(entries, _storage) do + with {:ok, serialized} <- serialize_node_data(entries, nil), + {:ok, cid} <- CID.create_cid(:sha256, serialized) do {:ok, cid} else {:error, reason} -> {:error, reason} - _ -> {:error, "Failed to generate CID for entries"} end end end diff --git a/lib/main.ex b/lib/main.ex index 5317f33..ae68119 100644 --- a/lib/main.ex +++ b/lib/main.ex @@ -5,13 +5,16 @@ defmodule Hexpds.Application do def start(_type, _args) do Hexpds.Database.Mnesia.create_tables() + children = [ + {Bandit, plug: Hexpds.Http, scheme: :http, port: Application.get_env(:hexpds, :port)}, + {Hexpds.Multicodec, Application.get_env(:hexpds, :multicodec_csv_path)}, + {Hexpds.Database, []}, + {Hexpds.Auth.Session.Cleaner, []}, + {Hexpds.Car.Writer.Process, file_path: "output.car"} + ] + Supervisor.start_link( - [ - {Bandit, plug: Hexpds.Http, scheme: :http, port: Application.get_env(:hexpds, :port)}, - {Hexpds.Multicodec, Application.get_env(:hexpds, :multicodec_csv_path)}, - {Hexpds.Database, []}, - {Hexpds.Auth.Session.Cleaner, []} - ], + children, strategy: :one_for_one, name: Hexpds.Supervisor ) diff --git a/test/hexpds_mst_test.exs b/test/hexpds_mst_test.exs index 3e99ba3..d21cf69 100644 --- a/test/hexpds_mst_test.exs +++ b/test/hexpds_mst_test.exs @@ -71,7 +71,7 @@ defmodule Hexpds.MSTTest do {:ok, serialized} = MST.Util.serialize_node_data(final_mst.entries, storage) # Deserialize into a new MST - {:ok, deserialized_entries} = MST.Util.deserialize_node_data(serialized, storage, final_mst) + {:ok, deserialized_entries} = MST.Util.deserialize_node_data(serialized, storage) {:ok, deserialized_mst} = MST.new_tree(final_mst, deserialized_entries) # Verify that all entries are present