From 7ddaaff295ff00242989668cabd578a0d7daf100 Mon Sep 17 00:00:00 2001 From: Robin Linacre Date: Mon, 20 Jan 2025 15:55:03 +0000 Subject: [PATCH] add modify settings example to cookbook --- docs/demos/examples/duckdb/cookbook.ipynb | 51 +++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/docs/demos/examples/duckdb/cookbook.ipynb b/docs/demos/examples/duckdb/cookbook.ipynb index 5b37fa6704..cee9f9b869 100644 --- a/docs/demos/examples/duckdb/cookbook.ipynb +++ b/docs/demos/examples/duckdb/cookbook.ipynb @@ -823,6 +823,57 @@ "first_unique_id = df.iloc[0].unique_id\n", "linker.evaluation.labelling_tool_for_specific_record(unique_id=first_unique_id, overwrite=True)\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Modifying settings after loading from a serialised `.json` model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import splink.comparison_library as cl\n", + "from splink import DuckDBAPI, Linker, SettingsCreator, block_on, splink_datasets\n", + "\n", + "# setup to create a model\n", + "\n", + "db_api = DuckDBAPI()\n", + "\n", + "df = splink_datasets.fake_1000\n", + "\n", + "settings = SettingsCreator(\n", + " link_type=\"dedupe_only\",\n", + " comparisons=[\n", + " cl.LevenshteinAtThresholds(\"first_name\"),\n", + " cl.LevenshteinAtThresholds(\"surname\"),\n", + "\n", + " ],\n", + " blocking_rules_to_generate_predictions=[\n", + " block_on(\"first_name\", \"dob\"),\n", + " block_on(\"surname\"),\n", + " ]\n", + ")\n", + "\n", + "linker = Linker(df, settings, db_api)\n", + "\n", + "\n", + "linker.misc.save_model_to_json(\"mod.json\", overwrite=True)\n", + "\n", + "new_settings = SettingsCreator.from_path_or_dict(\"mod.json\")\n", + "\n", + "new_settings.retain_intermediate_calculation_columns = True\n", + "new_settings.blocking_rules_to_generate_predictions = [\"1=1\"]\n", + "new_settings.additional_columns_to_retain = [\"cluster\"]\n", + "\n", + 
"linker = Linker(df, new_settings, DuckDBAPI())\n", + "\n", + "linker.inference.predict().as_duckdbpyrelation().show()" + ] } ], "metadata": {