v0.1.5 (#44)

* bump prettier v3
* more descriptive var names
* v0.1.5
* fix ruff PT011
* GH Pages npm install --force
janosh · Jul 7, 2023 · 82037a8
1 parent c03d741
commit 82037a8
Show file tree

Hide file tree

Showing 14 changed files with 50 additions and 47 deletions.
diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml
@@ -13,6 +13,7 @@ jobs:
     with:
       python-version: "3.11"
       working-directory: site
+      install-cmd: npm install --force
       pre-build: |
         pip install lazydocs
         # lazydocs needs package deps to be installed

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.275
+    rev: v0.0.277
     hooks:
       - id: ruff
         args: [--fix]
@@ -49,7 +49,7 @@ repos:
         args: [--ignore-words-list, "nd,te,fpr"]
 
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.0-alpha.9-for-vscode
+    rev: v3.0.0
     hooks:
       - id: prettier
         args: [--write] # edit files in-place

diff --git a/changelog.md b/changelog.md
@@ -2,8 +2,12 @@
 
 All notable changes to this project will be documented in this file. Dates are displayed in UTC.
 
-#### [v1.0.0](https://github.com/janosh/matbench-discovery/compare/v0.1.4...v1.0.0)
+#### [v0.1.5](https://github.com/janosh/matbench-discovery/compare/v0.1.4...v0.1.5)
 
+> 7 July 2023
+
+- v0.1.5 [`#44`](https://github.com/janosh/matbench-discovery/pull/44)
+- [pre-commit.ci] pre-commit autoupdate [`#42`](https://github.com/janosh/matbench-discovery/pull/42)
 - Update site to show ALIGNN results [`#38`](https://github.com/janosh/matbench-discovery/pull/38)
 - ALIGNN test on WBM data [`#37`](https://github.com/janosh/matbench-discovery/pull/37)
 - Add PDF versions of all SI figures [`#36`](https://github.com/janosh/matbench-discovery/pull/36)
@@ -18,6 +22,7 @@ All notable changes to this project will be documented in this file. Dates are d
 - Add `upload_to_figshare.py` and replace local data-loading with cached Figshare downloads [`#13`](https://github.com/janosh/matbench-discovery/pull/13)
 - DRY workflows [`#8`](https://github.com/janosh/matbench-discovery/pull/8)
 - Add `Files`, `DataFiles` and `PredFiles` classes in `matbench_discovery/data.py` [`#7`](https://github.com/janosh/matbench-discovery/pull/7)
+- fix path issues and using JSON pandas reader for CSV files in train_(cgcnn|wrenformer).py (closes #43) [`#43`](https://github.com/janosh/matbench-discovery/issues/43)
 - Fix KeyError('wyckoff_spglib') in `fetch_process_wbm_dataset.py` (#34) [`#32`](https://github.com/janosh/matbench-discovery/issues/32) [`#33`](https://github.com/janosh/matbench-discovery/issues/33)
 - Fix `compute_struct_fingerprints.py`: cannot insert `material_id`, already exists (#31) [`#30`](https://github.com/janosh/matbench-discovery/issues/30)
 - Fix `fetch_process_wbm_dataset.py` unnecessarily loading WBM initial structures from disk (#29) [`#28`](https://github.com/janosh/matbench-discovery/issues/28)
@@ -38,6 +43,7 @@ All notable changes to this project will be documented in this file. Dates are d
 - add scripts/compute_struct_fingerprints.py to generate matminer SiteStats fingerprints for all MP+WBM structures [`7946b5e`](https://github.com/janosh/matbench-discovery/commit/7946b5e433713ef8f46b839562f09d889331e63b)
 - split model run times into train and test contribs [`b8a18d8`](https://github.com/janosh/matbench-discovery/commit/b8a18d8fd357ea4ec77dae837fdb8317fe978b28)
 - add models/chgnet/{test_chgnet,join_m3gnet_results}.py [`da39074`](https://github.com/janosh/matbench-discovery/commit/da390741b0d0f03fbcca5bcbac725c2211a80427)
+- git surgery purging all large CSV files from repo history [`4a90dee`](https://github.com/janosh/matbench-discovery/commit/4a90dee85965aebbcfcd547353c548eeb19651ce)
 - add scripts/compute_projections.py [`eec1cb4`](https://github.com/janosh/matbench-discovery/commit/eec1cb4073c2db65cd238157f656af963582ffc5)
 - make horizontal versions of roc-models-2x4.pdf and model-run-times-bar.pdf [`551050e`](https://github.com/janosh/matbench-discovery/commit/551050e17cc98cf786707448fd304bb06177a914)
 - add changelog.md [`0621282`](https://github.com/janosh/matbench-discovery/commit/0621282b0ed6ca397b6293e8d6f2bcbaac736468)
@@ -73,12 +79,11 @@ All notable changes to this project will be documented in this file. Dates are d
 - tweak contributing guide [`b3ea8d3`](https://github.com/janosh/matbench-discovery/commit/b3ea8d3980d62032ed33ae75fb0c7539fafcebe3)
 - upload @pbenner's ALIGNN checkpoint to Figshare and link from models/alignn/readme.md [`da56377`](https://github.com/janosh/matbench-discovery/commit/da5637725e3f785cdf8bd82ff6a3be95858b2eae)
 - start preprint [`fd8355d`](https://github.com/janosh/matbench-discovery/commit/fd8355dca9fe87ef9196980c06907ef377fdcd97)
-- git surgery purging all large CSV files from repo history [`873fa75`](https://github.com/janosh/matbench-discovery/commit/873fa7559fab539f2f1fe8fe6557d5c530b5aabc)
 - rename pages /about-test-test-set to /about-the-data and /how-to-contribute to /contribute [`e41ca32`](https://github.com/janosh/matbench-discovery/commit/e41ca32e768ea06f404b216353521f89e6bee2e2)
 
 #### v0.1.4
 
-> 24 February 2023
+> 19 June 2023
 
 - Add WBM element heatmap to site and data/wbm/readme.md [`#6`](https://github.com/janosh/matbench-discovery/pull/6)
 - doc: remove reference to alignn [`#4`](https://github.com/janosh/matbench-discovery/pull/4)

diff --git a/data/mp/get_mp_energies.py b/data/mp/get_mp_energies.py
@@ -58,19 +58,20 @@
 # %%
 df_cse = pd.read_json(DATA_FILES.mp_computed_structure_entries).set_index("material_id")
 
-df_cse["structure"] = [
-    Structure.from_dict(cse["structure"]) for cse in tqdm(df_cse.entry)
+struct_col = "structure"
+df_cse[struct_col] = [
+    Structure.from_dict(cse[struct_col]) for cse in tqdm(df_cse.entry)
 ]
-wyk_col = "wyckoff_spglib"
-df_cse[wyk_col] = [
+wyckoff_col = "wyckoff_spglib"
+df_cse[wyckoff_col] = [
     get_aflow_label_from_spglib(struct, errors="ignore")
     for struct in tqdm(df_cse.structure)
 ]
 # make sure symmetry detection succeeded for all structures
-assert df_cse[wyk_col].str.startswith("invalid").sum() == 0
-df[wyk_col] = df_cse[wyk_col]
+assert df_cse[wyckoff_col].str.startswith("invalid").sum() == 0
+df[wyckoff_col] = df_cse[wyckoff_col]
 
-spg_nums = df[wyk_col].str.split("_").str[2].astype(int)
+spg_nums = df[wyckoff_col].str.split("_").str[2].astype(int)
 # make sure all our spacegroup numbers match MP's
 assert (spg_nums.sort_index() == df_spg["number"].sort_index()).all()
 

diff --git a/data/wbm/eda.py b/data/wbm/eda.py
@@ -235,9 +235,9 @@
 
 
 # %%
-wyk_col, spg_col = "wyckoff_spglib", "spacegroup"
-df_wbm[spg_col] = df_wbm[wyk_col].str.split("_").str[2].astype(int)
-df_mp[spg_col] = df_mp[wyk_col].str.split("_").str[2].astype(int)
+wyckoff_col, spg_col = "wyckoff_spglib", "spacegroup"
+df_wbm[spg_col] = df_wbm[wyckoff_col].str.split("_").str[2].astype(int)
+df_mp[spg_col] = df_mp[wyckoff_col].str.split("_").str[2].astype(int)
 
 
 # %%

diff --git a/models/cgcnn/train_cgcnn.py b/models/cgcnn/train_cgcnn.py
@@ -64,7 +64,7 @@
 df_in = pd.read_csv(data_path).set_index(id_col)
 
 df_cse = pd.read_json(DATA_FILES.mp_computed_structure_entries).set_index(id_col)
-df_in[input_col] = [Structure.from_dict(cse["structure"]) for cse in tqdm(df_cse.entry)]
+df_in[input_col] = [Structure.from_dict(cse[input_col]) for cse in tqdm(df_cse.entry)]
 
 assert target_col in df_in
 

diff --git a/models/wrenformer/analyze_wrenformer.py b/models/wrenformer/analyze_wrenformer.py
@@ -27,16 +27,16 @@
 ).index
 
 spg_col = "spacegroup"
-wyk_col = "wyckoff_spglib"
-df_wbm[spg_col] = df_wbm[wyk_col].str.split("_").str[2].astype(int)
+wyckoff_col = "wyckoff_spglib"
+df_wbm[spg_col] = df_wbm[wyckoff_col].str.split("_").str[2].astype(int)
 df_bad = df_wbm.loc[bad_ids]
 title = f"{len(df_bad)} {model} preds<br>with {max_each_true=}, {min_each_pred=}"
 
 
 # %%
 df_mp = pd.read_csv(DATA_FILES.mp_energies).set_index("material_id")
-df_mp[spg_col] = df_mp[wyk_col].str.split("_").str[2].astype(int)
-df_mp["isopointal_proto_from_aflow"] = df_mp[wyk_col].map(
+df_mp[spg_col] = df_mp[wyckoff_col].str.split("_").str[2].astype(int)
+df_mp["isopointal_proto_from_aflow"] = df_mp[wyckoff_col].map(
     get_isopointal_proto_from_aflow
 )
 df_mp.isopointal_proto_from_aflow.value_counts().head(12)
@@ -51,7 +51,7 @@
 # %%
 proto_col = "Isopointal Prototypes"
 df_proto_counts = (
-    df_bad[wyk_col].map(get_isopointal_proto_from_aflow).value_counts().to_frame()
+    df_bad[wyckoff_col].map(get_isopointal_proto_from_aflow).value_counts().to_frame()
 )
 
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "matbench-discovery"
-version = "0.1.4"
+version = "0.1.5"
 description = "A benchmark for machine learning energy models on inorganic crystal stability prediction from unrelaxed structures"
 authors = [{ name = "Janosh Riebesell", email = "[email protected]" }]
 readme = "readme.md"
@@ -124,7 +124,6 @@ ignore = [
   "PLR",     # pylint refactor
   "PLW2901", # redefined-loop-name
   "PT006",   # pytest-parametrize-names-wrong-type
-  "PT011",   # pytest-raises-too-broad
   "PT013",   # pytest-incorrect-pytest-import
 ]
 pydocstyle.convention = "google"

diff --git a/site/package.json b/site/package.json
@@ -20,7 +20,7 @@
     "@iconify/svelte": "^3.1.4",
     "@rollup/plugin-yaml": "^4.1.1",
     "@sveltejs/adapter-static": "^2.0.2",
-    "@sveltejs/kit": "^1.21.0",
+    "@sveltejs/kit": "^1.22.1",
     "@sveltejs/vite-plugin-svelte": "^2.4.2",
     "@typescript-eslint/eslint-plugin": "^5.61.0",
     "@typescript-eslint/parser": "^5.61.0",
@@ -32,22 +32,22 @@
     "js-yaml": "^4.1.0",
     "katex": "^0.16.8",
     "mdsvex": "^0.11.0",
-    "prettier": "^2.8.8",
+    "prettier": "^3.0.0",
     "prettier-plugin-svelte": "^2.10.1",
     "rehype-autolink-headings": "^6.1.1",
     "rehype-katex-svelte": "^1.2.0",
     "rehype-slug": "^5.1.0",
     "remark-math": "3.0.0",
-    "svelte": "^4.0.3",
-    "svelte-check": "^3.4.4",
+    "svelte": "^4.0.5",
+    "svelte-check": "^3.4.5",
     "svelte-multiselect": "^10.0.0",
     "svelte-preprocess": "^5.0.4",
     "svelte-toc": "^0.5.5",
     "svelte-zoo": "^0.4.8",
-    "svelte2tsx": "^0.6.16",
+    "svelte2tsx": "^0.6.19",
     "tslib": "^2.6.0",
     "typescript": "5.1.6",
-    "vite": "^4.3.9"
+    "vite": "^4.4.2"
   },
   "prettier": {
     "semi": false,

diff --git a/site/src/app.html b/site/src/app.html
@@ -1,4 +1,4 @@
-<!DOCTYPE html>
+<!doctype html>
 <html lang="en">
   <head>
     <meta name="author" content="Janosh Riebesell" />

diff --git a/site/src/routes/changelog/+page.server.ts b/site/src/routes/changelog/+page.server.ts
@@ -11,6 +11,6 @@ const section_level = (str: string) => str.replaceAll(`###`, `#`)
 
 export const load = async () => ({
   changelog: compile(
-    backticks_to_lt_gt(brace_to_paren(section_level(changelog)))
+    backticks_to_lt_gt(brace_to_paren(section_level(changelog))),
   ),
 })
diff --git a/site/svelte.config.js b/site/svelte.config.js
@@ -32,7 +32,7 @@ export default {
               `svg`,
               { width: 16, height: 16, viewBox: `0 0 16 16` },
               // symbol #octicon-link defined in app.html
-              s(`use`, { 'xlink:href': `#octicon-link` })
+              s(`use`, { 'xlink:href': `#octicon-link` }),
             ),
           },
         ],
@@ -59,7 +59,7 @@ export default {
               const idx = (route.startsWith(`si`) ? `S` : ``) + fig_index.size
               const link_icon = `<a aria-hidden="true" tabindex="-1" href="#${id}"><svg width="16" height="16" viewBox="0 0 16 16"><use xlink:href="#octicon-link"></use></svg></a>`
               return `<strong id='${id}'>${link_icon}Fig. ${idx}</strong>`
-            }
+            },
           )
 
           // Replace figure references @fig:label with 'fig. {n}' and add to fig_index
@@ -73,12 +73,12 @@ export default {
                 console.error(
                   `Figure id '${id}' not found, expected one of ${[
                     ...fig_index,
-                  ]}`
+                  ]}`,
                 )
                 idx = `not found`
               }
               return `<a href="#${id_lower}">${fig_or_Fig}. ${idx}</a>`
-            }
+            },
           )
 
           // preprocess markdown citations @auth_1st-word-title_yyyy into superscript
@@ -93,7 +93,7 @@ export default {
                 console.error(`Reference id ${id} not found`)
               }
               return `[<a class="ref" href="#${id}">${author} ${year}</a>]`
-            }
+            },
           )
 
           return { code }

diff --git a/tests/test_data.py b/tests/test_data.py
@@ -81,20 +81,17 @@ def test_load(
 
 def test_load_raises(tmp_path: Path) -> None:
     data_key = "bad-key"
-    with pytest.raises(ValueError) as exc_info:
+    with pytest.raises(ValueError) as exc:  # noqa: PT011
         load(data_key)
 
-    assert f"Unknown {data_key=}, must be one of {list(DATA_FILES)}" in str(
-        exc_info.value
-    )
+    assert f"Unknown {data_key=}, must be one of {list(DATA_FILES)}" in str(exc.value)
 
     version = "invalid-version"
-    with pytest.raises(ValueError) as exc_info:
+    with pytest.raises(ValueError) as exc:  # noqa: PT011
         load("wbm_summary", version=version, cache_dir=tmp_path)
 
     assert (
-        str(exc_info.value)
-        == f"Unexpected {version=}. Must be one of {figshare_versions}."
+        str(exc.value) == f"Unexpected {version=}. Must be one of {figshare_versions}."
     )
     assert os.listdir(tmp_path) == [], "cache_dir should be empty"
 

diff --git a/tests/test_plots.py b/tests/test_plots.py
@@ -70,15 +70,15 @@ def test_cumulative_metrics(
 
 
 def test_cumulative_metrics_raises() -> None:
-    with pytest.raises(ValueError) as exc_info:
+    with pytest.raises(
+        ValueError,
+        match="invalid_metrics={'invalid'}, should be case-insensitive subset of",
+    ):
         cumulative_metrics(
             e_above_hull_true=df_wbm[each_true_col],
             df_preds=df_wbm[models],
             metrics=("invalid",),
         )
-    assert "invalid_metrics={'invalid'}, should be case-insensitive subset of" in str(
-        exc_info.value
-    )
 
 
 @pytest.mark.parametrize("window", [0.02, 0.002])