diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 6d0d90e6..6ec42f2e 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -13,6 +13,7 @@ jobs: with: python-version: "3.11" working-directory: site + install-cmd: npm install --force pre-build: | pip install lazydocs # lazydocs needs package deps to be installed diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ae3270c..586b2ac3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_install_hook_types: [pre-commit, commit-msg] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.275 + rev: v0.0.277 hooks: - id: ruff args: [--fix] @@ -49,7 +49,7 @@ repos: args: [--ignore-words-list, "nd,te,fpr"] - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.9-for-vscode + rev: v3.0.0 hooks: - id: prettier args: [--write] # edit files in-place diff --git a/changelog.md b/changelog.md index 72cb9204..ba7c2157 100644 --- a/changelog.md +++ b/changelog.md @@ -2,8 +2,12 @@ All notable changes to this project will be documented in this file. Dates are displayed in UTC. -#### [v1.0.0](https://github.com/janosh/matbench-discovery/compare/v0.1.4...v1.0.0) +#### [v0.1.5](https://github.com/janosh/matbench-discovery/compare/v0.1.4...v0.1.5) +> 7 July 2023 + +- v0.1.5 [`#44`](https://github.com/janosh/matbench-discovery/pull/44) +- [pre-commit.ci] pre-commit autoupdate [`#42`](https://github.com/janosh/matbench-discovery/pull/42) - Update site to show ALIGNN results [`#38`](https://github.com/janosh/matbench-discovery/pull/38) - ALIGNN test on WBM data [`#37`](https://github.com/janosh/matbench-discovery/pull/37) - Add PDF versions of all SI figures [`#36`](https://github.com/janosh/matbench-discovery/pull/36) @@ -18,6 +22,7 @@ All notable changes to this project will be documented in this file. Dates are d - Add `upload_to_figshare.py` and replace local data-loading with cached Figshare downloads [`#13`](https://github.com/janosh/matbench-discovery/pull/13) - DRY workflows [`#8`](https://github.com/janosh/matbench-discovery/pull/8) - Add `Files`, `DataFiles` and `PredFiles` classes in `matbench_discovery/data.py` [`#7`](https://github.com/janosh/matbench-discovery/pull/7) +- fix path issues and using JSON pandas reader for CSV files in train_(cgcnn|wrenformer).py (closes #43) [`#43`](https://github.com/janosh/matbench-discovery/issues/43) - Fix KeyError('wyckoff_spglib') in `fetch_process_wbm_dataset.py` (#34) [`#32`](https://github.com/janosh/matbench-discovery/issues/32) [`#33`](https://github.com/janosh/matbench-discovery/issues/33) - Fix `compute_struct_fingerprints.py`: cannot insert `material_id`, already exists (#31) [`#30`](https://github.com/janosh/matbench-discovery/issues/30) - Fix `fetch_process_wbm_dataset.py` unnecessarily loading WBM initial structures from disk (#29) [`#28`](https://github.com/janosh/matbench-discovery/issues/28) @@ -38,6 +43,7 @@ All notable changes to this project will be documented in this file. Dates are d - add scripts/compute_struct_fingerprints.py to generate matminer SiteStats fingerprints for all MP+WBM structures [`7946b5e`](https://github.com/janosh/matbench-discovery/commit/7946b5e433713ef8f46b839562f09d889331e63b) - split model run times into train and test contribs [`b8a18d8`](https://github.com/janosh/matbench-discovery/commit/b8a18d8fd357ea4ec77dae837fdb8317fe978b28) - add models/chgnet/{test_chgnet,join_m3gnet_results}.py [`da39074`](https://github.com/janosh/matbench-discovery/commit/da390741b0d0f03fbcca5bcbac725c2211a80427) +- git surgery purging all large CSV files from repo history [`4a90dee`](https://github.com/janosh/matbench-discovery/commit/4a90dee85965aebbcfcd547353c548eeb19651ce) - add scripts/compute_projections.py [`eec1cb4`](https://github.com/janosh/matbench-discovery/commit/eec1cb4073c2db65cd238157f656af963582ffc5) - make horizontal versions of roc-models-2x4.pdf and model-run-times-bar.pdf [`551050e`](https://github.com/janosh/matbench-discovery/commit/551050e17cc98cf786707448fd304bb06177a914) - add changelog.md [`0621282`](https://github.com/janosh/matbench-discovery/commit/0621282b0ed6ca397b6293e8d6f2bcbaac736468) @@ -73,12 +79,11 @@ All notable changes to this project will be documented in this file. Dates are d - tweak contributing guide [`b3ea8d3`](https://github.com/janosh/matbench-discovery/commit/b3ea8d3980d62032ed33ae75fb0c7539fafcebe3) - upload @pbenner's ALIGNN checkpoint to Figshare and link from models/alignn/readme.md [`da56377`](https://github.com/janosh/matbench-discovery/commit/da5637725e3f785cdf8bd82ff6a3be95858b2eae) - start preprint [`fd8355d`](https://github.com/janosh/matbench-discovery/commit/fd8355dca9fe87ef9196980c06907ef377fdcd97) -- git surgery purging all large CSV files from repo history [`873fa75`](https://github.com/janosh/matbench-discovery/commit/873fa7559fab539f2f1fe8fe6557d5c530b5aabc) - rename pages /about-test-test-set to /about-the-data and /how-to-contribute to /contribute [`e41ca32`](https://github.com/janosh/matbench-discovery/commit/e41ca32e768ea06f404b216353521f89e6bee2e2) #### v0.1.4 -> 24 February 2023 +> 19 June 2023 - Add WBM element heatmap to site and data/wbm/readme.md [`#6`](https://github.com/janosh/matbench-discovery/pull/6) - doc: remove reference to alignn [`#4`](https://github.com/janosh/matbench-discovery/pull/4) diff --git a/data/mp/get_mp_energies.py b/data/mp/get_mp_energies.py index 13b636d1..730ec094 100644 --- a/data/mp/get_mp_energies.py +++ b/data/mp/get_mp_energies.py @@ -58,19 +58,20 @@ # %% df_cse = pd.read_json(DATA_FILES.mp_computed_structure_entries).set_index("material_id") -df_cse["structure"] = [ - Structure.from_dict(cse["structure"]) for cse in tqdm(df_cse.entry) +struct_col = "structure" +df_cse[struct_col] = [ + Structure.from_dict(cse[struct_col]) for cse in tqdm(df_cse.entry) ] -wyk_col = "wyckoff_spglib" -df_cse[wyk_col] = [ +wyckoff_col = "wyckoff_spglib" +df_cse[wyckoff_col] = [ get_aflow_label_from_spglib(struct, errors="ignore") for struct in tqdm(df_cse.structure) ] # make sure symmetry detection succeeded for all structures -assert df_cse[wyk_col].str.startswith("invalid").sum() == 0 -df[wyk_col] = df_cse[wyk_col] +assert df_cse[wyckoff_col].str.startswith("invalid").sum() == 0 +df[wyckoff_col] = df_cse[wyckoff_col] -spg_nums = df[wyk_col].str.split("_").str[2].astype(int) +spg_nums = df[wyckoff_col].str.split("_").str[2].astype(int) # make sure all our spacegroup numbers match MP's assert (spg_nums.sort_index() == df_spg["number"].sort_index()).all() diff --git a/data/wbm/eda.py b/data/wbm/eda.py index 99d1fea7..d453f8dc 100644 --- a/data/wbm/eda.py +++ b/data/wbm/eda.py @@ -235,9 +235,9 @@ # %% -wyk_col, spg_col = "wyckoff_spglib", "spacegroup" -df_wbm[spg_col] = df_wbm[wyk_col].str.split("_").str[2].astype(int) -df_mp[spg_col] = df_mp[wyk_col].str.split("_").str[2].astype(int) +wyckoff_col, spg_col = "wyckoff_spglib", "spacegroup" +df_wbm[spg_col] = df_wbm[wyckoff_col].str.split("_").str[2].astype(int) +df_mp[spg_col] = df_mp[wyckoff_col].str.split("_").str[2].astype(int) # %% diff --git a/models/cgcnn/train_cgcnn.py b/models/cgcnn/train_cgcnn.py index 8616f684..218e875a 100644 --- a/models/cgcnn/train_cgcnn.py +++ b/models/cgcnn/train_cgcnn.py @@ -64,7 +64,7 @@ df_in = pd.read_csv(data_path).set_index(id_col) df_cse = pd.read_json(DATA_FILES.mp_computed_structure_entries).set_index(id_col) -df_in[input_col] = [Structure.from_dict(cse["structure"]) for cse in tqdm(df_cse.entry)] +df_in[input_col] = [Structure.from_dict(cse[input_col]) for cse in tqdm(df_cse.entry)] assert target_col in df_in diff --git a/models/wrenformer/analyze_wrenformer.py b/models/wrenformer/analyze_wrenformer.py index f0712258..ac4be05d 100644 --- a/models/wrenformer/analyze_wrenformer.py +++ b/models/wrenformer/analyze_wrenformer.py @@ -27,16 +27,16 @@ ).index spg_col = "spacegroup" -wyk_col = "wyckoff_spglib" -df_wbm[spg_col] = df_wbm[wyk_col].str.split("_").str[2].astype(int) +wyckoff_col = "wyckoff_spglib" +df_wbm[spg_col] = df_wbm[wyckoff_col].str.split("_").str[2].astype(int) df_bad = df_wbm.loc[bad_ids] title = f"{len(df_bad)} {model} preds
with {max_each_true=}, {min_each_pred=}" # %% df_mp = pd.read_csv(DATA_FILES.mp_energies).set_index("material_id") -df_mp[spg_col] = df_mp[wyk_col].str.split("_").str[2].astype(int) -df_mp["isopointal_proto_from_aflow"] = df_mp[wyk_col].map( +df_mp[spg_col] = df_mp[wyckoff_col].str.split("_").str[2].astype(int) +df_mp["isopointal_proto_from_aflow"] = df_mp[wyckoff_col].map( get_isopointal_proto_from_aflow ) df_mp.isopointal_proto_from_aflow.value_counts().head(12) @@ -51,7 +51,7 @@ # %% proto_col = "Isopointal Prototypes" df_proto_counts = ( - df_bad[wyk_col].map(get_isopointal_proto_from_aflow).value_counts().to_frame() + df_bad[wyckoff_col].map(get_isopointal_proto_from_aflow).value_counts().to_frame() ) diff --git a/pyproject.toml b/pyproject.toml index 3cf29c06..c73cb874 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "matbench-discovery" -version = "0.1.4" +version = "0.1.5" description = "A benchmark for machine learning energy models on inorganic crystal stability prediction from unrelaxed structures" authors = [{ name = "Janosh Riebesell", email = "janosh@lbl.gov" }] readme = "readme.md" @@ -124,7 +124,6 @@ ignore = [ "PLR", # pylint refactor "PLW2901", # redefined-loop-name "PT006", # pytest-parametrize-names-wrong-type - "PT011", # pytest-raises-too-broad "PT013", # pytest-incorrect-pytest-import ] pydocstyle.convention = "google" diff --git a/site/package.json b/site/package.json index df7cda54..9692d919 100644 --- a/site/package.json +++ b/site/package.json @@ -20,7 +20,7 @@ "@iconify/svelte": "^3.1.4", "@rollup/plugin-yaml": "^4.1.1", "@sveltejs/adapter-static": "^2.0.2", - "@sveltejs/kit": "^1.21.0", + "@sveltejs/kit": "^1.22.1", "@sveltejs/vite-plugin-svelte": "^2.4.2", "@typescript-eslint/eslint-plugin": "^5.61.0", "@typescript-eslint/parser": "^5.61.0", @@ -32,22 +32,22 @@ "js-yaml": "^4.1.0", "katex": "^0.16.8", "mdsvex": "^0.11.0", - "prettier": "^2.8.8", + "prettier": "^3.0.0", "prettier-plugin-svelte": "^2.10.1", "rehype-autolink-headings": "^6.1.1", "rehype-katex-svelte": "^1.2.0", "rehype-slug": "^5.1.0", "remark-math": "3.0.0", - "svelte": "^4.0.3", - "svelte-check": "^3.4.4", + "svelte": "^4.0.5", + "svelte-check": "^3.4.5", "svelte-multiselect": "^10.0.0", "svelte-preprocess": "^5.0.4", "svelte-toc": "^0.5.5", "svelte-zoo": "^0.4.8", - "svelte2tsx": "^0.6.16", + "svelte2tsx": "^0.6.19", "tslib": "^2.6.0", "typescript": "5.1.6", - "vite": "^4.3.9" + "vite": "^4.4.2" }, "prettier": { "semi": false, diff --git a/site/src/app.html b/site/src/app.html index f812a2b0..f399ad85 100644 --- a/site/src/app.html +++ b/site/src/app.html @@ -1,4 +1,4 @@ - + diff --git a/site/src/routes/changelog/+page.server.ts b/site/src/routes/changelog/+page.server.ts index ec6e4061..dfc26bce 100644 --- a/site/src/routes/changelog/+page.server.ts +++ b/site/src/routes/changelog/+page.server.ts @@ -11,6 +11,6 @@ const section_level = (str: string) => str.replaceAll(`###`, `#`) export const load = async () => ({ changelog: compile( - backticks_to_lt_gt(brace_to_paren(section_level(changelog))) + backticks_to_lt_gt(brace_to_paren(section_level(changelog))), ), }) diff --git a/site/svelte.config.js b/site/svelte.config.js index 25b28568..c5926396 100644 --- a/site/svelte.config.js +++ b/site/svelte.config.js @@ -32,7 +32,7 @@ export default { `svg`, { width: 16, height: 16, viewBox: `0 0 16 16` }, // symbol #octicon-link defined in app.html - s(`use`, { 'xlink:href': `#octicon-link` }) + s(`use`, { 'xlink:href': `#octicon-link` }), ), }, ], @@ -59,7 +59,7 @@ export default { const idx = (route.startsWith(`si`) ? `S` : ``) + fig_index.size const link_icon = `` return `${link_icon}Fig. ${idx}` - } + }, ) // Replace figure references @fig:label with 'fig. {n}' and add to fig_index @@ -73,12 +73,12 @@ export default { console.error( `Figure id '${id}' not found, expected one of ${[ ...fig_index, - ]}` + ]}`, ) idx = `not found` } return `${fig_or_Fig}. ${idx}` - } + }, ) // preprocess markdown citations @auth_1st-word-title_yyyy into superscript @@ -93,7 +93,7 @@ export default { console.error(`Reference id ${id} not found`) } return `[${author} ${year}]` - } + }, ) return { code } diff --git a/tests/test_data.py b/tests/test_data.py index 5fea7948..ef14a641 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -81,20 +81,17 @@ def test_load( def test_load_raises(tmp_path: Path) -> None: data_key = "bad-key" - with pytest.raises(ValueError) as exc_info: + with pytest.raises(ValueError) as exc: # noqa: PT011 load(data_key) - assert f"Unknown {data_key=}, must be one of {list(DATA_FILES)}" in str( - exc_info.value - ) + assert f"Unknown {data_key=}, must be one of {list(DATA_FILES)}" in str(exc.value) version = "invalid-version" - with pytest.raises(ValueError) as exc_info: + with pytest.raises(ValueError) as exc: # noqa: PT011 load("wbm_summary", version=version, cache_dir=tmp_path) assert ( - str(exc_info.value) - == f"Unexpected {version=}. Must be one of {figshare_versions}." + str(exc.value) == f"Unexpected {version=}. Must be one of {figshare_versions}." ) assert os.listdir(tmp_path) == [], "cache_dir should be empty" diff --git a/tests/test_plots.py b/tests/test_plots.py index 8cfbcd97..64d8d6bc 100644 --- a/tests/test_plots.py +++ b/tests/test_plots.py @@ -70,15 +70,15 @@ def test_cumulative_metrics( def test_cumulative_metrics_raises() -> None: - with pytest.raises(ValueError) as exc_info: + with pytest.raises( + ValueError, + match="invalid_metrics={'invalid'}, should be case-insensitive subset of", + ): cumulative_metrics( e_above_hull_true=df_wbm[each_true_col], df_preds=df_wbm[models], metrics=("invalid",), ) - assert "invalid_metrics={'invalid'}, should be case-insensitive subset of" in str( - exc_info.value - ) @pytest.mark.parametrize("window", [0.02, 0.002])