Skip to content

Commit 244f9be

Browse files
committed
deploy: 5774ba4
0 parents  commit 244f9be

File tree

2,704 files changed

+450937
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,704 files changed

+450937
-0
lines changed

.gitignore

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
target/
2+
**/*.rs.bk
3+
Cargo.lock
4+
5+
.tox/
6+
build/
7+
dist/
8+
*.egg-info
9+
__pycache__/

.nojekyll

Whitespace-only changes.

_redirect.html

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
<!DOCTYPE html>
2+
<html>
3+
<head>
4+
<meta charset="utf-8" />
5+
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1" />
6+
<meta http-equiv="refresh" content="0;URL=rascaline/index.html" />
7+
</head>
8+
<body></body>
9+
</html>

featomic-torch-v0.6.0-rc1/.buildinfo

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
# Sphinx build info version 1
2+
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3+
config: 03e11edcf884b4e6e1f62e583ec21792
4+
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,248 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"\n# Property Selection\n\n.. start-body\n"
8+
]
9+
},
10+
{
11+
"cell_type": "code",
12+
"execution_count": null,
13+
"metadata": {
14+
"collapsed": false
15+
},
16+
"outputs": [],
17+
"source": [
18+
"import chemfiles\nimport numpy as np\nfrom metatensor import Labels, MetatensorError, TensorBlock, TensorMap\nfrom skmatter.feature_selection import FPS\n\nfrom featomic import SoapPowerSpectrum"
19+
]
20+
},
21+
{
22+
"cell_type": "markdown",
23+
"metadata": {},
24+
"source": [
25+
"First we load the dataset with chemfiles\n\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": null,
31+
"metadata": {
32+
"collapsed": false
33+
},
34+
"outputs": [],
35+
"source": [
36+
"with chemfiles.Trajectory(\"dataset.xyz\") as trajectory:\n frames = [f for f in trajectory]"
37+
]
38+
},
39+
{
40+
"cell_type": "markdown",
41+
"metadata": {},
42+
"source": [
43+
"and define the hyper parameters of the representation\n\n"
44+
]
45+
},
46+
{
47+
"cell_type": "code",
48+
"execution_count": null,
49+
"metadata": {
50+
"collapsed": false
51+
},
52+
"outputs": [],
53+
"source": [
54+
"HYPER_PARAMETERS = {\n \"cutoff\": {\n \"radius\": 5.0,\n \"smoothing\": {\"type\": \"ShiftedCosine\", \"width\": 0.5},\n },\n \"density\": {\n \"type\": \"Gaussian\",\n \"width\": 0.3,\n },\n \"basis\": {\n \"type\": \"TensorProduct\",\n \"max_angular\": 4,\n \"radial\": {\"type\": \"Gto\", \"max_radial\": 6},\n },\n}\n\ncalculator = SoapPowerSpectrum(**HYPER_PARAMETERS)\n\ndescriptor = calculator.compute(frames)"
55+
]
56+
},
57+
{
58+
"cell_type": "markdown",
59+
"metadata": {},
60+
"source": [
61+
"The selection of features can be given as a set of ``Labels``, in which case the names\nof the labels must be a subset of the names of the properties produced by the\ncalculator. You can see the default set of names with:\n\n"
62+
]
63+
},
64+
{
65+
"cell_type": "code",
66+
"execution_count": null,
67+
"metadata": {
68+
"collapsed": false
69+
},
70+
"outputs": [],
71+
"source": [
72+
"print(\"property names:\", descriptor.property_names)"
73+
]
74+
},
75+
{
76+
"cell_type": "markdown",
77+
"metadata": {},
78+
"source": [
79+
"We can use a subset of these names to define a selection. In this case, only\nproperties matching the labels in this selection will be used by featomic\n(here, only properties with ``l = 0`` will be used)\n\n"
80+
]
81+
},
82+
{
83+
"cell_type": "code",
84+
"execution_count": null,
85+
"metadata": {
86+
"collapsed": false
87+
},
88+
"outputs": [],
89+
"source": [
90+
"selection = Labels(\n names=[\"l\"],\n values=np.array([[0]]),\n)\nselected_descriptor = calculator.compute(frames, selected_properties=selection)\n\nselected_descriptor = selected_descriptor.keys_to_samples(\"center_type\")\nselected_descriptor = selected_descriptor.keys_to_properties(\n [\"neighbor_1_type\", \"neighbor_2_type\"]\n)\n\nproperties = selected_descriptor.block().properties"
91+
]
92+
},
93+
{
94+
"cell_type": "markdown",
95+
"metadata": {},
96+
"source": [
97+
"We expect to get `[0]` as the list of `l` properties\n\n"
98+
]
99+
},
100+
{
101+
"cell_type": "code",
102+
"execution_count": null,
103+
"metadata": {
104+
"collapsed": false
105+
},
106+
"outputs": [],
107+
"source": [
108+
"print(f\"we have the following angular components: {np.unique(properties['l'])}\")"
109+
]
110+
},
111+
{
112+
"cell_type": "markdown",
113+
"metadata": {},
114+
"source": [
115+
"The previous selection method uses the same selection for all blocks. If you\nwant to use a different selection for different blocks, you should use a\n``TensorMap`` to create your selection\n\n"
116+
]
117+
},
118+
{
119+
"cell_type": "code",
120+
"execution_count": null,
121+
"metadata": {
122+
"collapsed": false
123+
},
124+
"outputs": [],
125+
"source": [
126+
"selected_descriptor = calculator.compute(frames, selected_properties=selection)\ndescriptor_for_comparison = calculator.compute(\n frames, selected_properties=selected_descriptor\n)"
127+
]
128+
},
129+
{
130+
"cell_type": "markdown",
131+
"metadata": {},
132+
"source": [
133+
"The descriptor had 180 properties stored in the first block, the\nselected_descriptor had 36. So ``descriptor_for_comparison`` will also have 36\nproperties.\n\n"
134+
]
135+
},
136+
{
137+
"cell_type": "code",
138+
"execution_count": null,
139+
"metadata": {
140+
"collapsed": false
141+
},
142+
"outputs": [],
143+
"source": [
144+
"print(\"shape of first block initially:\", descriptor.block(0).values.shape)\nprint(\"shape of first block of reference:\", selected_descriptor.block(0).values.shape)\nprint(\n \"shape of first block after selection:\",\n descriptor_for_comparison.block(0).values.shape,\n)"
145+
]
146+
},
147+
{
148+
"cell_type": "markdown",
149+
"metadata": {},
150+
"source": [
151+
"The ``TensorMap`` format allows us to select different features within each\nblock, and then construct a general matrix of features. We can select the most\nsignificant features using FPS, which selects features based on the distance\nbetween them. The following code snippet selects the 10 most important\nfeatures in each block, then constructs a TensorMap containing this selection,\nand calculates the final matrix of features for it.\n\n"
152+
]
153+
},
154+
{
155+
"cell_type": "code",
156+
"execution_count": null,
157+
"metadata": {
158+
"collapsed": false
159+
},
160+
"outputs": [],
161+
"source": [
162+
"def fps_feature_selection(descriptor, n_to_select):\n \"\"\"\n Select ``n_to_select`` features block by block in the ``descriptor``, using\n Farthest Point Sampling to do the selection; and return a ``TensorMap`` with\n the right structure to be used as properties selection with featomic calculators\n \"\"\"\n blocks = []\n for block in descriptor:\n # create a separate FPS selector for each block\n fps = FPS(n_to_select=n_to_select)\n mask = fps.fit(block.values).get_support()\n selected_properties = Labels(\n names=block.properties.names,\n values=block.properties.values[mask],\n )\n # The only important data here is the properties, so we create empty\n # sets of samples and components.\n blocks.append(\n TensorBlock(\n values=np.empty((1, len(selected_properties))),\n samples=Labels.single(),\n components=[],\n properties=selected_properties,\n )\n )\n\n return TensorMap(descriptor.keys, blocks)"
163+
]
164+
},
165+
{
166+
"cell_type": "markdown",
167+
"metadata": {},
168+
"source": [
169+
"We can then apply this function to subselect according to the data contained\nin a descriptor\n\n"
170+
]
171+
},
172+
{
173+
"cell_type": "code",
174+
"execution_count": null,
175+
"metadata": {
176+
"collapsed": false
177+
},
178+
"outputs": [],
179+
"source": [
180+
"selection = fps_feature_selection(descriptor, n_to_select=10)"
181+
]
182+
},
183+
{
184+
"cell_type": "markdown",
185+
"metadata": {},
186+
"source": [
187+
"and use the selection with featomic, potentially running the calculation on a\ndifferent set of systems\n\n"
188+
]
189+
},
190+
{
191+
"cell_type": "code",
192+
"execution_count": null,
193+
"metadata": {
194+
"collapsed": false
195+
},
196+
"outputs": [],
197+
"source": [
198+
"selected_descriptor = calculator.compute(frames, selected_properties=selection)"
199+
]
200+
},
201+
{
202+
"cell_type": "markdown",
203+
"metadata": {},
204+
"source": [
205+
"Note that in this case it is no longer possible to have a single feature\nmatrix, because each block will have its own properties.\n\n"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": null,
211+
"metadata": {
212+
"collapsed": false
213+
},
214+
"outputs": [],
215+
"source": [
216+
"try:\n selected_descriptor.keys_to_samples(\"center_type\")\nexcept MetatensorError as err:\n print(err)"
217+
]
218+
},
219+
{
220+
"cell_type": "markdown",
221+
"metadata": {},
222+
"source": [
223+
".. end-body\n\n"
224+
]
225+
}
226+
],
227+
"metadata": {
228+
"kernelspec": {
229+
"display_name": "Python 3",
230+
"language": "python",
231+
"name": "python3"
232+
},
233+
"language_info": {
234+
"codemirror_mode": {
235+
"name": "ipython",
236+
"version": 3
237+
},
238+
"file_extension": ".py",
239+
"mimetype": "text/x-python",
240+
"name": "python",
241+
"nbconvert_exporter": "python",
242+
"pygments_lexer": "ipython3",
243+
"version": "3.12.7"
244+
}
245+
},
246+
"nbformat": 4,
247+
"nbformat_minor": 0
248+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
"""
2+
Profiling calculation
3+
=====================
4+
5+
.. start-body
6+
"""
7+
8+
import chemfiles
9+
10+
import featomic
11+
from featomic import SoapPowerSpectrum
12+
13+
14+
def compute_soap(path):
    """Compute the SOAP power spectrum of all frames stored in ``path``.

    This is the same code as the 'compute-soap' example: the frames are read
    with chemfiles, the descriptor is computed together with its gradients
    with respect to positions, and the result is reshaped with
    ``keys_to_samples`` / ``keys_to_properties`` before being returned.
    """
    # hyper-parameters of the representation, one dictionary per group
    cutoff = {
        "radius": 5.0,
        "smoothing": {"type": "ShiftedCosine", "width": 0.5},
    }
    density = {"type": "Gaussian", "width": 0.3}
    basis = {
        "type": "TensorProduct",
        "max_angular": 4,
        "radial": {"type": "Gto", "max_radial": 6},
    }

    # read every frame while the trajectory file is open
    with chemfiles.Trajectory(path) as trajectory:
        systems = [frame for frame in trajectory]

    soap = SoapPowerSpectrum(cutoff=cutoff, density=density, basis=basis)
    power_spectrum = soap.compute(systems, gradients=["positions"])

    return power_spectrum.keys_to_samples("center_type").keys_to_properties(
        ["neighbor_1_type", "neighbor_2_type"]
    )
44+
45+
46+
# %%
47+
#
48+
# Run the calculation with profiling enabled.
49+
50+
with featomic.Profiler() as profiler:
51+
descriptor = compute_soap("dataset.xyz")
52+
# %%
53+
#
54+
# Display the recorded profiling data as a table.
55+
56+
print(profiler.as_short_table())
57+
58+
# %%
59+
#
60+
# You can also export this data as JSON for future use
61+
print(profiler.as_json())
62+
63+
# %%
64+
#
65+
# .. end-body

0 commit comments

Comments
 (0)