Skip to content

Commit d8c03a0

Browse files
committed
add documentation to extra.storage
1 parent 99e505d commit d8c03a0

File tree

3 files changed

+164
-52
lines changed

3 files changed

+164
-52
lines changed

qsprpred/extra/data/storage/protein/interfaces/protein_storage.py

+71-44
Original file line numberDiff line numberDiff line change
@@ -10,38 +10,49 @@
1010

1111

1212
class ProteinStorage(PropertyStorage, ABC):
13+
"""Storage for proteins.
14+
15+
Attributes:
16+
sequenceProp (str): name of the property that contains all protein sequences
17+
proteins (Iterable[StoredProtein]): all proteins in the store
18+
"""
1319

1420
@property
1521
@abstractmethod
1622
def sequenceProp(self) -> str:
1723
"""Get the name of the property that contains all protein sequences."""
1824

1925
@abstractmethod
20-
def add_protein(self, protein: StoredProtein, raise_on_existing=True):
21-
"""
22-
Add a protein to the store.
26+
def add_protein(self, protein: StoredProtein, raise_on_existing=True) -> StoredProtein:
27+
"""Add a protein to the store.
28+
29+
Args:
30+
protein (StoredProtein): protein to add to the store
31+
raise_on_existing (bool):
32+
raise an exception if the protein already exists in the store
2333
24-
:param protein: protein sequence
25-
:param raise_on_existing: raise an exception if the protein already exists in the store
26-
:return: `StoredProtein` instance of the added protein
34+
Returns:
35+
StoredProtein: instance of the added protein
2736
"""
2837

2938
@property
3039
@abstractmethod
3140
def proteins(self) -> Iterable[StoredProtein]:
32-
"""
33-
Get all proteins in the store.
34-
35-
:return: iterable of `Protein` instances
41+
"""Get all proteins in the store.
42+
43+
Returns:
44+
Iterable[StoredProtein]: iterable of `StoredProtein` instances
3645
"""
3746

3847
@abstractmethod
3948
def getProtein(self, protein_id: str) -> StoredProtein:
40-
"""
41-
Get a protein from the store using its name.
49+
"""Get a protein from the store using its name.
4250
43-
:param protein_id: name of the protein to search
44-
:return: instance of `Protein`
51+
Args:
52+
protein_id (str): name of the protein to search
53+
54+
Returns:
55+
StoredProtein: the `StoredProtein` instance matching `protein_id`
4556
"""
4657

4758
@abstractmethod
@@ -57,56 +68,72 @@ def getPCMInfo(self) -> tuple[dict[str, str], dict]:
5768

5869

5970
class DockableStore(ChemStore, ABC):
71+
"""Storage for dockable molecules.
72+
73+
Attributes:
74+
proteins (Iterable[StoredProtein]): all proteins in the store
75+
"""
6076

6177
@abstractmethod
6278
def add_target(self, target: StoredProtein,
6379
raise_on_existing=True) -> StoredProtein:
64-
"""
65-
Add a target to the store.
80+
"""Add a target to the store.
81+
82+
Args:
83+
target (StoredProtein): target protein
84+
raise_on_existing (bool):
85+
raise an exception if the target already exists in the store
6686
67-
:param target: target protein
68-
:param raise_on_existing: raise an exception if the target already exists in the store
69-
:return: `Protein` instance of the added target
87+
Returns:
88+
StoredProtein: instance of the added target
7089
"""
7190

7291
@abstractmethod
7392
def add_poses(self, mol_id: str, poses: Chem.Mol, target: StoredProtein,
7493
metadata: list[dict[str, Any]] | None = None) -> list[StoredMol]:
75-
"""
76-
Add poses to the store.
77-
78-
:param mol_id: identifier of the molecule to add poses for
79-
:param poses: dictionary of target identifiers and poses
80-
:param target: target protein
81-
:param metadata: additional metadata to store with the poses
82-
:return: Added poses represented as `StoredMol`
94+
"""Add poses to the store.
95+
96+
Args:
97+
mol_id (str): identifier of the molecule to add poses for
98+
poses (Chem.Mol): RDKit molecule containing the poses to add for `target`
99+
target (StoredProtein): target protein
100+
metadata (list[dict[str, Any]] | None): optional additional metadata to store with the poses
101+
102+
Returns:
103+
list[StoredMol]: Added poses represented as `StoredMol`
83104
"""
84105

85106
@abstractmethod
86107
def get_poses(self, mol_id: str, target_id: str) -> list[StoredMol]:
87-
"""
88-
Get poses from the store.
89-
90-
:param mol_id: identifier of the molecule to get poses for
91-
:param target_id: identifier of the target to get poses for
92-
:return:
108+
"""Get poses from the store.
109+
110+
Args:
111+
mol_id (str): identifier of the molecule to get poses for
112+
target_id (str): identifier of the target to get poses for
113+
114+
Returns:
115+
list[StoredMol]: poses for the molecule and target
93116
"""
94117

95118
@abstractmethod
96119
def get_complex_for_pose(self, mol_id: str, target_id: str) -> Chem.Mol:
97-
"""
98-
Get the complex for a pose.
99-
100-
:param mol_id: identifier of the molecule to get the complex for
101-
:param target_id: identifier of the target to get the complex for
102-
:return: tuple of the complex and the target
120+
"""Get the complex for a pose.
121+
122+
Args:
123+
mol_id (str): identifier of the molecule to get the complex for
124+
target_id (str): identifier of the target to get the complex for
125+
126+
Returns:
127+
Chem.Mol: the complex of the pose and the target
103128
"""
104129

105130
@abstractmethod
106131
def get_target(self, target_id: str) -> StoredProtein:
107-
"""
108-
Get a target from the store using its ID.
109-
110-
:param target_id: identifier of the target to search
111-
:return: instance of `Protein`
132+
"""Get a target from the store using its ID.
133+
134+
Args:
135+
target_id (str): identifier of the target to search
136+
137+
Returns:
138+
StoredProtein: the `StoredProtein` instance matching `target_id`
112139
"""

qsprpred/extra/data/storage/protein/interfaces/storedprotein.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,51 @@
55

66

77
class StoredProtein(ABC):
8-
"""
9-
A protein object
8+
"""A protein object.
9+
10+
Attributes:
11+
id (str): id of the protein
12+
sequence (str): sequence of the protein
13+
props (dict[str, Any]): properties of the protein
14+
representations (Iterable[StoredProtein]): representations of the protein
1015
"""
1116

1217
@property
1318
@abstractmethod
1419
def id(self) -> str:
20+
"""Get the id of the protein."""
1521
pass
1622

1723
@property
1824
@abstractmethod
1925
def sequence(self) -> str | None:
26+
"""Get the sequence of the protein."""
2027
pass
2128

2229
@property
2330
@abstractmethod
2431
def props(self) -> dict[str, Any] | None:
32+
"""Get the properties of the protein."""
2533
pass
2634

2735
@abstractmethod
2836
def as_pdb(self) -> str | None:
37+
"""Return the protein as a PDB-format string, or None if unavailable."""
2938
pass
3039

3140
@abstractmethod
3241
def as_fasta(self) -> str | None:
42+
"""Return the protein as a FASTA-format string, or None if unavailable."""
3343
pass
3444

3545
def as_rd_mol(self) -> Chem.Mol | None:
46+
"""Return the protein as an RDKit molecule parsed from its PDB block, or None if no PDB data is available."""
3647
pdb = self.as_pdb()
3748
if pdb is not None:
3849
return Chem.MolFromPDBBlock(self.as_pdb())
3950

4051
@property
4152
@abstractmethod
4253
def representations(self) -> Iterable["StoredProtein"]:
54+
"""Get all representations of the protein."""
4355
pass

qsprpred/extra/data/storage/protein/tabular_pcm.py

+79-6
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@
1212

1313

1414
class TabularProtein(StoredProtein):
15+
"""A protein object that is stored in a tabular format.
16+
17+
Attributes:
18+
id (str): id of the protein
19+
sequence (str): sequence of the protein
20+
props (dict[str, Any]): properties of the protein
21+
representations (Iterable[TabularProtein]): representations of the protein
22+
"""
1523

1624
def __init__(
1725
self,
@@ -21,12 +29,14 @@ def __init__(
2129
props: dict[str, Any] | None = None,
2230
representations: Iterable["TabularProtein"] | None = None,
2331
) -> None:
24-
"""
25-
Create a new protein instance.
26-
27-
:param parent: parent protein
28-
:param protein_id: identifier of the protein
29-
:param sequence: sequence of the protein
32+
"""Create a new protein instance.
33+
34+
Args:
35+
protein_id (str): identifier of the protein
36+
sequence (str): sequence of the protein
37+
parent (TabularProtein): parent protein
38+
props (dict[str, Any]): properties of the protein
39+
representations (Iterable[TabularProtein]): representations of the protein
3040
"""
3141
self._parent = parent
3242
self._id = protein_id
@@ -36,28 +46,42 @@ def __init__(
3646

3747
@property
3848
def id(self) -> str:
49+
"""Get the id of the protein."""
3950
return self._id
4051

4152
@property
4253
def sequence(self) -> str | None:
54+
"""Get the sequence of the protein."""
4355
return self._sequence
4456

4557
@property
4658
def props(self) -> dict[str, Any] | None:
59+
"""Get the properties of the protein."""
4760
return self._props
4861

4962
def as_pdb(self) -> str | None:
63+
"""Return the value stored under the "pdb" property, or None if it is absent."""
5064
return self._props["pdb"] if "pdb" in self._props else None
5165

5266
def as_fasta(self) -> str | None:
67+
"""Return the value stored under the "fasta" property, or None if it is absent."""
5368
return self._props["fasta"] if "fasta" in self._props else None
5469

5570
@property
5671
def representations(self) -> Iterable["TabularProtein"]:
72+
"""Get all representations of the protein."""
5773
return self._representations
5874

5975

6076
class TabularProteinStorage(ProteinStorage, PandasDataTable):
77+
"""A storage class for proteins stored in a tabular format.
78+
79+
Attributes:
80+
sequenceCol (str): name of the column that contains all protein sequences
81+
proteinSeqProvider (Callable): function that provides protein sequences
82+
sequenceProp (str): name of the property that contains all protein sequences
83+
proteins (Iterable[TabularProtein]): all proteins in the store
84+
"""
6185

6286
def __init__(
6387
self,
@@ -75,6 +99,23 @@ def __init__(
7599
store_format: str = "pkl",
76100
parallel_generator: ParallelGenerator | None = None,
77101
):
102+
"""Create a new protein storage instance.
103+
104+
Args:
105+
name (str): name of the storage
106+
df (pd.DataFrame): data frame containing the proteins
107+
sequence_col (str): name of the column that contains all protein sequences
108+
sequence_provider (Callable): function that provides protein sequences
109+
store_dir (str): directory to store the data
110+
overwrite (bool): overwrite the existing data
111+
index_cols (list[str]): columns to use as index
112+
n_jobs (int): number of parallel jobs
113+
chunk_size (int): size of the chunks
114+
protein_col (str): name of the column that contains the protein ids
115+
random_state (int): random state
116+
store_format (str): format to store the data
117+
parallel_generator (ParallelGenerator): parallel generator
118+
"""
78119
super().__init__(
79120
name,
80121
df if df is not None else pd.DataFrame(columns=[sequence_col,
@@ -158,16 +199,32 @@ def getPCMInfo(self) -> tuple[dict[str, str], dict]:
158199

159200
@property
160201
def sequenceProp(self) -> str:
202+
"""Get the name of the property that contains all protein sequences."""
161203
return self._sequenceCol
162204

163205
def add_protein(self, protein: TabularProtein, raise_on_existing=True):
206+
"""Add a protein to the store.
207+
208+
Args:
209+
protein (TabularProtein): protein to add to the store
210+
raise_on_existing (bool):
211+
raise an exception if the protein already exists in the store
212+
"""
164213
self.addEntries(
165214
[protein.id],
166215
{prop: [val] for prop, val in protein.props},
167216
raise_on_existing
168217
)
169218

170219
def _make_proteins_from_chunk(self, df: pd.DataFrame) -> list[TabularProtein]:
220+
"""Create a list of proteins from a chunk of the data frame.
221+
222+
Args:
223+
df (pd.DataFrame): chunk of the data frame
224+
225+
Returns:
226+
list[TabularProtein]: list of proteins
227+
"""
171228
ids = df[self.idProp].values
172229
sequences = df[self.sequenceProp].values
173230
props = df.columns.difference([self.idProp, self.sequenceProp])
@@ -182,12 +239,28 @@ def _make_proteins_from_chunk(self, df: pd.DataFrame) -> list[TabularProtein]:
182239

183240
@property
184241
def proteins(self) -> list[TabularProtein]:
242+
"""Get all proteins in the store.
243+
244+
Returns:
245+
list[TabularProtein]: list of proteins
246+
"""
185247
ret = []
186248
for chunk in self.iterChunks(len(self)):
187249
ret.extend(self._make_proteins_from_chunk(chunk))
188250
return ret
189251

190252
def getProtein(self, protein_id: str) -> TabularProtein:
253+
"""Get a protein from the store using its name.
254+
255+
Args:
256+
protein_id (str): name of the protein to search
257+
258+
Returns:
259+
TabularProtein: the `TabularProtein` instance matching `protein_id`
260+
261+
Raises:
262+
ValueError: if the protein is not found
263+
"""
191264
df = self.getDF()
192265
protein = df[df[self.idProp] == protein_id]
193266
if protein.empty:

0 commit comments

Comments
 (0)