Showing 11 changed files with 208 additions and 4 deletions.
Empty file.
protein_metamorphisms_is/operations/embedding_tasks/esm.py (31 additions, 0 deletions)
@@ -0,0 +1,31 @@
from transformers import AutoTokenizer, EsmModel
import torch


def embedding_task(session, chains, module, model_name):
    # Check that CUDA is available
    if not torch.cuda.is_available():
        raise Exception("CUDA is not available. This script requires a GPU with CUDA.")

    # Configure the device
    device = torch.device("cuda")

    # Load the tokenizer and the model
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = EsmModel.from_pretrained(model_name).to(device)
    model.eval()  # inference mode: disables dropout

    with torch.no_grad():  # disable gradient computation
        for chain in chains:
            tokens = tokenizer(chain.sequence, return_tensors="pt", truncation=True, padding=True)
            # Move the tokens to the same device as the model
            tokens = {k: v.to(device) for k, v in tokens.items()}

            # Get the embeddings from the model
            outputs = model(**tokens)
            embeddings = outputs.last_hidden_state
            # embeddings is a tensor of shape (batch_size, sequence_length, hidden_size)

            print(embeddings.shape)
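The loop above only prints the shape of the per-residue tensor; nothing is stored yet. If a single fixed-size vector per chain were needed downstream, one common reduction is a masked mean over the sequence dimension. A minimal sketch, not part of the commit; mean_pool is a hypothetical helper:

import torch

def mean_pool(last_hidden_state, attention_mask):
    # Average the per-residue embeddings over real (non-padding) tokens,
    # turning (batch, seq_len, hidden) into (batch, hidden).
    mask = attention_mask.unsqueeze(-1).float()     # (batch, seq_len, 1)
    summed = (last_hidden_state * mask).sum(dim=1)  # (batch, hidden)
    counts = mask.sum(dim=1).clamp(min=1)           # avoid division by zero
    return summed / counts

# Inside the loop above, this would be:
# vector = mean_pool(outputs.last_hidden_state, tokens["attention_mask"])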
protein_metamorphisms_is/operations/embedding_tasks/prost_t5.py (25 additions, 0 deletions)
@@ -0,0 +1,25 @@
from transformers import T5Tokenizer, T5EncoderModel
import re
import torch


def embedding_task(session, chains, module, model_name):
    if not torch.cuda.is_available():
        raise Exception("CUDA is not available. This script requires a GPU with CUDA.")

    device = torch.device("cuda")
    tokenizer = T5Tokenizer.from_pretrained(model_name, do_lower_case=False)
    model = T5EncoderModel.from_pretrained(model_name).to(device)
    model.eval()

    with torch.no_grad():
        for chain in chains:
            # Map rare/ambiguous amino acids (U, Z, O, B) to X and space-separate the residues
            sequence_processed = " ".join(list(re.sub(r"[UZOB]", "X", chain.sequence)))
            # ProstT5 expects a direction prefix: <AA2fold> for amino-acid input, <fold2AA> for 3Di input
            sequence_processed = "<AA2fold> " + sequence_processed if sequence_processed.isupper() else "<fold2AA> " + sequence_processed
            inputs = tokenizer(sequence_processed, return_tensors="pt", padding=True, truncation=True,
                               max_length=512, add_special_tokens=True).to(device)

            # Generate embeddings: mean-pool the per-residue states into one vector per chain
            outputs = model(input_ids=inputs.input_ids, attention_mask=inputs.attention_mask)
            embeddings = outputs.last_hidden_state.mean(dim=1)
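To make the preprocessing concrete, here is what the two transformation lines above produce for a sample sequence (illustration only; the sequence is made up):

import re

sequence = "MKTUZLLV"  # hypothetical chain.sequence
processed = " ".join(list(re.sub(r"[UZOB]", "X", sequence)))
# Upper-case input is an amino-acid sequence, so <AA2fold> is prepended;
# a lower-case 3Di structure string would get <fold2AA> instead.
processed = "<AA2fold> " + processed if processed.isupper() else "<fold2AA> " + processed
print(processed)  # <AA2fold> M K T X X L L V

Note that .mean(dim=1) averages over every token position, including padding and special tokens; since each chain is tokenized on its own here this is mostly harmless, but a masked mean like the one sketched for esm.py above would be more precise.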
@@ -0,0 +1,79 @@
import importlib
import multiprocessing
from datetime import datetime, timedelta
from multiprocessing import Pool

from sqlalchemy.orm import aliased

from protein_metamorphisms_is.operations.base.operator import OperatorBase
from protein_metamorphisms_is.sql.model import PDBChains, Cluster, PDBReference, StructuralAlignmentQueue, \
    StructuralAlignmentType, StructuralAlignmentResults, EmbeddingType


class EmbeddingManager(OperatorBase):
    """
    Manages the sequence embedding process for protein chains.

    Attributes:
        conf (dict): Configuration of the instance, including database connections and operational settings.
    """

    def __init__(self, conf):
        """
        Initializes an instance of `EmbeddingManager` with configuration settings.

        Args:
            conf (dict): Configuration parameters, including database connections and operational settings.
        """
        super().__init__(conf)
        self.logger.info("Sequence Embedding Manager instance created.")

    def fetch_models_info(self):
        """
        Fetches and prepares embedding task modules based on the configuration.

        This method dynamically imports the embedding task modules specified in the configuration and stores
        references to these modules in a dictionary for later use in the embedding process.
        """
        embedding_types = self.session.query(EmbeddingType).all()
        self.types = {}
        base_module_path = 'protein_metamorphisms_is.operations.embedding_tasks'

        for type_obj in embedding_types:
            if type_obj.id in self.conf['embedding']['types']:
                # Build the fully qualified module name
                module_name = f"{base_module_path}.{type_obj.task_name}"
                # Dynamically import the module using importlib
                module = importlib.import_module(module_name)
                # Store the module reference in the self.types dictionary
                self.types[type_obj.id] = {'module': module, 'model_name': type_obj.model_name}

        print(self.types)

    def start(self):
        """
        Begins the sequence embedding process.

        This method manages the workflow of the embedding process, including loading chains, resolving the
        configured embedding models, and handling any exceptions encountered during the process. Progress
        and errors are logged appropriately.
        """
        try:
            self.logger.info("Starting sequence embedding process.")
            chains = self.load_chains()
            self.fetch_models_info()

            for embedding_type in self.types.values():
                print(embedding_type)
                module, model = embedding_type['module'], embedding_type['model_name']
                # Each task module exposes embedding_task(session, chains, module, model_name);
                # the module currently passes itself through, and the task functions ignore it.
                module.embedding_task(self.session, chains, module, model)

        except Exception as e:
            self.logger.error(f"Error during sequence embedding process: {e}")
            raise
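For context, a hedged sketch of how this manager might be driven. Only the embedding.types key is visible in this diff, so the remaining configuration (database settings consumed by OperatorBase) is an assumption:

# Hypothetical driver; configuration keys other than embedding.types are assumptions.
conf = {
    "embedding": {"types": [1, 2]},  # EmbeddingType ids to run (e.g. esm, prost_t5)
    # ...plus whatever database settings OperatorBase expects...
}

manager = EmbeddingManager(conf)  # EmbeddingManager as defined above
manager.start()                   # loads chains, imports task modules, runs each embedding_task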