Skip to content

Commit

Permalink
Merge pull request #9 from frapercan/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
frapercan authored Dec 1, 2023
2 parents 6e63f25 + c4c6476 commit 30599d0
Show file tree
Hide file tree
Showing 9 changed files with 199 additions and 33 deletions.
4 changes: 3 additions & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ version: 2
build:
os: ubuntu-22.04
tools:
python: "3.11"
python: "3.10"

# Build documentation in the docs/ directory with Sphinx
sphinx:
Expand All @@ -20,3 +20,5 @@ sphinx:
python:
install:
- requirements: docs/requirements.txt
- method: pip
path: .
8 changes: 7 additions & 1 deletion docs/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
sphinx_rtd_theme
sphinx_rtd_theme
sphinx
sphinx_rtd_theme
readthedocs-sphinx-search
sqlalchemy
bio
pyyaml
16 changes: 10 additions & 6 deletions docs/source/fasta.rst
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
fasta
=====
FastaHandler
===============

El módulo `parser` proporciona funcionalidades para transformar cadenas de texto.
El módulo `fasta` proporciona la clase `FastaHandler` para descargar archivos FASTA de la base de datos de PDB y combinarlos en un único archivo.

Funciones
---------
Clase FastaHandler
---------------------

.. autoclass:: protein_data_handler.fasta.FastaHandler
:members:
:undoc-members:
:show-inheritance:

.. autoclass:: protein_data_handler.fasta.FastaDownloader
4 changes: 3 additions & 1 deletion examples/example_fasta/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ DB_NAME: BioData

resolution_threshold: 2.5
max_workers: 15
data_dir: /FASTA
data_dir: /opt/shared/FASTA
output_dir: /opt/shared/FASTA/output
merge_name: merge_25
13 changes: 8 additions & 5 deletions examples/example_fasta/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from protein_data_handler.helpers.config.yaml import read_yaml_config
from protein_data_handler.helpers.database.database import create_session
from protein_data_handler.fasta import FastaDownloader # Asegúrate de ajustar la ruta de importación
from protein_data_handler.fasta import FastaHandler # Asegúrate de ajustar la ruta de importación
from protein_data_handler.models.uniprot import Base, PDBReference


Expand All @@ -21,13 +21,16 @@ def main():
Session = sessionmaker(bind=engine)
session = Session()

query = session.query(PDBReference).filter(PDBReference.resolution < config.get("resolution_threshold",2.5)).all()
query = session.query(PDBReference).filter(PDBReference.resolution < config.get("resolution_threshold", 2.5)).all()
pdb_ids = [pdb_ref.pdb_id for pdb_ref in query]
# Inicializa FastaDownloader
fasta_downloader = FastaDownloader(session,config['data_dir'])
# Inicializa FastaHandler
fasta_downloader = FastaHandler(session, config['data_dir'], config['output_dir'])

# Descarga los archivos FASTA
fasta_downloader.download_fastas(pdb_ids,config['max_workers'])
fasta_downloader.download_fastas(pdb_ids[:-500], config['max_workers'])

fasta_downloader.merge_fastas(pdb_ids,config['merge_name'])


if __name__ == "__main__":
main()
78 changes: 72 additions & 6 deletions protein_data_handler/fasta.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,52 @@
format='%(asctime)s - %(levelname)s - %(message)s')


class FastaDownloader:
class FastaHandler:
"""
Clase para descargar archivos FASTA de la base de datos de PDB.
Clase para descargar archivos FASTA de la base de datos de PDB
(Protein Data Bank).
Esta clase permite descargar archivos FASTA, que contienen secuencias de
aminoácidos o nucleótidos, para un conjunto de identificadores de
PDB proporcionados.
:param session: Sesión de base de datos (SQLAlchemy) asociada al manejador.
:type session: sqlalchemy.orm.Session
:param data_dir: Directorio donde se guardarán los archivos FASTA
descargados.
:type data_dir: str
"""

def __init__(self, session, data_dir):
def __init__(self, session, data_dir, output_dir):
"""
Inicializa el descargador de FASTA con una sesión de base de datos.
Inicializa el descargador de FASTA con una sesión de base de datos y un
directorio de datos.
:param session: Sesión de base de datos (SQLAlchemy) asociada al manejador.
:param data_dir: Ruta del directorio donde se almacenarán los archivos
FASTA.
"""
self.session = session
self.data_dir = data_dir
self.output_dir = output_dir

if not os.path.exists(data_dir):
os.makedirs(data_dir, exist_ok=True)

if not os.path.exists(output_dir):
os.makedirs(output_dir, exist_ok=True)

def download_fastas(self, pdb_ids, max_workers=10):
"""
Descarga archivos FASTA para un conjunto de IDs de PDB
utilizando múltiples hilos.
Descarga archivos FASTA para un conjunto de IDs de PDB utilizando
múltiples hilos.
:param pdb_ids: Lista de identificadores de PDB para los cuales
descargar los archivos FASTA.
:type pdb_ids: list[str]
:param max_workers: Número máximo de hilos para usar en la descarga.
:type max_workers: int
:raises ValueError: Si `pdb_ids` no es una lista de cadenas de texto.
"""
logging.info(f"Descarga de {len(pdb_ids)} estructuras FASTA.")
if not isinstance(pdb_ids, list) or not all(isinstance(id, str)
Expand All @@ -38,6 +66,13 @@ def download_fastas(self, pdb_ids, max_workers=10):
def download_fasta(self, pdb_id):
"""
Descarga un archivo FASTA individual de la base de datos de PDB.
:param pdb_id: Identificador de PDB para el cual descargar el archivo
FASTA.
:type pdb_id: str
:raises ValueError: Si `pdb_id` no es una cadena de texto.
:raises RequestException: Si ocurre un error en la solicitud HTTP.
:raises IOError: Si ocurre un error al escribir el archivo descargado.
"""

if not isinstance(pdb_id, str):
Expand All @@ -57,3 +92,34 @@ def download_fasta(self, pdb_id):
logging.error(f"Error al descargar FASTA para {pdb_id}: {e}")
except IOError as e:
logging.error(f"Error al escribir el archivo para {pdb_id}: {e}")

def merge_fastas(self, pdb_ids, merge_name):
    """
    Merge the FASTA files of a set of PDB IDs into a single file.

    Files that are not present in ``self.data_dir`` are first requested
    through ``self.download_fastas``. Any file that is still absent
    afterwards is skipped and reported with a warning.

    :param pdb_ids: PDB identifiers whose FASTA files are merged, in the
        given order.
    :type pdb_ids: list[str]
    :param merge_name: Base name (without extension) of the merged file,
        which is written to ``self.output_dir`` as ``<merge_name>.fasta``.
    :type merge_name: str
    """
    fasta_paths = [os.path.join(self.data_dir, f"{pdb_id}.fasta")
                   for pdb_id in pdb_ids]

    missing_files = [pdb_id
                     for pdb_id, file_path in zip(pdb_ids, fasta_paths)
                     if not os.path.isfile(file_path)]
    if missing_files:
        logging.info("Descargando archivos FASTA faltantes.")
        self.download_fastas(missing_files)

    # A plain ``with open(...)`` avoids the parenthesized context-manager
    # syntax, which is only part of the official grammar from Python 3.10.
    merged_path = os.path.join(self.output_dir, f"{merge_name}.fasta")
    with open(merged_path, 'w') as outfile:
        for file_path in fasta_paths:
            # Re-check existence: a download may have failed.
            if not os.path.isfile(file_path):
                logging.warning(f"Archivo no encontrado: {file_path}")
                continue
            with open(file_path, 'r') as infile:
                content = infile.read()
            outfile.write(content)
            # Only terminate the record when the source did not already do
            # so; otherwise every record would be followed by a blank line.
            if content and not content.endswith('\n'):
                outfile.write('\n')
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "protein-data-handler"
version = "1.0.6"
version = "1.0.7"
description = "Comprehensive Python Module for Protein Data Management: Designed for streamlined integration and processing of protein information from both UniProt and PDB. Equipped with features for concurrent data fetching, robust error handling, and database synchronization."
authors = ["frapercan <[email protected]>"]
readme = "README.md"
Expand Down
99 changes: 88 additions & 11 deletions tests/test_fasta.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
import os
import unittest
from unittest.mock import patch, MagicMock
from unittest.mock import patch, MagicMock, call

import requests
from requests import RequestException

from protein_data_handler.fasta import FastaDownloader
from protein_data_handler.fasta import FastaHandler


class TestFastaDownloader(unittest.TestCase):
class TestFastaHandler(unittest.TestCase):

def setUp(self):
self.session_mock = MagicMock() # Simula la sesión de SQLAlchemy
self.downloader = FastaDownloader(self.session_mock,'test_dir')
self.data_dir = './tests/data/FASTA'
self.output_dir = './tests/data/FASTA/output'
self.downloader = FastaHandler(self.session_mock, self.data_dir, self.output_dir)

def test_init(self):
self.assertEqual(self.downloader.session, self.session_mock)
Expand All @@ -22,12 +25,13 @@ def test_creacion_directorio_si_no_existe(self, mock_exists, mock_makedirs):
# Configurar el mock para simular que el directorio no existe
mock_exists.return_value = False

# Inicializar FastaDownloader
FastaDownloader('sesion_falsa', 'ruta/directorio')
# Inicializar FastaHandler
FastaHandler('sesion_falsa', './tests/data/FASTA', './tests/data/FASTA/output')

# Verificar que os.makedirs fue llamado
mock_makedirs.assert_called_once_with('ruta/directorio', exist_ok=True)

expected_calls = [call('./tests/data/FASTA', exist_ok=True),
call('./tests/data/FASTA/output', exist_ok=True)]
mock_makedirs.assert_has_calls(expected_calls)

def test_download_fastas_invalid_input(self):
with self.assertRaises(ValueError):
Expand All @@ -39,10 +43,9 @@ def test_download_fasta_successful(self, mock_get):
mock_response.status_code = 200
mock_response.text = "fake-fasta-content"
mock_get.return_value = mock_response

with patch('builtins.open', unittest.mock.mock_open()) as mock_file:
self.downloader.download_fasta("PDBID")
mock_file.assert_called_with("test_dir/PDBID.fasta", "w")
mock_file.assert_called_with("./tests/data/FASTA/PDBID.fasta", "w")
mock_file().write.assert_called_with("fake-fasta-content")

@patch('protein_data_handler.fasta.requests.get')
Expand All @@ -66,7 +69,7 @@ def test_download_fasta_io_error(self, mock_logging_error, mock_get):
self.downloader.download_fasta("PDBID")
mock_logging_error.assert_called_with("Error al escribir el archivo para PDBID: IO error")

@patch('protein_data_handler.fasta.FastaDownloader.download_fasta')
@patch('protein_data_handler.fasta.FastaHandler.download_fasta')
def test_download_fastas(self, mock_download_fasta):
pdb_ids = ["PDB1", "PDB2", "PDB3"]
self.downloader.download_fastas(pdb_ids)
Expand All @@ -88,6 +91,80 @@ def test_download_fasta_directory_creation(self, mock_makedirs, mock_exists):

self.downloader.download_fasta("PDBID")

@patch('os.path.isfile')
@patch('builtins.open', new_callable=unittest.mock.mock_open, read_data="contenido_fasta")
@patch('protein_data_handler.fasta.FastaHandler.download_fastas')
def test_merge_fastas(self, mock_download_fastas, mock_open, mock_isfile):
    """Merging opens every input FASTA for reading and the merged file for writing."""
    # Report any path ending in ".fasta" as an existing file.
    mock_isfile.side_effect = lambda path: path.endswith('.fasta')
    identifiers = ['PDB1', 'PDB2', 'PDB3']
    target_name = 'merged'

    self.downloader.merge_fastas(identifiers, target_name)

    # The order of the open() calls is irrelevant; only their presence matters.
    handler = self.downloader
    output_call = call(os.path.join(handler.output_dir, f'{target_name}.fasta'), 'w')
    input_calls = [
        call(os.path.join(handler.data_dir, f'{identifier}.fasta'), 'r')
        for identifier in identifiers
    ]
    mock_open.assert_has_calls([*input_calls, output_call], any_order=True)

@patch('os.path.isfile')
@patch('builtins.open', new_callable=unittest.mock.mock_open, read_data="contenido_fasta")
@patch('protein_data_handler.fasta.FastaHandler.download_fastas')
def test_merge_fastas_fichero_inexistente(self, mock_download_fastas, mock_open, mock_isfile):
    """When no input FASTA exists, all IDs are requested and nothing is read.

    This test used to be an exact copy of ``test_merge_fastas``; it now
    covers the scenario its name describes (non-existent files).
    """
    # No file exists, neither before nor after the (mocked) download.
    mock_isfile.return_value = False
    pdb_ids = ['PDB1', 'PDB2', 'PDB3']
    merge_name = 'merged'

    self.downloader.merge_fastas(pdb_ids, merge_name)

    # Every identifier is reported as missing and handed to download_fastas.
    mock_download_fastas.assert_called_once_with(pdb_ids)

    # Only the merged output is opened; no input file is opened for reading.
    mock_open.assert_called_once_with(
        os.path.join(self.downloader.output_dir, f'{merge_name}.fasta'), 'w')

@patch('os.path.isfile')
@patch('builtins.open', new_callable=unittest.mock.mock_open, read_data="contenido_fasta")
@patch('protein_data_handler.fasta.FastaHandler.download_fastas')
@patch('logging.info')
def test_merge_fastas_with_missing_files(self, mock_logging_info, mock_download_fastas, mock_open, mock_isfile):
    """Missing FASTA files are requested for download; existing ones are merged."""

    def only_pdb2_exists(filepath):
        # Simulate a data directory that contains PDB2.fasta and nothing else.
        return 'PDB2.fasta' in filepath

    mock_isfile.side_effect = only_pdb2_exists
    identifiers = ['PDB1', 'PDB2', 'PDB3']
    target_name = 'merged'

    self.downloader.merge_fastas(identifiers, target_name)

    # The two absent identifiers are handed to download_fastas.
    mock_download_fastas.assert_called_with(['PDB1', 'PDB3'])

    # The download step is announced in the log.
    mock_logging_info.assert_called_with("Descargando archivos FASTA faltantes.")

    # Only the existing input is read, and the merged output is written.
    read_call = call(os.path.join(self.data_dir, 'PDB2.fasta'), 'r')
    write_call = call(os.path.join(self.output_dir, f'{target_name}.fasta'), 'w')
    mock_open.assert_has_calls([read_call, write_call], any_order=True)

@patch('protein_data_handler.fasta.FastaHandler.download_fastas')
@patch('builtins.open', new_callable=unittest.mock.mock_open)
@patch('os.path.isfile')
@patch('logging.warning')
def test_merge_fastas_file_not_found(self, mock_logging_warning, mock_isfile, mock_open, mock_download_fastas):
    """A FASTA file that is still absent after the download step is reported with a warning.

    ``download_fastas`` and ``open`` are patched so the test neither invokes
    the real downloader nor writes a merged file to disk.
    """
    # No file exists, neither before nor after the (mocked) download.
    mock_isfile.return_value = False
    pdb_ids = ['PDB1']
    merge_name = 'merged'

    self.downloader.merge_fastas(pdb_ids, merge_name)

    # The missing identifier is handed to the (mocked) downloader.
    mock_download_fastas.assert_called_once_with(pdb_ids)

    # The path is built the same way merge_fastas builds it.
    missing_path = os.path.join(self.data_dir, 'PDB1.fasta')
    mock_logging_warning.assert_called_with(f"Archivo no encontrado: {missing_path}")


if __name__ == '__main__':
unittest.main()
8 changes: 7 additions & 1 deletion tests/test_uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,17 @@ def test_cargar_codigos_acceso_falla_api(self, mock_get):
# Verifica que se realizó un rollback en la sesión
mock_session.rollback.assert_called_once()

def test_crear_proteina_si_no_existe(self):
@patch("requests.get")
def test_crear_proteina_si_no_existe(self,mock_get):
# Configurar los mocks
session_mock = MagicMock() # Reemplaza 'Session' con la clase de sesión SQLAlchemy correcta
session_mock.query.return_value.filter_by.return_value.one.side_effect = NoResultFound


mock_get.return_value.ok = True
mock_get.return_value.text = "ABC123\nDEF456\nGHI789"
mock_session = MagicMock()

# Llamar a la función con los mocks
cargar_codigos_acceso("criterio_busqueda", 10, session_mock)

Expand Down

0 comments on commit 30599d0

Please sign in to comment.