diff --git a/CHANGES.txt b/CHANGES.txt index e69de29..a86e7af 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -0,0 +1,2 @@ +3.1.0: + - Tensorflow 2 version. Source code version 0.2.1. \ No newline at end of file diff --git a/cryocare/__init__.py b/cryocare/__init__.py index e61f02e..dfd48a7 100644 --- a/cryocare/__init__.py +++ b/cryocare/__init__.py @@ -32,11 +32,10 @@ _logo = "icon.png" _references = ['buchholz2019cryo', 'buchholz2019content'] -__version__ = "3.0.1" +__version__ = "3.1.0" class Plugin(pwem.Plugin): - _homeVar = CRYOCARE_HOME _url = 'https://github.com/scipion-em/scipion-em-cryocare' @@ -72,21 +71,15 @@ def defineBinaries(cls, env): # Create the environment installationCmd += 'conda create -y -n %s -c conda-forge -c anaconda python=3.8 ' \ - 'cudnn=7.6.5=cuda10.1_0 && ' % CRYOCARE_ENV_NAME + 'cudatoolkit=11.0 cudnn=8.0 && ' % CRYOCARE_ENV_NAME + # 'keras-gpu=2.3.1 ' \ # Activate new the environment installationCmd += 'conda activate %s && ' % CRYOCARE_ENV_NAME - # Install non-conda required packages - installationCmd += 'pip install tensorflow-gpu==2.3.3 && ' - installationCmd += 'pip install mrcfile && ' - installationCmd += 'pip install csbdeep ' - # I had the same issue and was able to fix this by setting h5py < 3.0.0. - # Looks like here was a 3.0 release of h5py recently where they changed how strings are stored/read. - # https://github.com/keras-team/keras/issues/14265 - - # Install cryoCARE - installationCmd += 'pip install %s==%s &&' % (CRYOCARE, CRYOCARE_DEFAULT_VERSION) + # Install cryoCARE and the rest of dependencies + installationCmd += 'pip install tensorflow-gpu==2.4.0 && ' + installationCmd += 'pip install %s==%s && ' % (CRYOCARE, CRYOCARE_DEFAULT_VERSION) # Flag installation finished installationCmd += 'touch %s' % CRYOCARE_INSTALLED diff --git a/cryocare/constants.py b/cryocare/constants.py index 6d2d761..11501b5 100644 --- a/cryocare/constants.py +++ b/cryocare/constants.py @@ -26,8 +26,8 @@ # ************************************************************************** CRYOCARE_HOME = 'CRYOCARE_HOME' -V0_1_1 = '0.1.1' -CRYOCARE_DEFAULT_VERSION = V0_1_1 +V0_2_1 = '0.2.1' +CRYOCARE_DEFAULT_VERSION = V0_2_1 CRYOCARE = 'cryoCARE' CRYOCARE_ENV_NAME = '%s-%s' % (CRYOCARE, CRYOCARE_DEFAULT_VERSION) CRYOCARE_ENV_ACTIVATION = 'CRYOCARE_ENV_ACTIVATION' @@ -40,4 +40,5 @@ MEAN_STD_FN = 'mean_std.npz' TRAIN_DATA_CONFIG = 'training_data_config' CRYOCARE_MODEL = 'cryoCARE_model' +CRYOCARE_MODEL_TGZ = CRYOCARE_MODEL + '.tar.gz' PREDICT_CONFIG = 'predict_config' diff --git a/cryocare/objects.py b/cryocare/objects.py index 4d56b54..5870363 100644 --- a/cryocare/objects.py +++ b/cryocare/objects.py @@ -1,5 +1,7 @@ +from os.path import join import pyworkflow.object as pwobj +from cryocare.constants import CRYOCARE_MODEL from pwem import EMObject @@ -20,13 +22,13 @@ def __str__(self): class CryocareModel(EMObject): - def __init__(self, basedir=None, train_data_dir=None, **kwargs): + def __init__(self, model_file=None, train_data_dir=None, **kwargs): EMObject.__init__(self, **kwargs) - self._basedir = pwobj.String(basedir) + self._model_file = pwobj.String(model_file) self._train_data_dir = pwobj.String(train_data_dir) def getPath(self): - return self._basedir.get() + return self._model_file.get() def getTrainDataDir(self): return self._train_data_dir.get() diff --git a/cryocare/protocols/protocol_load_model.py b/cryocare/protocols/protocol_load_model.py index c208887..a5f1cef 100644 --- a/cryocare/protocols/protocol_load_model.py +++ b/cryocare/protocols/protocol_load_model.py @@ -1,14 +1,18 @@ -import glob +from enum import Enum from os.path import exists, join +from cryocare.utils import makeDatasetSymLinks, getModelName from pwem.protocols import EMProtocol from pyworkflow import BETA from pyworkflow.protocol import PathParam, FileParam from pyworkflow.utils import Message, createLink -from cryocare.constants import TRAIN_DATA_FN, VALIDATION_DATA_FN, CRYOCARE_MODEL +from cryocare.constants import TRAIN_DATA_FN, VALIDATION_DATA_FN from cryocare.objects import CryocareModel -from cryocare.utils import makeDatasetSymLinks + + +class outputObjects(Enum): + model = CryocareModel class ProtCryoCARELoadModel(EMProtocol): @@ -16,6 +20,7 @@ class ProtCryoCARELoadModel(EMProtocol): _label = 'CryoCARE Load Model' _devStatus = BETA + _possibleOutputs = outputObjects # -------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): @@ -25,11 +30,16 @@ def _defineParams(self, form): """ # You need a params to belong to a section: form.addSection(label=Message.LABEL_INPUT) - form.addParam('basedir', PathParam, - label='Base directory of the trained cryoCARE model', + form.addParam('trainDataModel', PathParam, + label='Pre-trained cryoCARE model (.tar.gz)', important=True, allowsNull=False, - help='It must contain a model in .h5 format.') + help='It is a .tar.gz file containing a folder that contains, in turn, the following files:\n\n' + '\t- config.json\n' + '\t- history.dat\n' + '\t- norm.json\n' + '\t- weights_best.h5\n' + '\t- weights_last.h5\n') form.addParam('trainDataDir', FileParam, label='Directory of the prepared data for training', important=True, @@ -46,19 +56,17 @@ def _initialize(self): # model, but they are located in the training data generation extra directory. Hence, a symbolic link will # be created makeDatasetSymLinks(self, self.trainDataDir.get()) - createLink(join('..', self.basedir.get()), self._getExtraPath(CRYOCARE_MODEL)) + createLink(join(self.trainDataModel.get()), getModelName(self)) def createOutputStep(self): - model = CryocareModel(basedir=self._getExtraPath(), train_data_dir=self._getExtraPath()) - self._defineOutputs(model=model) + model = CryocareModel(model_file=getModelName(self), train_data_dir=self._getExtraPath()) + self._defineOutputs(**{outputObjects.model.name: model}) # --------------------------- INFO functions ----------------------------------- def _validate(self): errors = [] - if not exists(self.basedir.get()): - errors.append('Training model base directory does not exists.') - elif not glob.glob(join(self.basedir.get(), '*.h5')): - errors.append('No model files were found in the introduced training model base directory.') + if not exists(self.trainDataModel.get()): + errors.append('Training model introduced does not exists.') if not exists(self.trainDataDir.get()): errors.append('Directory of the prepared data for training does not exists.') @@ -75,5 +83,6 @@ def _summary(self): summary = [] if self.isFinished(): - summary.append("Loaded training model_dir = *%s*" % self.basedir.get()) + summary.append("Loaded training model_dir = *%s*" % self.trainDataModel.get()) return summary + diff --git a/cryocare/protocols/protocol_predict.py b/cryocare/protocols/protocol_predict.py index 208ebdf..5711313 100644 --- a/cryocare/protocols/protocol_predict.py +++ b/cryocare/protocols/protocol_predict.py @@ -1,28 +1,65 @@ +# ************************************************************************** +# * +# * Authors: Scipion Team +# * +# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC +# * +# * This program is free software; you can redistribute it and/or modify +# * it under the terms of the GNU General Public License as published by +# * the Free Software Foundation; either version 2 of the License, or +# * (at your option) any later version. +# * +# * This program is distributed in the hope that it will be useful, +# * but WITHOUT ANY WARRANTY; without even the implied warranty of +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# * GNU General Public License for more details. +# * +# * You should have received a copy of the GNU General Public License +# * along with this program; if not, write to the Free Software +# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# * 02111-1307 USA +# * +# * All comments concerning this program package may be sent to the +# * e-mail address 'scipion@cnb.csic.es' +# * +# ************************************************************************** +import glob import json -from os.path import abspath, join - +import re +import shutil +from enum import Enum +from os.path import join +from cryocare.utils import checkInputTomoSetsSize from pwem.protocols import EMProtocol from pyworkflow import BETA from pyworkflow.protocol import params, StringParam -from pyworkflow.utils import Message, removeBaseExt, makePath +from pyworkflow.utils import Message, makePath from scipion.constants import PYTHON - from cryocare import Plugin -from tomo.objects import Tomogram -from tomo.protocols import ProtTomoBase +from tomo.objects import Tomogram, SetOfTomograms +from cryocare.constants import PREDICT_CONFIG + -from cryocare.constants import PREDICT_CONFIG, CRYOCARE_MODEL -from cryocare.utils import CryocareUtils as ccutils +DENOISED_SUFFIX = 'denoised' +EVEN = 'even' -class ProtCryoCAREPrediction(EMProtocol, ProtTomoBase): +class outputObjects(Enum): + tomograms = SetOfTomograms + + +class ProtCryoCAREPrediction(EMProtocol): """Generate the final restored tomogram by applying the cryoCARE trained network to both tomograms followed by per-pixel averaging.""" _label = 'CryoCARE Prediction' - _configPath = [] - _outputFiles = [] _devStatus = BETA + _possibleOutputs = outputObjects + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self._configPath = {} + self.sRate = None # -------------------------- DEFINE param functions ---------------------- def _defineParams(self, form): @@ -70,47 +107,53 @@ def _defineParams(self, form): # --------------------------- STEPS functions ------------------------------ def _insertAllSteps(self): - numTomo = 0 - makePath(self._getPredictConfDir()) + self._initialize() # Insert processing steps for evenTomo, oddTomo in zip(self.even.get(), self.odd.get()): - self._insertFunctionStep(self.preparePredictStep, evenTomo.getFileName(), oddTomo.getFileName(), numTomo) - self._insertFunctionStep(self.predictStep, numTomo) - numTomo += 1 + tsId = evenTomo.getTsId() + self._insertFunctionStep(self.preparePredictStep, evenTomo.getFileName(), oddTomo.getFileName(), tsId) + self._insertFunctionStep(self.predictStep, tsId) + self._insertFunctionStep(self.createOutputStep, tsId) - self._insertFunctionStep(self.createOutputStep) + def _initialize(self): + makePath(self._getPredictConfDir()) + self.sRate = self.even.get().getSamplingRate() - def preparePredictStep(self, evenTomo, oddTomo, numTomo): - outputName = self._getOutputName(evenTomo) - self._outputFiles.append(outputName) + def preparePredictStep(self, evenTomo, oddTomo, tsId): config = { - 'model_name': CRYOCARE_MODEL, 'path': self.model.get().getPath(), 'even': evenTomo, 'odd': oddTomo, - 'output_name': outputName, - 'n_tiles': [int(i) for i in self.n_tiles.get().split()] + 'n_tiles': [int(i) for i in self.n_tiles.get().split()], + 'output': self._getOutputPath(tsId), + 'overwrite': False } - self._configPath.append(join(self._getPredictConfDir(), '{}_{:03d}.json'.format(PREDICT_CONFIG, numTomo))) - with open(self._configPath[numTomo], 'w+') as f: + self._configPath[tsId] = join(self._getPredictConfDir(), '%s_%s.json' % (PREDICT_CONFIG, tsId)) + with open(self._configPath[tsId], 'w+') as f: json.dump(config, f, indent=2) - def predictStep(self, numTomo): + def predictStep(self, tsId): # Run cryoCARE - Plugin.runCryocare(self, PYTHON, '$(which cryoCARE_predict.py) --conf %s' % self._configPath[numTomo], + Plugin.runCryocare(self, PYTHON, '$(which cryoCARE_predict.py) --conf %s' % self._configPath[tsId], gpuId=getattr(self, params.GPU_LIST).get()) + # Remove even/odd words from the output name to avoid confusion + origName = self._getOutputFile(tsId) + finalNameRe = re.compile(re.escape(EVEN), re.IGNORECASE) # Used to do a case-insensitive replacement + shutil.move(origName, finalNameRe.sub('', origName)) - def createOutputStep(self): - outputSetOfTomo = self._createSetOfTomograms(suffix='_denoised') - outputSetOfTomo.copyInfo(self.even.get()) + def createOutputStep(self, tsId): + outputSetOfTomo = getattr(self, outputObjects.tomograms.name, None) + if not outputSetOfTomo: + outputSetOfTomo = SetOfTomograms.create(self._getPath(), template='tomograms%s.sqlite', suffix=DENOISED_SUFFIX) + outputSetOfTomo.copyInfo(self.even.get()) - for i, inTomo in enumerate(self.even.get()): - tomo = Tomogram() - tomo.setLocation(self._outputFiles[i]) - tomo.setSamplingRate(inTomo.getSamplingRate()) - outputSetOfTomo.append(tomo) + tomo = self._genOutputTomogram(tsId) + outputSetOfTomo.append(tomo) - self._defineOutputs(outputTomograms=outputSetOfTomo) + self._defineOutputs(**{outputObjects.tomograms.name: outputSetOfTomo}) + self._defineSourceRelation(self.even.get(), outputSetOfTomo) + self._defineSourceRelation(self.odd.get(), outputSetOfTomo) + self._defineSourceRelation(self.model.get(), outputSetOfTomo) # --------------------------- INFO functions ----------------------------------- def _summary(self): @@ -118,22 +161,40 @@ def _summary(self): summary = [] if self.isFinished(): - summary.append( - "Tomogram denoising finished.") + summary.append("Tomogram denoising finished.") return summary def _validate(self): validateMsgs = [] - - msg = ccutils.checkInputTomoSetsSize(self.even.get(), self.odd.get()) + # Check the sampling rate + sRateEven = self.even.get().getSamplingRate() + sRateOdd = self.odd.get().getSamplingRate() + if sRateEven != sRateOdd: + validateMsgs.append('The sampling rate of the introduced sets of tomograms is different:\n' + 'Even SR %.2f != Odd SR %.2f\n\n' % (sRateEven, sRateOdd)) + # Check the size + msg = checkInputTomoSetsSize(self.even.get(), self.odd.get()) if msg: - validateMsgs.append() + validateMsgs.append(msg) return validateMsgs # --------------------------- UTIL functions ----------------------------------- - def _getOutputName(self, inTomoName): - outputName = removeBaseExt(inTomoName) + '_denoised.mrc' - return abspath(self._getExtraPath(outputName.replace('_Even', '').replace('_Odd', ''))) - def _getPredictConfDir(self): return self._getExtraPath(PREDICT_CONFIG) + + def _getOutputPath(self, tsId): + """cryoCARE will generate a new folder for each tomogram denoised. Apart from that, if the + tomograms were imported, the 'Even_' word can be included in the tsId, as in that case it will be + the filename. To avoid confusion, it's removed from the generated folder name.""" + outPath = self._getExtraPath(tsId + '_' + DENOISED_SUFFIX) + outPathRe = re.compile(re.escape(EVEN), re.IGNORECASE) # Used to carry out a case-insensitive replacement + return outPathRe.sub('', outPath) + + def _getOutputFile(self, tsId): + return glob.glob(join(self._getOutputPath(tsId), '*.mrc'))[0] # Only one file is contained in each dir + + def _genOutputTomogram(self, tsId): + tomo = Tomogram() + tomo.setLocation(self._getOutputFile(tsId)) + tomo.setSamplingRate(self.sRate) + return tomo diff --git a/cryocare/protocols/protocol_prepare_training_data.py b/cryocare/protocols/protocol_prepare_training_data.py index 6dc3c27..30d40c0 100644 --- a/cryocare/protocols/protocol_prepare_training_data.py +++ b/cryocare/protocols/protocol_prepare_training_data.py @@ -1,8 +1,10 @@ import glob import json +from enum import Enum from os.path import join import numpy as np +from cryocare.utils import checkInputTomoSetsSize from pwem.protocols import EMProtocol from pyworkflow import BETA from pyworkflow.protocol import params, IntParam, FloatParam, Positive, LT, GT, LEVEL_ADVANCED, EnumParam @@ -12,7 +14,6 @@ from cryocare import Plugin from cryocare.constants import TRAIN_DATA_DIR, TRAIN_DATA_FN, TRAIN_DATA_CONFIG, VALIDATION_DATA_FN from cryocare.objects import CryocareTrainData -from cryocare.utils import CryocareUtils as ccutils # Tilt axis values X_AXIS = 0 @@ -23,12 +24,17 @@ Z_AXIS_LABEL = 'Z' +class outputObjects(Enum): + train_data = CryocareTrainData + + class ProtCryoCAREPrepareTrainingData(EMProtocol): """Operate the data to make it be expressed as expected by cryoCARE net.""" _label = 'CryoCARE Training Data Extraction' _devStatus = BETA _configFile = None + _possibleOutputs = outputObjects # -------------------------- DEFINE param functions ---------------------- @@ -101,7 +107,6 @@ def _insertAllSteps(self): self._insertFunctionStep(self.createOutputStep) def _initialize(self): - makePath(self._getTrainDataDir()) makePath(self._getTrainDataConfDir()) self._configFile = join(self._getTrainDataConfDir(), TRAIN_DATA_CONFIG) @@ -126,7 +131,9 @@ def createOutputStep(self): # Generate a train data object containing the resulting data train_data = CryocareTrainData(train_data_dir=self._getTrainDataDir(), patch_size=self.patch_shape.get()) - self._defineOutputs(train_data=train_data) + self._defineOutputs(**{outputObjects.train_data.name: train_data}) + self._defineSourceRelation(self.evenTomos.get(), train_data) + self._defineSourceRelation(self.oddTomos.get(), train_data) # --------------------------- INFO functions ----------------------------------- def _summary(self): @@ -144,14 +151,25 @@ def _summary(self): def _validate(self): validateMsgs = [] - - msg = ccutils.checkInputTomoSetsSize(self.evenTomos.get(), self.oddTomos.get()) + sideLength = self.patch_shape.get() + evenTomos = self.evenTomos.get() + oddTomos = self.oddTomos.get() + xe, ye, ze = evenTomos.getDimensions() + xo, yo, zo = oddTomos.getDimensions() + # Check the length and the dimensions of the sets introduced + msg = checkInputTomoSetsSize(evenTomos, oddTomos) if msg: - validateMsgs.append() - - if self.patch_shape.get() % 2 != 0: + validateMsgs.append(msg) + # Check the patch conditions + if sideLength % 2 != 0: validateMsgs.append('Patch shape has to be an even number.') - + for idim in [xe, ye, ze, xo, yo, zo]: + if idim <= 2 * sideLength: + validateMsgs.append('X, Y and Z dimensions of the tomograms introduced must satisfy the ' + 'condition\n\n*dimension > 2 x SideLength*\n\n' + '(X, Y, Z) = (%i, %i, %i)\n' + 'SideLength = %i\n\n' % (xe, ye, ze, sideLength)) + break return validateMsgs # --------------------------- UTIL functions ----------------------------------- diff --git a/cryocare/protocols/protocol_training.py b/cryocare/protocols/protocol_training.py index 004a128..9bbc78e 100644 --- a/cryocare/protocols/protocol_training.py +++ b/cryocare/protocols/protocol_training.py @@ -1,16 +1,20 @@ import json import operator +from enum import Enum +from cryocare.utils import makeDatasetSymLinks, getModelName from pwem.protocols import EMProtocol from pyworkflow import BETA from pyworkflow.protocol import IntParam, PointerParam, FloatParam, params, GT, LEVEL_ADVANCED, GE, Positive from pyworkflow.utils import Message from scipion.constants import PYTHON - from cryocare import Plugin from cryocare.constants import CRYOCARE_MODEL from cryocare.objects import CryocareModel -from cryocare.utils import makeDatasetSymLinks + + +class outputObjects(Enum): + model = CryocareModel class ProtCryoCARETraining(EMProtocol): @@ -18,6 +22,7 @@ class ProtCryoCARETraining(EMProtocol): _label = 'CryoCARE Training' _devStatus = BETA + _possibleOutputs = outputObjects _configPath = None # -------------------------- DEFINE param functions ---------------------- @@ -128,9 +133,10 @@ def trainingStep(self): gpuId=getattr(self, params.GPU_LIST).get()) def createOutputStep(self): - model = CryocareModel(basedir=self._getExtraPath(), + model = CryocareModel(model_file=getModelName(self), train_data_dir=self._getPreparedTrainingDataDir()) - self._defineOutputs(model=model) + self._defineOutputs(**{outputObjects.model.name: model}) + self._defineSourceRelation(self.train_data.get(), model) # --------------------------- INFO functions ----------------------------------- def _summary(self): diff --git a/cryocare/tests/__init__.py b/cryocare/tests/__init__.py index f89446a..be60ff8 100644 --- a/cryocare/tests/__init__.py +++ b/cryocare/tests/__init__.py @@ -1,15 +1,48 @@ +# ************************************************************************** +# * +# * Authors: Scipion Team +# * +# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC +# * +# * This program is free software; you can redistribute it and/or modify +# * it under the terms of the GNU General Public License as published by +# * the Free Software Foundation; either version 2 of the License, or +# * (at your option) any later version. +# * +# * This program is distributed in the hope that it will be useful, +# * but WITHOUT ANY WARRANTY; without even the implied warranty of +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# * GNU General Public License for more details. +# * +# * You should have received a copy of the GNU General Public License +# * along with this program; if not, write to the Free Software +# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# * 02111-1307 USA +# * +# * All comments concerning this program package may be sent to the +# * e-mail address 'scipion@cnb.csic.es' +# * +# ************************************************************************** +from enum import Enum +from cryocare.constants import CRYOCARE_MODEL_TGZ from pyworkflow.tests import DataSet -DataSet(name='cryocare', folder='cryocare', - files={ - 'rec_even_odd_tomos_dir': 'Tomos_EvenOdd_Reconstructed', - 'tomo_even': 'Tomos_EvenOdd_Reconstructed/Tomo110_Even_bin6.mrc', - 'tomo_odd': 'Tomos_EvenOdd_Reconstructed/Tomo110_Odd_bin6.mrc', - 'model_dir': 'Training_Model', - 'training_data_dir': 'Training_Data', - 'train_data_file': 'Training_Data/train_data.npz', - 'validation_data_file': 'Training_Data/val_data.npz', - 'training_data_conf_dir': 'Training_Data_Config', - 'training_data_conf': 'Training_Data_Config/training_data_config' - }) + +CRYOCARE = 'cryocare' + + +class DataSetCryoCARE(Enum): + rec_even_odd_tomos_dir = 'Tomos_EvenOdd_Reconstructed' + tomo_even = 'Tomos_EvenOdd_Reconstructed/Tomo110_Even_bin6.mrc' + tomo_odd = 'Tomos_EvenOdd_Reconstructed/Tomo110_Odd_bin6.mrc' + model_dir = 'Training_Model' + training_data_dir = 'Training_Data' + train_data_file = 'Training_Data/train_data.npz' + validation_data_file = 'Training_Data/val_data.npz' + training_data_conf_dir = 'Training_Data_Config' + training_data_conf = 'Training_Data_Config/training_data_config' + training_data_model = CRYOCARE_MODEL_TGZ + + +DataSet(name=CRYOCARE, folder=CRYOCARE, files={el.name: el.value for el in DataSetCryoCARE}) diff --git a/cryocare/tests/test_cryoCARE_workflow.py b/cryocare/tests/test_cryoCARE_workflow.py index 584432d..91668f3 100644 --- a/cryocare/tests/test_cryoCARE_workflow.py +++ b/cryocare/tests/test_cryoCARE_workflow.py @@ -1,13 +1,41 @@ +# ************************************************************************** +# * +# * Authors: Scipion Team +# * +# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC +# * +# * This program is free software; you can redistribute it and/or modify +# * it under the terms of the GNU General Public License as published by +# * the Free Software Foundation; either version 2 of the License, or +# * (at your option) any later version. +# * +# * This program is distributed in the hope that it will be useful, +# * but WITHOUT ANY WARRANTY; without even the implied warranty of +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# * GNU General Public License for more details. +# * +# * You should have received a copy of the GNU General Public License +# * along with this program; if not, write to the Free Software +# * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA +# * 02111-1307 USA +# * +# * All comments concerning this program package may be sent to the +# * e-mail address 'scipion@cnb.csic.es' +# * +# ************************************************************************** + from os.path import exists -from pyworkflow.tests import BaseTest, setupTestProject -import tomo.protocols +from cryocare.tests import CRYOCARE, DataSetCryoCARE +from pyworkflow.tests import BaseTest, setupTestProject, DataSet from pyworkflow.utils import magentaStr - -from . import DataSet -from ..constants import TRAIN_DATA_FN, TRAIN_DATA_CONFIG, TRAIN_DATA_DIR, VALIDATION_DATA_FN, CRYOCARE_MODEL -from ..objects import CryocareTrainData, CryocareModel -from ..protocols import ProtCryoCAREPrediction, ProtCryoCAREPrepareTrainingData, ProtCryoCARELoadModel, \ - ProtCryoCARETraining +from tomo.protocols import ProtImportTomograms +from cryocare.protocols.protocol_prepare_training_data import outputObjects as prepTrainDataOutputs, \ + ProtCryoCAREPrepareTrainingData +from cryocare.protocols.protocol_load_model import outputObjects as loadTrainingModelOutputs, ProtCryoCARELoadModel +from cryocare.protocols.protocol_training import outputObjects as trainOutputs, ProtCryoCARETraining +from cryocare.protocols.protocol_predict import outputObjects as predictOutputs, ProtCryoCAREPrediction +from cryocare.constants import TRAIN_DATA_FN, TRAIN_DATA_CONFIG, TRAIN_DATA_DIR, VALIDATION_DATA_FN, CRYOCARE_MODEL_TGZ +from cryocare.objects import CryocareTrainData, CryocareModel class TestCryoCARE(BaseTest): @@ -15,15 +43,15 @@ class TestCryoCARE(BaseTest): @classmethod def setUpClass(cls): setupTestProject(cls) - cls.dataset = DataSet.getDataSet('cryocare') - cls.sRate = 2.355 + cls.dataset = DataSet.getDataSet(CRYOCARE) + cls.sRate = 4.71 def _runImportTomograms(self, tomoFile, mode): print(magentaStr("\n==> Importing the %s tomograms:" % mode)) - protImport = self.newProtocol( - tomo.protocols.ProtImportTomograms, - filesPath=self.dataset.getFile(tomoFile), - samplingRate=self.sRate) + protImport = self.newProtocol(ProtImportTomograms, + filesPath=self.dataset.getFile(tomoFile), + samplingRate=self.sRate) + protImport.setObjLabel('Import %s tomograms' % mode) self.launchProtocol(protImport) output = getattr(protImport, 'outputTomograms', None) self.assertSetSize(output, size=1) @@ -31,16 +59,20 @@ def _runImportTomograms(self, tomoFile, mode): def _runPrepareTrainingData(self, protImportEven, protImportOdd): print(magentaStr("\n==> Preparing the training data:")) + patchSize = 40 protPrepTrainingData = self.newProtocol(ProtCryoCAREPrepareTrainingData, evenTomos=protImportEven.outputTomograms, - oddTomos=protImportOdd.outputTomograms) + oddTomos=protImportOdd.outputTomograms, + patch_shape=patchSize, + num_slices=400, + n_normalization_samples=60) self.launchProtocol(protPrepTrainingData) - cryoCareTrainData = getattr(protPrepTrainingData, 'train_data', None) + cryoCareTrainData = getattr(protPrepTrainingData, prepTrainDataOutputs.train_data.name, None) # Check generated object self.assertEqual(type(cryoCareTrainData), CryocareTrainData) self.assertEqual(cryoCareTrainData.getTrainDataDir(), protPrepTrainingData._getExtraPath(TRAIN_DATA_DIR)) - self.assertEqual(cryoCareTrainData.getPatchSize(), 72) + self.assertEqual(cryoCareTrainData.getPatchSize(), patchSize) # Check files generated self.assertTrue(exists(protPrepTrainingData._getExtraPath(TRAIN_DATA_DIR, TRAIN_DATA_FN))) self.assertTrue(exists(protPrepTrainingData._getExtraPath(TRAIN_DATA_DIR, VALIDATION_DATA_FN))) @@ -49,40 +81,35 @@ def _runPrepareTrainingData(self, protImportEven, protImportOdd): return protPrepTrainingData def _runTrainingData(self, protPrepTrainingData): - # # Skipped because of it long execution time. Generated model was stored as part of the test - # # dataset and imported in the prediction test - # print(magentaStr("\n==> Skipping training due to its long execution time")) - # return [] print(magentaStr("\n==> Training")) protTraining = self.newProtocol(ProtCryoCARETraining, train_data=getattr(protPrepTrainingData, 'train_data', None), - batch_size=8) + epochs=2, + steps_per_epoch=10) self.launchProtocol(protTraining) - cryoCareModel = getattr(protTraining, 'model', None) + cryoCareModel = getattr(protTraining, trainOutputs.model.name, None) # Check generated model self.assertEqual(type(cryoCareModel), CryocareModel) - self.assertEqual(cryoCareModel.getPath(), protTraining._getExtraPath()) + self.assertEqual(cryoCareModel.getPath(), protTraining._getExtraPath(CRYOCARE_MODEL_TGZ)) self.assertEqual(cryoCareModel.getTrainDataDir(), protPrepTrainingData._getExtraPath(TRAIN_DATA_DIR)) # Check files and links generated self.assertTrue(exists(protTraining._getExtraPath('train_config.json'))) self.assertTrue(exists(protTraining._getExtraPath(TRAIN_DATA_FN))) self.assertTrue(exists(protTraining._getExtraPath(VALIDATION_DATA_FN))) - self.assertTrue(exists(protTraining._getExtraPath(CRYOCARE_MODEL, 'config.json'))) - self.assertTrue(exists(protTraining._getExtraPath(CRYOCARE_MODEL, 'weights_best.h5'))) - self.assertTrue(exists(protTraining._getExtraPath(CRYOCARE_MODEL, 'weights_last.h5'))) + self.assertTrue(exists(protTraining._getExtraPath(CRYOCARE_MODEL_TGZ))) return protTraining def _runLoadTrainingModel(self): print(magentaStr("\n==> Loading a pre-trained model:")) protImportTM = self.newProtocol( ProtCryoCARELoadModel, - basedir=self.dataset.getFile('model_dir'), - trainDataDir=self.dataset.getFile('training_data_dir')) + trainDataModel=self.dataset.getFile(DataSetCryoCARE.training_data_model.name), + trainDataDir=self.dataset.getFile(DataSetCryoCARE.training_data_dir.name)) protImportTM = self.launchProtocol(protImportTM) - cryoCareModel = getattr(protImportTM, 'model', None) + cryoCareModel = getattr(protImportTM, loadTrainingModelOutputs.model.name, None) self.assertEqual(type(cryoCareModel), CryocareModel) - self.assertEqual(cryoCareModel.getPath(), protImportTM._getExtraPath()) + self.assertEqual(cryoCareModel.getPath(), protImportTM._getExtraPath(CRYOCARE_MODEL_TGZ)) self.assertEqual(cryoCareModel.getTrainDataDir(), protImportTM._getExtraPath()) return protImportTM @@ -100,22 +127,22 @@ def _runPredict(self, protImportEven, protImportOdd, **kwargs): protPredict = self.newProtocol(ProtCryoCAREPrediction, even=protImportEven.outputTomograms, odd=protImportOdd.outputTomograms, - model=getattr(trainedModelProt, 'model', None)) + model=getattr(trainedModelProt, trainOutputs.model.name, None)) self.launchProtocol(protPredict) - output = getattr(protPredict, 'outputTomograms', None) - self.assertEqual(output.getDim(), (1236, 1279, 209)) + output = getattr(protPredict, predictOutputs.tomograms.name, None) + self.assertEqual(output.getDim(), (618, 639, 104)) self.assertEqual(output.getSize(), 1) self.assertEqual(output.getSamplingRate(), self.sRate) - self.assertTrue(exists(protPredict._getExtraPath('Tomo110_bin6_denoised.mrc'))) + self.assertTrue(exists(protPredict._getExtraPath('Tomo110__bin6_denoised', 'Tomo110__bin6.mrc'))) def testWorkflow(self): - importTomoProtEven = self._runImportTomograms('tomo_even', 'even') - importTomoProtOdd = self._runImportTomograms('tomo_odd', 'odd') + importTomoProtEven = self._runImportTomograms(DataSetCryoCARE.tomo_even.name, 'even') + importTomoProtOdd = self._runImportTomograms(DataSetCryoCARE.tomo_odd.name, 'odd') prepTrainingDataProt = self._runPrepareTrainingData(importTomoProtEven, importTomoProtOdd) - # protTraining = self._runTrainingData(prepTrainingDataProt) + protTraining = self._runTrainingData(prepTrainingDataProt) # Prediction from training - # self._runPredict(importTomoProtEven, importTomoProtOdd, protTraining=protTraining) + self._runPredict(importTomoProtEven, importTomoProtOdd, protTraining=protTraining) # Load a pre-trained model and predict protLoadPreTrainedModel = self._runLoadTrainingModel() self._runPredict(importTomoProtEven, importTomoProtOdd, protTraining=protLoadPreTrainedModel) diff --git a/cryocare/utils.py b/cryocare/utils.py index c4e46c2..8004bb8 100644 --- a/cryocare/utils.py +++ b/cryocare/utils.py @@ -1,8 +1,8 @@ # ************************************************************************** # * -# * Authors: you (you@yourinstitution.email) +# * Authors: Scipion Team # * -# * your institution +# * Unidad de Bioinformatica of Centro Nacional de Biotecnologia , CSIC # * # * This program is free software; you can redistribute it and/or modify # * it under the terms of the GNU General Public License as published by @@ -23,11 +23,24 @@ # * e-mail address 'scipion@cnb.csic.es' # * # ************************************************************************** -from os.path import join, exists - +from os.path import join from pyworkflow.utils import createLink +from cryocare.constants import TRAIN_DATA_FN, VALIDATION_DATA_FN, CRYOCARE_MODEL_TGZ + + +def checkInputTomoSetsSize(evenTomoSet, oddTomoSet): + message = '' + xe, ye, ze = evenTomoSet.getDimensions() + xo, yo, zo = oddTomoSet.getDimensions() + ne = evenTomoSet.getSize() + no = oddTomoSet.getSize() + if (xe, ye, ze, ne) != (xo, yo, zo, no): + message = ('Size of even and odd set of tomograms must be the same:\n' + 'Even --> (x, y, z, n) = ({xe}, {ye}, {ze}, {ne})\n' + 'Odd --> (x, y, z, n) = ({xo}, {yo}, {zo}, {no})'.format( + xe=xe, ye=ye, ze=ze, ne=ne, xo=xo, yo=yo, zo=zo, no=no)) -from cryocare.constants import TRAIN_DATA_FN, VALIDATION_DATA_FN + return message def makeDatasetSymLinks(prot, trainDataDir): @@ -36,23 +49,13 @@ def makeDatasetSymLinks(prot, trainDataDir): # be created for each one linkedTrainingDataFile = prot._getExtraPath(TRAIN_DATA_FN) linkedValidationDataFile = prot._getExtraPath(VALIDATION_DATA_FN) - if not exists(linkedTrainingDataFile): - createLink(join(trainDataDir, TRAIN_DATA_FN), linkedTrainingDataFile) - if not exists(linkedValidationDataFile): - createLink(join(trainDataDir, VALIDATION_DATA_FN), linkedValidationDataFile) - -class CryocareUtils: - @staticmethod - def checkInputTomoSetsSize(evenTomoSet, oddTomoSet): - message = '' - xe, ye, ze = evenTomoSet.getDimensions() - xo, yo, zo = oddTomoSet.getDimensions() - ne = evenTomoSet.getSize() - no = oddTomoSet.getSize() - if (xe, ye, ze, ne) != (xo, yo, zo, no): - message = ('Size of even and odd set of tomograms must be the same:\n' - 'Even --> (x, y, z, n) = ({xe}, {ye}, {ze}, {ne})\n' - 'Odd --> (x, y, z, n) = ({xo}, {yo}, {zo}, {no})'.format( - xe=xe, ye=ye, ze=ze, ne=ne, xo=xo, yo=yo, zo=zo, no=no)) - - return message + createLink(join(trainDataDir, TRAIN_DATA_FN), linkedTrainingDataFile) + createLink(join(trainDataDir, VALIDATION_DATA_FN), linkedValidationDataFile) + + +def getModelName(prot): + return prot._getExtraPath(CRYOCARE_MODEL_TGZ) + + + + diff --git a/requirements.txt b/requirements.txt index 2219970..d0dcd66 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1 @@ -scipion-pyworkflow -scipion-em scipion-em-tomo \ No newline at end of file