diff --git a/cobaya/output.py b/cobaya/output.py index 7976850b9..879539085 100644 --- a/cobaya/output.py +++ b/cobaya/output.py @@ -31,6 +31,17 @@ _ext = "txt" +def use_portalocker(): + if os.getenv('COBAYA_USE_FILE_LOCKING', 't').lower() in ('true', '1', 't'): + try: + import portalocker + except ModuleNotFoundError: + return None + else: + return True + return False + + class FileLock: _file_handle: Any @@ -52,12 +63,8 @@ def set_lock(self, log, filename, force=False): self.log = log or get_logger("file_lock") try: h: Any = None - try: + if use_portalocker(): import portalocker - except ModuleNotFoundError: - # will work, but crashes will leave .lock files that will raise error - self._file_handle = open(self.lock_file, 'wb' if force else 'xb') - else: try: h = open(self.lock_file, 'wb') portalocker.lock(h, portalocker.LOCK_EX + portalocker.LOCK_NB) @@ -66,6 +73,9 @@ def set_lock(self, log, filename, force=False): if h: h.close() self.lock_error() + else: + # will work, but crashes will leave .lock files that will raise error + self._file_handle = open(self.lock_file, 'wb' if force else 'xb') except OSError: self.lock_error() @@ -83,11 +93,8 @@ def lock_error(self): import mpi4py else: mpi4py = None - if mpi.is_main_process(): - try: - import portalocker - except ModuleNotFoundError: - self.log.warning('install "portalocker" for better file lock control.') + if mpi.is_main_process() and use_portalocker() is None: + self.log.warning('install "portalocker" for better file lock control.') raise LoggedError(self.log, "File %s is locked.\nYou may be running multiple jobs with " "the same output when you intended to run with MPI. 
" diff --git a/docs/installation.rst b/docs/installation.rst index a6c725e37..01ac4ebbc 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -137,6 +137,17 @@ Simply do, from anywhere Installation troubleshooting ---------------------------- +Problems with file locks +^^^^^^^^^^^^^^^^^^^^^^^^ + +By default Cobaya uses `Portalocker <https://pypi.org/project/portalocker/>`_ to lock output chain files to check that MPI is being used correctly, that only one process is accessing each file, and to clean up files from aborted runs. +If Portalocker is not installed, Cobaya will still work, but files may need to be cleaned up manually. You can also set an environment variable to turn off file locking if it causes problems (e.g. on NERSC home). + +.. code:: bash + + export COBAYA_USE_FILE_LOCKING=false + + .. note:: This section will be filled with the most common problems that our users encounter, so if you followed the instructions above and still something failed (or if you think that the instructions were not clear enough), don't hesitate to contact us! diff --git a/setup.py b/setup.py index a1f410196..bf47ba268 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ def extract_docs_requirements(): return ["sphinx"] + reqs -install_requires = ['numpy>=1.17.0', 'scipy>=1.5', 'pandas>=1.0.1', +install_requires = ['numpy>=1.17.0', 'scipy>=1.5', 'pandas>=1.0.1,!=1.4.0', 'PyYAML>=5.1', 'requests>=2.18', 'py-bobyqa>=1.2', 'GetDist>=1.3.1', 'fuzzywuzzy>=0.17', 'packaging', 'tqdm', 'portalocker>=2.3.0', 'dill>=0.3.3']