diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml new file mode 100644 index 00000000..9612161e --- /dev/null +++ b/.github/workflows/draft-pdf.yml @@ -0,0 +1,23 @@ +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: joss_paper/paper.md + - name: Upload + uses: actions/upload-artifact@v4 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. It must be in the same directory as the input paper.md + # (joss_paper/), otherwise the upload step finds no file. + path: joss_paper/paper.pdf diff --git a/joss_paper/paper.bib b/joss_paper/paper.bib index e69de29b..f0be9a25 100644 --- a/joss_paper/paper.bib +++ b/joss_paper/paper.bib @@ -0,0 +1,155 @@ +@article{bennett1976efficient, + title={Efficient estimation of free energy differences from Monte Carlo data}, + author={Bennett, C. H.}, + journal={Journal of Computational Physics}, + volume={22}, + number={2}, + pages={245-268}, + year={1976}, +} + +@article{case2014ff14sb, + title={The FF14SB force field}, + author={Case, D. and Babin, V. and Berryman, J. and Betz, R. and Cai, Q. and Cerutti, D. and others}, + journal={Amber}, + volume={14}, + pages={29-31}, + year={2014}, +} + +@article{chodera2007use, + title={Use of the Weighted Histogram Analysis Method for the Analysis of Simulated and Parallel Tempering Simulations}, + author={Chodera, J. D. and Swope, W. C. and Pitera, J. W. and Seok, C. and Dill, K. A.}, + journal={J Chem Theory Comput}, + volume={3}, + number={1}, + pages={26-41}, + year={2007}, + doi={10.1021/ct0502864}, +} + +@article{cummings2021open, + title={Open-source molecular modeling software in chemical engineering focusing on the Molecular Simulation Design Framework}, + author={Cummings, P. T. and McCabe, C. and Iacovella, C. R. 
and Ledeczi, A. and Jankowski, E. and Jayaraman, A. and others}, + journal={AIChE Journal}, + volume={67}, + number={3}, + pages={e17206}, + year={2021}, + doi={10.1002/aic.17206}, +} + +@article{deng2009computations, + title={Computations of standard binding free energies with molecular dynamics simulations}, + author={Deng, Y. and Roux, B.}, + journal={J Phys Chem B}, + volume={113}, + number={8}, + pages={2234-2246}, + year={2009}, + doi={10.1021/jp807701h}, +} + +@article{gusev2023active, + title={Active Learning Guided Drug Design Lead Optimization Based on Relative Binding Free Energy Modeling}, + author={Gusev, F. and Gutkin, E. and Kurnikova, M. G. and Isayev, O.}, + journal={Journal of Chemical Information and Modeling}, + volume={63}, + number={2}, + pages={583-594}, + year={2023}, + doi={10.1021/acs.jcim.2c01052}, +} + +@article{hedges2023suite, + title={A Suite of Tutorials for the BioSimSpace Framework for Interoperable Biomolecular Simulation [Article v1.0]}, + author={Hedges, L. O. and Bariami, S. and Burman, M. and Clark, F. and Cossins, B. P. and Hardie, A. and others}, + journal={Living Journal of Computational Molecular Science}, + volume={5}, + number={1}, + pages={2375}, + year={2023}, + doi={10.33011/livecoms.5.1.2375}, +} + +@article{kirkwood1935statistical, + title={Statistical mechanics of fluid mixtures}, + author={Kirkwood, J. G.}, + journal={The Journal of Chemical Physics}, + volume={3}, + number={5}, + pages={300-313}, + year={1935}, +} + +@article{klimovich2015guidelines, + title={Guidelines for the analysis of free energy calculations}, + author={Klimovich, P. V. and Shirts, M. R. and Mobley, D. L.}, + journal={J Comput Aided Mol Des}, + volume={29}, + number={5}, + pages={397-411}, + year={2015}, + doi={10.1007/s10822-015-9840-9}, +} + +@book{merz2010drug, + title={Drug design: structure- and ligand-based approaches}, + author={Merz Jr, K. M. and Ringe, D. and Reynolds, C. 
H.}, + year={2010}, + publisher={Cambridge University Press}, +} + +@article{paliwal2011benchmark, + title={A Benchmark Test Set for Alchemical Free Energy Transformations and Its Use to Quantify Error in Common Free Energy Methods}, + author={Paliwal, H. and Shirts, M. R.}, + journal={J Chem Theory Comput}, + volume={7}, + number={12}, + pages={4115-4134}, + year={2011}, + doi={10.1021/ct2003995}, +} + +@article{pham2011identifying, + title={Identifying low variance pathways for free energy calculations of molecular transformations in solution phase}, + author={Pham, T. T. and Shirts, M. R.}, + journal={J Chem Phys}, + volume={135}, + number={3}, + pages={034114}, + year={2011}, + doi={10.1063/1.3607597}, +} + +@article{phillips2020scalable, + title={Scalable molecular dynamics on CPU and GPU architectures with NAMD}, + author={Phillips, J. C. and Hardy, D. J. and Maia, J. D. C. and Stone, J. E. and Ribeiro, J. V. and Bernardi, R. C. and others}, + journal={The Journal of Chemical Physics}, + volume={153}, + number={4}, + pages={044130}, + year={2020}, + doi={10.1063/5.0014475}, +} + +@article{pohorille2010good, + title={Good practices in free-energy calculations}, + author={Pohorille, A. and Jarzynski, C. and Chipot, C.}, + journal={J Phys Chem B}, + volume={114}, + number={32}, + pages={10235-10253}, + year={2010}, + doi={10.1021/jp102971x}, +} + +@article{pronk2013gromacs, + title={GROMACS 4.5: a high-throughput and highly parallel open source molecular simulation toolkit}, + author={Pronk, S. and Páll, S. and Schulz, R. and Larsson, P. and Bjelkmar, P. and Apostolov, R. 
and others}, + journal={Bioinformatics}, + volume={29}, + number={7}, + pages={845-854}, + year={2013}, + doi={10.1093/bioinformatics/btt055}, diff --git a/joss_paper/paper.md b/joss_paper/paper.md index 2d378935..7c2be3dc 100644 --- a/joss_paper/paper.md +++ b/joss_paper/paper.md @@ -22,7 +22,7 @@ affiliations: - name: Institution Name, Country index: 2 -date: 99 August 2023 +date: 29 December 2023 bibliography: paper.bib --- @@ -35,37 +35,37 @@ A distinctive attribute of alchemlyb is its streamlined, end-to-end analysis pro # Statement of need -In the pharmaceutical sector, computational chemistry techniques, particularly relative/absolute binding free energy calculations, are regularly employed to rank potential drug compounds based on their protein-binding affinity. These calculations produce free energy data, which alchemlyb expertly processes to offer free energy estimates. These estimates provide critical insights into the binding affinity at various stages of drug discovery, such as hit identification and lead optimization. alchemlyb's unique capacity to cater to this need has cemented its role as an invaluable asset in computational chemistry. +In the pharmaceutical sector, the utilization of computational chemistry techniques is integral for evaluating potential drug compounds based on their protein-binding affinity [@deng2009computations]. Notably, relative/absolute binding free energy calculations are routinely employed for this purpose [@merz2010drug]. The resultant free energy data is essential for understanding binding affinity throughout various stages of drug discovery, such as hit identification and lead optimization [@merz2010drug]. The alchemlyb software adeptly processes this data, providing crucial insights and establishing itself as an indispensable asset in computational chemistry. 
-Moreover, within the realm of computational research, different MD engines, including GROMACS, AMBER, OpenMM, and NAMD, have their distinct sets of tools for conducting free energy calculations. This diversity complicates the research process, as data from different engines necessitate unique processing and analysis methods. -The solution to this complication comes in the form of alchemlyb, providing a unified, engine-agnostic analysis workflow. This allows for consistent analysis of free energy data from different MD engines, making it possible for researchers to compare and combine results from various engines in a more streamlined manner. +In the realm of computational research, various molecular dynamics (MD) engines, including GROMACS [@pronk2013gromacs], AMBER [@case2014ff14sb], GOMC [@cummings2021open], and NAMD [@phillips2020scalable], offer distinct tools for conducting free energy calculations. However, the diversity in output formats and analysis tools among different MD engines complicates the research process. Data generated by each engine requires unique processing and analysis methods, hindering seamless collaboration and comparison of results. -# Citations -Citations to entries in paper.bib should be in -[rMarkdown](http://rmarkdown.rstudio.com/authoring_bibliographies_and_citations.html) -format. +Addressing this complexity is the "alchemical-analysis.py" tool [@klimovich2015guidelines], which precedes alchemlyb. Although "alchemical-analysis.py" has been deprecated, alchemlyb continues to provide a unified, engine-agnostic analysis workflow. Unlike its predecessor, alchemlyb breaks down components into individual tools, allowing users to customize their analysis. This innovation enables consistent processing of free energy data from diverse MD engines, facilitating streamlined comparison and combination of results. -If you want to cite a software repository URL (e.g. 
something on GitHub without a preferred -citation) then you can do it with the example BibTeX entry below for @fidgit. -For a quick reference, the following citation commands can be used: -- `@author:2001` -> "Author et al. (2001)" -- `[@author:2001]` -> "(Author et al., 2001)" -- `[@author1:2001; @author2:2001]` -> "(Author1 et al., 2001; Author2 et al., 2002)" +Notably, alchemlyb's robust and user-friendly nature has led to its integration into other automated workflow libraries such as BioSimSpace [@hedges2023suite]. This further enhances its accessibility and usability within broader scientific workflows, reinforcing its position as a versatile and essential tool in the field of computational chemistry. -# Figures +# Implementation -Figures can be included like this: -![Caption for example figure.\label{fig:example}](figure.png) -and referenced from text using \autoref{fig:example}. +The binding free energy of a drug within a protein is defined as the disparity in free energy between the drug's end-state in the protein's binding pocket and its alternative end-state in a solution, typically water. Absolute binding free energy calculations employ a thermodynamic cycle that establishes a connection between these two end-states through two alchemical legs, namely the bound and free legs (Figure 1). In the bound leg, the drug is decoupled from the binding pocket, while in the free leg, the same drug is decoupled from the solvent. The resulting free energy difference represents the energy required to transfer the drug from the solvent to the protein binding pocket, constituting the binding free energy of the drug. + +To determine the free energy difference associated with decoupling a drug from its environment, it is essential to ensure sufficient overlap in phase space between the coupled and decoupled states, a condition often challenging to achieve. 
Overlapping is facilitated by introducing a parameter lambda that connects the two end-states, leading to the creation of a series of intermediate states. Molecular dynamics (MD) engines are employed to simulate the system at these states, generating and accumulating free energy data. + +Alchemlyb offers specific parsers designed to load raw free energy data from various MD engines, converting them into standard pandas dataframes. Two types of free energy data are considered: potential energy differences between adjacent lambda states, suitable for free energy perturbation (FEP) methods (Zwanzig, 1954), and dU/dlambda at all lambda states, suitable for thermodynamic integration (TI) methods [@kirkwood1935statistical]. + +In alchemlyb, TI [@paliwal2011benchmark] and TI with Gaussian quadrature [@gusev2023active] methods are implemented in the TI category. Perturbation category methods include Bennett Acceptance Ratio (BAR) [@bennett1976efficient] and Multistate BAR (MBAR) (Shirts & Chodera, 2008). These methods necessitate uncorrelated samples, and alchemlyb provides tools for data resampling based on autocorrelation times [@chodera2007use]. + +To evaluate the accuracy of the free energy estimate, alchemlyb offers specific assessment tools. The error of the TI method is correlated with the average curvature [@pham2011identifying], while the error of perturbation methods depends on the overlap in sampled energy distributions [@pohorille2010good]. Alchemlyb visualizes the smoothness of the integrand for TI methods and the overlap matrix for perturbation methods. Additionally, the accumulated samples should be at an equilibrated state, and alchemlyb allows for plotting the convergence of the free energy estimate as a function of simulation time (Yang, Bitetti-Putzer, & Karplus, 2004) to detect potentially un-equilibrated data. 
+ + +Alchemlyb offers all these tools as a library for users to customize each stage of the analysis (Figure 2). Additionally, alchemlyb provides an automated end-to-end tool that reads in the raw input data and performs the decorrelation, estimation, and quality plotting of the estimate. This automated workflow allows users to experience a process similar to "alchemical-analysis.py" [@klimovich2015guidelines], which is the predecessor of alchemlyb. -Figure sizes can be customized by adding an optional second parameter: -![Caption for example figure.](figure.png){ width=20% } # Acknowledgements We acknowledge contributions from XXXXX during the genesis of this project. -# References \ No newline at end of file +# References + +