Merge pull request #80 from robinzyb/devel

Devel
robinzyb · Dec 4, 2024 · 074752c · 074752c
2 parents 5688fd7 + f929bed
commit 074752c
Show file tree

Hide file tree

Showing 14 changed files with 205 additions and 91 deletions.
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@
 ![PyPI - pip install](https://img.shields.io/pypi/dm/cp2kdata?logo=pypi&label=pip%20install)
 ![Conda Downloads](https://img.shields.io/conda/dn/conda-forge/cp2kdata?label=conda-forge)
 
-Python Package to postprocess cp2k data, including cube, pdos, output files
+A Python package to postprocess CP2K data, including cube, pdos, and output files
 
 
 # Installation
@@ -35,7 +35,7 @@ pip install .
 - [Manipulate CP2K Pdos Files](./docs/pdos/README.md)
 
 # Additional Features
-- [Plug in for dpdata](./docs/dpdata_plugin.md)
+- [The plugin for dpdata](./docs/dpdata_plugin.md)
 
 # Feature Request
 Any advice is welcome. If you would like to request a new feature, please open an issue on GitHub and upload example input and output files.

diff --git a/cp2kdata/output.py b/cp2kdata/output.py
@@ -78,7 +78,7 @@ def __init__(
             raise ValueError(
                 "please provide cp2k output file with MEDIUM print level. Print Level Low doesn't provide necessary information for initialize the cp2kdata class.")
 
-        # -- set some basic attribute --
+        # -- set some basic attributes --
         self.num_frames = None
         self.init_atomic_coordinates = None
         self.atomic_kind = None
@@ -373,12 +373,23 @@ def parse_md(self):
         self.md_info = parse_md_info(self.filename)
         self.check_md_type(md_type=self.md_info.ensemble_type)
 
+        # parse md energies
         ener_file_list = glob.glob(os.path.join(self.path_prefix, "*.ener"))
         if ener_file_list:
             self.energies_list = parse_md_ener(ener_file_list[0])
 
+        # parse md positions
         pos_xyz_file_list = glob.glob(
-            os.path.join(self.path_prefix, "*pos*.xyz"))
+            os.path.join(self.path_prefix, "*-pos-*.xyz"))
+
+        n_pos_xyz_files = len(pos_xyz_file_list)
+        if n_pos_xyz_files > 1:
+            raise ValueError(
+                f"Cp2kData found {n_pos_xyz_files} pos files.\n"
+                f"{pos_xyz_file_list}.\n"
+                f"Please remove extra pos files and keep only one pos file in the folder."
+                )
+
         if pos_xyz_file_list:
             # TODO: Is it possible to have no pos file?
             self.atomic_frames_list, energies_list_from_pos, self.chemical_symbols = parse_pos_xyz(
@@ -388,10 +399,11 @@ def parse_md(self):
                 self.energies_list = energies_list_from_pos
         else:
             # if no pos file and ener file, parse energies from the output file
-            format_logger(info="Energies", filename=self.filename)
-            self.energies_list = parse_energies_list(self.output_file)
-            self.energies_list = self.drop_last_info(
-                self.cp2k_info, self.energies_list)
+            if not hasattr(self, "energies_list"):
+                format_logger(info="Energies", filename=self.filename)
+                self.energies_list = parse_energies_list(self.output_file)
+                self.energies_list = self.drop_last_info(
+                    self.cp2k_info, self.energies_list)
             self.atomic_frames_list = None
 
         frc_xyz_file_list = glob.glob(

diff --git a/cp2kdata/pdos/pdos.py b/cp2kdata/pdos/pdos.py
@@ -243,15 +243,15 @@ def read_dos_fermi(self):
         return fermi
 
     def read_dos_energies(self):
-            """
-            Reads the DOS energies from the file and converts them to electron volts (eV).
+        """
+        Reads the DOS energies from the file and converts them to electron volts (eV).
 
-            Returns:
-                numpy.ndarray: An array of DOS energies in eV.
-            """
-            energies = np.loadtxt(self.file, usecols=1)
-            energies = energies * au2eV
-            return energies
+        Returns:
+            numpy.ndarray: An array of DOS energies in eV.
+        """
+        energies = np.loadtxt(self.file, usecols=1)
+        energies = energies * au2eV
+        return energies
 
     @property
     def occupation(self):
@@ -260,26 +260,33 @@ def occupation(self):
         return occupation
 
     def get_homo_ener(self):
-            """
-            Get the energy of the highest occupied molecular orbital (HOMO).
+        """
+        Get the energy of the highest occupied molecular orbital (HOMO).
 
-            Returns:
-                float: The energy of the HOMO.
-            """
-            homo_idx = np.where(self.occupation == 0)[0][0]-1
-            homo_ener = self.energies[homo_idx]
+        Returns:
+            float: The energy of the HOMO.
+        """
+        homo_idx = np.where(self.occupation == 0)[0][0]-1
+        homo_ener = self.energies[homo_idx]
 
-            return homo_ener
+        return homo_ener
 
     def get_lumo_ener(self):
-            """
-            Get the energy of the lowest unoccupied molecular orbital (LUMO).
-
-            Returns:
-                lumo_ener (float): The energy of the LUMO.
-            """
-            lumo_ener = self.energies[self.occupation == 0][0]
-            return lumo_ener
+        """
+        Get the energy of the lowest unoccupied molecular orbital (LUMO).
+
+        Returns:
+            lumo_ener (float): The energy of the LUMO.
+        """
+        lumo_ener = self.energies[self.occupation == 0][0]
+        return lumo_ener
+
+    def get_homo_lumo_gap(self):
+        """
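+        Get the HOMO-LUMO gap, i.e. the LUMO energy minus the HOMO energy.
+
+        Returns:
+            float: The difference between the LUMO and HOMO energies.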
+        """
+        gap_ener = self.get_lumo_ener() - self.get_homo_ener()
+        return gap_ener
 
     def get_raw_dos(self, dos_type="total", steplen=0.1, usecols=None):
 

diff --git a/cp2kdata/plots/colormaps.py b/cp2kdata/plots/colormaps.py
@@ -0,0 +1,27 @@
+# create color blind friendly colormaps
+
+from matplotlib.colors import LinearSegmentedColormap, ListedColormap
+import matplotlib as mpl
+
+# the colormap was taken from the following source:
+# [1] Wong, Bang. "Points of view: Color coding." Nature Methods 7.8 (2010): 573.
+color_blind_map = [
+    #[0.0/256, 0.0/256, 0.0/256, 1], # Black
+    [230.0/256, 159.0/256, 0.0/256, 1], # Orange
+    [86.0/256, 180.0/256, 233.0/256, 1], # Sky Blue
+    [0.0/256, 158.0/256, 115.0/256, 1], # Bluish Green
+    [240.0/256, 228.0/256, 66.0/256, 1], # Yellow
+    [0.0/256, 114.0/256, 178.0/256, 1], # Blue
+    [213.0/256, 94.0/256, 0.0/256, 1], # Vermilion
+    [204.0/256, 121.0/256, 167.0/256, 1], # Reddish Purple
+]
+
+cb_lcmap = ListedColormap(color_blind_map, name='cp2kdata_cb_lcmap')
+cb_lscmap = LinearSegmentedColormap.from_list(name='cp2kdata_cb_lscmap', colors=color_blind_map)
+
+mpl.colormaps.register(cmap=cb_lcmap)
+print("color blind friendly colormap registered as cp2kdata_cb_lcmap")
+mpl.colormaps.register(cmap=cb_lscmap)
+print("color blind friendly colormap registered as cp2kdata_cb_lscmap")
+
+
diff --git a/docs/backlog.md b/docs/backlog.md
@@ -1,10 +1,9 @@
 
 # Idea List
-1. manipulate cube, pdos data
-2. modify step information on cube files
-3. extract information from output
-4. generate standard test input and directory
-5. generate nice figures
+1. modify step information on cube files
+2. extract information from output
+3. generate standard test input and directory
+4. generate nice figures
 
 # TO DO
 cli interface
diff --git a/docs/figures/cb_lcmap_plot.png b/docs/figures/cb_lcmap_plot.png
diff --git a/docs/figures/cb_lscmap_plot.png b/docs/figures/cb_lscmap_plot.png
diff --git a/docs/figures/cp2kdata_cb_lcmap.png b/docs/figures/cp2kdata_cb_lcmap.png
diff --git a/docs/figures/cp2kdata_cb_lscmap.png b/docs/figures/cp2kdata_cb_lscmap.png
diff --git a/docs/plots.md b/docs/plots.md
@@ -0,0 +1,104 @@
+# Plotting in CP2KData
+
+## Color blind friendly colormaps
+
+> If a submitted manuscript happens to go to three male reviewers of Northern European descent, the chance that at least one will be color blind is 22 percent.
+
+by {cite}`wong2010points`
+
+This shows the importance of creating color blind friendly plots.
+As suggested by the above reference, I implemented the recommended color blind friendly colormaps in the CP2KData package.
+The usage is summarized below.
+
+1. Register the colormaps using cp2kdata
+
+    ```python
+    import matplotlib as mpl
+    import cp2kdata.plots.colormaps
+    ```
+    ```stdout
+    #output
+    color blind friendly colormap registered as cp2kdata_cb_lcmap
+    color blind friendly colormap registered as cp2kdata_cb_lscmap
+    ```
+
+2. Get the colormaps
+
+    There are two colormaps in the package.
+    The first one is a listed colormap, which can also be understood as a discrete colormap.
+    ```python
+    mpl.colormaps['cp2kdata_cb_lcmap']
+    ```
+    ![cbl_cbar](./figures/cp2kdata_cb_lcmap.png)
+
+    The second one is a linear segmented colormap, which can also be understood as a continuous colormap.
+    ```python
+    mpl.colormaps['cp2kdata_cb_lscmap']
+    ```
+    ![cbls_cbar](./figures/cp2kdata_cb_lscmap.png)
+
+3. Example of using the listed colormap
+    ```python
+    import matplotlib.pyplot as plt
+    import matplotlib as mpl
+    import numpy as np
+    import cp2kdata.plots.colormaps
+    plt.style.use('cp2kdata.matplotlibstyle.jcp')
+
+
+    cp2kdata_cb_lcmap = mpl.colormaps['cp2kdata_cb_lcmap']
+    plt.rcParams["axes.prop_cycle"] = plt.cycler("color", cp2kdata_cb_lcmap.colors)
+    row = 1
+    col = 1
+    fig = plt.figure(figsize=(3.37*col, 1.89*row), dpi=300, facecolor='white')
+    gs = fig.add_gridspec(row,col)
+    ax  = fig.add_subplot(gs[0])
+
+    t = np.linspace(-10, 10, 100)
+    def sigmoid(t, t0):
+        return 1 / (1 + np.exp(-(t - t0)))
+
+    nb_colors = len(plt.rcParams['axes.prop_cycle'])
+
+    shifts = np.linspace(-5, 5, nb_colors)
+    amplitudes = np.linspace(1, 1.5, nb_colors)
+    for t0, a in zip(shifts, amplitudes):
+        ax.plot(t, a * sigmoid(t, t0), '-')
+    ax.set_xlim(-10, 10)
+
+    fig.savefig("cb_lcmap_plot.png", dpi=100)
+    ```
+    ![cbl_plot](./figures/cb_lcmap_plot.png)
+4. Example of using the linear segmented colormap
+    ```python
+    import matplotlib.pyplot as plt
+    import matplotlib as mpl
+    import numpy as np
+    import cp2kdata.plots.colormaps
+    plt.style.use('cp2kdata.matplotlibstyle.jcp')
+
+
+    cp2kdata_cb_lscmap = mpl.colormaps['cp2kdata_cb_lscmap']
+    N = 13
+    plt.rcParams["axes.prop_cycle"] = plt.cycler("color", cp2kdata_cb_lscmap(np.linspace(0,1,N)))
+    row = 1
+    col = 1
+    fig = plt.figure(figsize=(3.37*col, 1.89*row), dpi=300, facecolor='white')
+    gs = fig.add_gridspec(row,col)
+    ax  = fig.add_subplot(gs[0])
+
+    t = np.linspace(-10, 10, 100)
+    def sigmoid(t, t0):
+        return 1 / (1 + np.exp(-(t - t0)))
+
+    nb_colors = len(plt.rcParams['axes.prop_cycle'])
+
+    shifts = np.linspace(-5, 5, nb_colors)
+    amplitudes = np.linspace(1, 1.5, nb_colors)
+    for t0, a in zip(shifts, amplitudes):
+        ax.plot(t, a * sigmoid(t, t0), '-')
+    ax.set_xlim(-10, 10)
+
+    fig.savefig("cb_lscmap_plot.png", dpi=100)
+    ```
+    ![cbls_plot](./figures/cb_lscmap_plot.png)
diff --git a/docs/references.md b/docs/references.md
@@ -0,0 +1,3 @@
+# Bibliography
+```{bibliography}
+```
diff --git a/jupyter-book/_toc.yml b/jupyter-book/_toc.yml
@@ -12,6 +12,9 @@ parts:
   - caption: Parameter Test
     chapters:
       - file: docs/input_test
+  - caption: Plots
+    chapters:
+      - file: docs/plots
   - caption: Plugin
     chapters:
       - file: docs/dpdata_plugin

diff --git a/jupyter-book/references.bib b/jupyter-book/references.bib
@@ -1,56 +1,12 @@
 ---
 ---
-
-@inproceedings{holdgraf_evidence_2014,
-	address = {Brisbane, Australia, Australia},
-	title = {Evidence for {Predictive} {Coding} in {Human} {Auditory} {Cortex}},
-	booktitle = {International {Conference} on {Cognitive} {Neuroscience}},
-	publisher = {Frontiers in Neuroscience},
-	author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Knight, Robert T.},
-	year = {2014}
-}
-
-@article{holdgraf_rapid_2016,
-	title = {Rapid tuning shifts in human auditory cortex enhance speech intelligibility},
-	volume = {7},
-	issn = {2041-1723},
-	url = {http://www.nature.com/doifinder/10.1038/ncomms13654},
-	doi = {10.1038/ncomms13654},
-	number = {May},
-	journal = {Nature Communications},
-	author = {Holdgraf, Christopher Ramsay and de Heer, Wendy and Pasley, Brian N. and Rieger, Jochem W. and Crone, Nathan and Lin, Jack J. and Knight, Robert T. and Theunissen, Frédéric E.},
-	year = {2016},
-	pages = {13654},
-	file = {Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:C\:\\Users\\chold\\Zotero\\storage\\MDQP3JWE\\Holdgraf et al. - 2016 - Rapid tuning shifts in human auditory cortex enhance speech intelligibility.pdf:application/pdf}
-}
-
-@inproceedings{holdgraf_portable_2017,
-	title = {Portable learning environments for hands-on computational instruction using container-and cloud-based technology to teach data science},
-	volume = {Part F1287},
-	isbn = {978-1-4503-5272-7},
-	doi = {10.1145/3093338.3093370},
-	abstract = {© 2017 ACM. There is an increasing interest in learning outside of the traditional classroom setting. This is especially true for topics covering computational tools and data science, as both are challenging to incorporate in the standard curriculum. These atypical learning environments offer new opportunities for teaching, particularly when it comes to combining conceptual knowledge with hands-on experience/expertise with methods and skills. Advances in cloud computing and containerized environments provide an attractive opportunity to improve the effciency and ease with which students can learn. This manuscript details recent advances towards using commonly-Available cloud computing services and advanced cyberinfrastructure support for improving the learning experience in bootcamp-style events. We cover the benets (and challenges) of using a server hosted remotely instead of relying on student laptops, discuss the technology that was used in order to make this possible, and give suggestions for how others could implement and improve upon this model for pedagogy and reproducibility.},
-	booktitle = {{ACM} {International} {Conference} {Proceeding} {Series}},
-	author = {Holdgraf, Christopher Ramsay and Culich, A. and Rokem, A. and Deniz, F. and Alegro, M. and Ushizima, D.},
-	year = {2017},
-	keywords = {Teaching, Bootcamps, Cloud computing, Data science, Docker, Pedagogy}
-}
-
-@article{holdgraf_encoding_2017,
-	title = {Encoding and decoding models in cognitive electrophysiology},
-	volume = {11},
-	issn = {16625137},
-	doi = {10.3389/fnsys.2017.00061},
-	abstract = {© 2017 Holdgraf, Rieger, Micheli, Martin, Knight and Theunissen. Cognitive neuroscience has seen rapid growth in the size and complexity of data recorded from the human brain as well as in the computational tools available to analyze this data. This data explosion has resulted in an increased use of multivariate, model-based methods for asking neuroscience questions, allowing scientists to investigate multiple hypotheses with a single dataset, to use complex, time-varying stimuli, and to study the human brain under more naturalistic conditions. These tools come in the form of “Encoding” models, in which stimulus features are used to model brain activity, and “Decoding” models, in which neural features are used to generated a stimulus output. Here we review the current state of encoding and decoding models in cognitive electrophysiology and provide a practical guide toward conducting experiments and analyses in this emerging field. Our examples focus on using linear models in the study of human language and audition. We show how to calculate auditory receptive fields from natural sounds as well as how to decode neural recordings to predict speech. The paper aims to be a useful tutorial to these approaches, and a practical introduction to using machine learning and applied statistics to build models of neural activity. The data analytic approaches we discuss may also be applied to other sensory modalities, motor systems, and cognitive systems, and we cover some examples in these areas. In addition, a collection of Jupyter notebooks is publicly available as a complement to the material covered in this paper, providing code examples and tutorials for predictive modeling in python. The aimis to provide a practical understanding of predictivemodeling of human brain data and to propose best-practices in conducting these analyses.},
-	journal = {Frontiers in Systems Neuroscience},
-	author = {Holdgraf, Christopher Ramsay and Rieger, J.W. and Micheli, C. and Martin, S. and Knight, R.T. and Theunissen, F.E.},
-	year = {2017},
-	keywords = {Decoding models, Encoding models, Electrocorticography (ECoG), Electrophysiology/evoked potentials, Machine learning applied to neuroscience, Natural stimuli, Predictive modeling, Tutorials}
-}
-
-@book{ruby,
-  title     = {The Ruby Programming Language},
-  author    = {Flanagan, David and Matsumoto, Yukihiro},
-  year      = {2008},
-  publisher = {O'Reilly Media}
-}
+@article{wong2010points,
+  title={Points of view: Color coding},
+  author={Wong, Bang},
+  journal={Nature Methods},
+  volume={7},
+  number={8},
+  pages={573},
+  year={2010},
+  publisher={Nature Publishing Group}
+}
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,6 +12,9 @@ description = "A Small Package to Postprocess Cp2k Output"
 authors = [
     {name = "Yong-Bin Zhuang", email = "[email protected]"}
     ]
+maintainers = [
+    {name = "Yong-Bin Zhuang", email = "[email protected]"}
+    ]
 license = {file = "LICENSE"}
 readme = "README.md"
 classifiers = [