diff --git a/.github/workflows/run_esmac_diags_tests.yml b/.github/workflows/run_esmac_diags_tests.yml new file mode 100644 index 0000000..a6db113 --- /dev/null +++ b/.github/workflows/run_esmac_diags_tests.yml @@ -0,0 +1,25 @@ +name: run_esmac_diags_ci.yml +on: [push,pull_request ] +jobs: + build_and_publish: + name: Setup Environment, and run test suite + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - name: Setup Environment + uses: conda-incubator/setup-miniconda@v2 + with: + python-version: 3.7 + mamba-version: "*" + channels: conda-forge,defaults + channel-priority: true + activate-environment: esmac_diags + environment-file: environment.yml + - name: Install esmac_diags + shell: bash -l {0} + run: | + pip install -e . + - name: Run Pytest + shell: bash -l {0} + run: | + pytest \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a81c8ee --- /dev/null +++ b/.gitignore @@ -0,0 +1,138 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ diff --git a/Python_Dependency.txt b/Python_Dependency.txt deleted file mode 100644 index a1f4fb3..0000000 --- a/Python_Dependency.txt +++ /dev/null @@ -1,8 +0,0 @@ -os -sys -glob -time -numpy -scipy -matplotlib -netCDF4 diff --git a/README.md b/README.md index ddb676d..efe4f5d 100644 --- a/README.md +++ b/README.md @@ -3,23 +3,28 @@ This Earth System Model (ESM) aerosol-cloud diagnostics package (ESMAC Diags) is currently used to evaluate aerosols, clouds and aerosol-cloud interactions simulated by the Department of Energy’s (DOE) Energy Exascale Earth System Model (E3SM). The first version (v1.0) focuses on comparing simulated aerosol properties with in-situ aircraft, ship and surface measurements. Various types of diagnostics and metrics are performed for aerosol number, size distribution, chemical composition, and CCN concentration to assess how well E3SM represents observed aerosol properties across spatial scales. Metrics for various meteorological and aerosol precursor quantities from the same field campaigns are also included. Version 2 is under development focusing on aerosol-cloud interactions. -More information can be found in README_ESMAC_Diags_v1.0.pdf +More information can be found in README_ESMAC_Diags_v1.0-alpha.pdf -# Package dependencies -This code is dependent on the following python packages: +## To install +This code is best run using a conda virtual environment. 
To install the required environment one can do +```bash +conda env create -f environment.yml +``` +to set up a esmac_diags environment. Note if running this on a HPC system, you may need to load the appropriate module for anaconda. -os -sys -glob -time -numpy -scipy -matplotlib -netCDF4 +Once the environment has been created you can activate it with ```conda activate esmac_diags``` and then this code can be installed with +```bash +pip install -e . +``` +Which will install the code as editable allowing you to make changes to the codebase and it be reflected in the installed package. # Test run -To verify the package, enter scripts/ directory and run scripts_testcase.csh. Then check the directory in testcase/figures/. There should be three figures generated: +To verify the package, enter scripts/ directory and run +```bash +python run_testcase.py +``` +Then go to the directory in testcase/figures/. There should be three figures generated: flighttrack_ACEENA_20170630a.png diff --git a/README_ESMAC_Diags_v1.0-alpha.pdf b/README_ESMAC_Diags_v1.0-alpha.pdf index 1a09fb0..17735c0 100644 Binary files a/README_ESMAC_Diags_v1.0-alpha.pdf and b/README_ESMAC_Diags_v1.0-alpha.pdf differ diff --git a/data b/data deleted file mode 120000 index 87a636a..0000000 --- a/data +++ /dev/null @@ -1 +0,0 @@ -/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/data \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..759388c --- /dev/null +++ b/environment.yml @@ -0,0 +1,17 @@ +name: esmac_diags +channels: + - conda-forge + - anaconda + - defaults +dependencies: + - pip + - matplotlib + - numpy + - scipy + - sphinx + - conda-build + - pytest + - pytest-shutil + - ipython + - black + - netCDF4 diff --git a/figures b/figures deleted file mode 120000 index bb5f731..0000000 --- a/figures +++ /dev/null @@ -1 +0,0 @@ -/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/figures \ No newline at end of file diff --git a/pyproject.toml 
b/pyproject.toml new file mode 100644 index 0000000..ec06f16 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.pytest.ini_options] +minversion = "6.0" +addopts = "-ra -q" +testpaths = [ + "tests" +] +norecursedirs = "src/esmac_diags/*" \ No newline at end of file diff --git a/python/plotting/calc_statistic_flight_CN.py b/python/plotting/calc_statistic_flight_CN.py deleted file mode 100644 index 6806383..0000000 --- a/python/plotting/calc_statistic_flight_CN.py +++ /dev/null @@ -1,490 +0,0 @@ -""" -# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration -# for aircraft measurements -# compare models and CPC measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -# import matplotlib.pyplot as plt -import os -import glob -import numpy as np -import scipy.stats -from read_aircraft import read_cpc, read_RF_NCAR -from read_netcdf import read_merged_size,read_extractflight -from quality_control import qc_cpc_air, qc_remove_neg, qc_mask_takeoff_landing - -#%% settings - -from settings import campaign, Model_List, E3SM_aircraft_path, figpath_aircraft_statistics - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, cpcpath,merged_size_path -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('please check campaign name: '+campaign) - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - -missing_value = -999999. 
- -#%% find files for flight information - -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -#%% read all data - -uhsas100_o = np.empty(0) # large particles. UHSAS for CSET and SOCRATES, PCASP for ACEENA and HISCALE -cpc10_o = np.empty(0) -cpc3_o = np.empty(0) -ncn100_m = [] -ncn10_m = [] -ncn3_m = [] -nmodels=len(Model_List) -for mm in range(nmodels): - ncn100_m.append(np.empty(0)) - ncn10_m.append(np.empty(0)) - ncn3_m.append(np.empty(0)) - -print('reading '+format(len(alldates))+' files to calculate the statistics: ') - -for date in alldates: - print(date) - - #%% read in Models - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,cpc_m,timeunitm,ncn_unit,ncn_longname)=read_extractflight(filename_m,'NCN') - (timem,heightm,cpcu_m,timeunitm,ncnu_unit,ncnu_longname)=read_extractflight(filename_m,'NUCN') - (timem,heightm,ncnall,timeunitm,ncnall_unit,ncnall_longname)=read_extractflight(filename_m,'NCNall') - - if campaign=='HISCALE': - if IOP=='IOP1': # PCASP for HISCALE IOP1 size from 0.12 to 3 um - ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[120:,:],0)*1e-6)) - elif IOP=='IOP2': # PCASP for HISCALE IOP1 size from 0.09 to 3 um - ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[90:,:],0)*1e-6)) - 
else: - ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[100:,:],0)*1e-6)) - ncn10_m[mm] = np.hstack((ncn10_m[mm], cpc_m*1e-6)) # #/m3 to #/cm3 - ncn3_m[mm] = np.hstack((ncn3_m[mm], cpcu_m*1e-6)) # #/m3 to #/cm3 - - - #%% read in flight measurements (CPC and PCASP) for HISCALE and ACEENA - if campaign in ['HISCALE', 'ACEENA']: - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - if campaign=='HISCALE': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') - filename_merge = merged_size_path+'merged_bin_fims_pcasp_HISCALE_'+date+'.nc' - elif campaign=='ACEENA': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') - filename_merge = merged_size_path+'merged_bin_fims_pcasp_opc_ACEENA_'+date+'.nc' - filename_c.sort() - - # read in CPC - if len(filename_c)==1 or len(filename_c)==2: # some days have two flights - (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) - # fill missing timestep - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) - elif np.logical_and(campaign=='HISCALE', date=='20160425a'): - cpc=np.insert(cpc,0,cpc[:,0],axis=1) - cpc[0,0]=cpc[0,0]-1 - time_cpc = cpc[0,:] - cpc10 = cpc[1,:] - cpc3 = cpc[2,:] - elif len(filename_c)==0: - time_cpc=timem - cpc10=np.nan*np.empty([len(timem)]) - cpc3=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files: '+filename_c) - - # some quality checks - (cpc3,cpc10) = qc_cpc_air(cpc3, cpc10) - - # read in PCASP - (time_merge,size,pcasp,timeunit,pcaspunit,pcasplongname)=read_merged_size(filename_merge,'totalnum_pcasp') - pcasp=qc_remove_neg(pcasp) - if len(time_merge)!=len(time_cpc): - raise ValueError('time dimension is inconsistent ') - - # exclude 30min after takeoff and before landing - cpc3 = qc_mask_takeoff_landing(time_cpc,cpc3) - cpc10 = qc_mask_takeoff_landing(time_cpc,cpc10) - pcasp = qc_mask_takeoff_landing(time_cpc,pcasp) - - cpc10_o=np.hstack((cpc10_o, cpc10)) - 
cpc3_o=np.hstack((cpc3_o, cpc3)) - uhsas100_o=np.hstack((uhsas100_o, pcasp)) - - #%% read in flight data (for CSET and SOCRATES) - elif campaign in ['CSET', 'SOCRATES']: - filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - if len(filename)==1 or len(filename)==2: # SOCRATES has two flights in 20180217, choose the later one - (time_cpc,cpc10,timeunit,cpc10unit,cpc10longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCN') - if campaign=='CSET': - (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCU100_RWOOU') - elif campaign=='SOCRATES': - # there are two variables: CONCU100_CVIU and CONCU100_LWII - (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCU100_LWII') - else: - raise ValueError('find too many files: '+filename) - - # some quality checks - uhsas100=qc_remove_neg(uhsas100) - - # exclude 30min after takeoff and before landing - cpc10 = qc_mask_takeoff_landing(time_cpc,cpc10) - uhsas100 = qc_mask_takeoff_landing(time_cpc,uhsas100) - - cpc10_o=np.hstack((cpc10_o, cpc10)) - uhsas100_o=np.hstack((uhsas100_o, uhsas100)) - - -#%% calculate statistics - -# select only valid data in obs and the corresponding data in models -idx100 = ~np.isnan(uhsas100_o) -idx10 = ~np.isnan(cpc10_o) -idx3 = ~np.isnan(cpc3_o) - -mean100 = [None]*(nmodels+1) -mean10 = [None]*(nmodels+1) -mean3 = [None]*(nmodels+1) -std100 = [None]*(nmodels+1) -std10 = [None]*(nmodels+1) -std3 = [None]*(nmodels+1) -bias100 = [None]*(nmodels) -bias10 = [None]*(nmodels) -bias3 = [None]*(nmodels) -corr100 = [None]*(nmodels) -corr10 = [None]*(nmodels) -corr3 = [None]*(nmodels) -rmse100 = [None]*(nmodels) -rmse10 = [None]*(nmodels) -rmse3 = [None]*(nmodels) -p10_100 = [None]*(nmodels+1) -p10_10 = [None]*(nmodels+1) -p10_3 = [None]*(nmodels+1) -p25_100 = [None]*(nmodels+1) -p25_10 = [None]*(nmodels+1) -p25_3 = [None]*(nmodels+1) -p75_100 = [None]*(nmodels+1) -p75_10 = [None]*(nmodels+1) 
-p75_3 = [None]*(nmodels+1) -p90_100 = [None]*(nmodels+1) -p90_10 = [None]*(nmodels+1) -p90_3 = [None]*(nmodels+1) - -if sum(idx10)/len(idx10)<0.1: # two few observation available - # for obs - mean10[nmodels] = missing_value - std10[nmodels] = missing_value - p10_10[nmodels] = missing_value - p25_10[nmodels] = missing_value - p75_10[nmodels] = missing_value - p90_10[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn10_m[mm][idx10]) - std10[mm] = np.nanstd(ncn10_m[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],90) - bias10[mm] = missing_value - corr10[mm] = [missing_value, missing_value] - rmse10[mm] = missing_value -else: - # for obs - mean10[nmodels] = np.nanmean(cpc10_o[idx10]) - std10[nmodels] = np.nanstd(cpc10_o[idx10]) - p10_10[nmodels] = np.nanpercentile(cpc10_o[idx10],10) - p25_10[nmodels] = np.nanpercentile(cpc10_o[idx10],25) - p75_10[nmodels] = np.nanpercentile(cpc10_o[idx10],75) - p90_10[nmodels] = np.nanpercentile(cpc10_o[idx10],90) - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn10_m[mm][idx10]) - std10[mm] = np.nanstd(ncn10_m[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],90) - bias10[mm] = mean10[mm] - mean10[nmodels] - c10 = scipy.stats.pearsonr(ncn10_m[mm][idx10],cpc10_o[idx10]) - corr10[mm] = [c10[0],c10[1]] - rmse10[mm] = np.sqrt(((ncn10_m[mm][idx10]-cpc10_o[idx10])**2).mean()) - -if sum(idx100)/len(idx100)<0.1: # two few observation available - # for obs - mean100[nmodels] = missing_value - std100[nmodels] = missing_value - p10_100[nmodels] = missing_value - p25_100[nmodels] = missing_value - p75_100[nmodels] = 
missing_value - p90_100[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) - std100[mm] = np.nanstd(ncn100_m[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) - bias100[mm] = missing_value - corr100[mm] = [missing_value, missing_value] - rmse100[mm] = missing_value -else: - # for obs - mean100[nmodels] = np.nanmean(uhsas100_o[idx100]) - std100[nmodels] = np.nanstd(uhsas100_o[idx100]) - p10_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],10) - p25_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],25) - p75_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],75) - p90_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],90) - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) - std100[mm] = np.nanstd(ncn100_m[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) - bias100[mm] = mean100[mm] - mean100[nmodels] - c100 = scipy.stats.pearsonr(ncn100_m[mm][idx100],uhsas100_o[idx100]) - corr100[mm] = [c100[0],c100[1]] - rmse100[mm] = np.sqrt(((ncn100_m[mm][idx100]-uhsas100_o[idx100])**2).mean()) - -if len(idx3)==0 or sum(idx3)/len(idx3)<0.1: # two few observation available - # for obs - mean3[nmodels] = missing_value - std3[nmodels] = missing_value - p10_3[nmodels] = missing_value - p25_3[nmodels] = missing_value - p75_3[nmodels] = missing_value - p90_3[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean3[mm] = np.nanmean(ncn3_m[mm][idx3]) - std3[mm] = np.nanstd(ncn3_m[mm][idx3]) - p10_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],10) - p25_3[mm] = 
np.nanpercentile(ncn3_m[mm][idx3],25) - p75_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],75) - p90_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],90) - bias3[mm] = missing_value - corr3[mm] = [missing_value, missing_value] - rmse3[mm] = missing_value -else: - # for obs - mean3[nmodels] = np.nanmean(cpc3_o[idx3]) - std3[nmodels] = np.nanstd(cpc3_o[idx3]) - p10_3[nmodels] = np.nanpercentile(cpc3_o[idx3],10) - p25_3[nmodels] = np.nanpercentile(cpc3_o[idx3],25) - p75_3[nmodels] = np.nanpercentile(cpc3_o[idx3],75) - p90_3[nmodels] = np.nanpercentile(cpc3_o[idx3],90) - # for models - for mm in range(nmodels): - mean3[mm] = np.nanmean(ncn3_m[mm][idx3]) - std3[mm] = np.nanstd(ncn3_m[mm][idx3]) - p10_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],10) - p25_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],25) - p75_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],75) - p90_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],90) - bias3[mm] = mean3[mm] - mean3[nmodels] - c3 = scipy.stats.pearsonr(ncn3_m[mm][idx3],cpc3_o[idx3]) - corr3[mm] = [c3[0],c3[1]] - rmse3[mm] = np.sqrt(((ncn3_m[mm][idx3]-cpc3_o[idx3])**2).mean()) - - -#%% write out files - -if campaign in ['HISCALE', 'ACEENA']: - outfile = figpath_aircraft_statistics+'statistics_CN10nm_'+campaign+'_'+IOP+'.txt' -elif campaign in ['CSET', 'SOCRATES']: - outfile = figpath_aircraft_statistics+'statistics_CN10nm_'+campaign+'.txt' - -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). sample size '+format(sum(idx10))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean10)): - f.write(format(mean10[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std10)): - f.write(format(std10[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_10)): - f.write(format(p10_10[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_10)): - f.write(format(p25_10[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_10)): - f.write(format(p75_10[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_10)): - f.write(format(p90_10[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias10)): - f.write(format(bias10[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse10)): - f.write(format(rmse10[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][1],'10.2f')+', ') - - -if campaign in ['HISCALE', 'ACEENA']: - outfile = figpath_aircraft_statistics+'statistics_CN3nm_'+campaign+'_'+IOP+'.txt' - print('write statistics to file '+outfile) - with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with CPC(>3nm). sample size '+format(sum(idx3))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean3)): - f.write(format(mean3[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std3)): - f.write(format(std3[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_3)): - f.write(format(p10_3[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_3)): - f.write(format(p25_3[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_3)): - f.write(format(p75_3[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_3)): - f.write(format(p90_3[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias3)): - f.write(format(bias3[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse3)): - f.write(format(rmse3[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse3)): - f.write(format(corr3[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse3)): - f.write(format(corr3[ii][1],'10.2f')+', ') - - -if campaign in ['HISCALE', 'ACEENA']: - outfile = figpath_aircraft_statistics+'statistics_CN100nm_'+campaign+'_'+IOP+'.txt' -elif campaign in ['CSET', 'SOCRATES']: - outfile = figpath_aircraft_statistics+'statistics_CN100nm_'+campaign+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - if campaign in ['CSET', 'SOCRATES']: - f.write('statistics of Aerosol Number Concentration comparing with UHSAS(>100nm). sample size '+format(sum(idx100))+'\n') - elif campaign=='ACEENA': - f.write('statistics of Aerosol Number Concentration comparing with PCASP(>100nm). sample size '+format(sum(idx100))+'\n') - elif campaign=='HISCALE': - f.write('statistics of Aerosol Number Concentration comparing with PCASP(>120nm for IOP1, >90nm for IOP2). 
sample size '+format(sum(idx100))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean100)): - f.write(format(mean100[ii],'10.2f')+', ') - # write std - f.write('\n std. dev.,') - for ii in range(len(std100)): - f.write(format(std100[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_100)): - f.write(format(p10_100[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_100)): - f.write(format(p25_100[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_100)): - f.write(format(p75_100[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_100)): - f.write(format(p90_100[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias100)): - f.write(format(bias100[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse100)): - f.write(format(rmse100[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][1],'10.2f')+', ') \ No newline at end of file diff --git a/python/plotting/calc_statistic_sfc_CN.py b/python/plotting/calc_statistic_sfc_CN.py deleted file mode 100644 index ea1930a..0000000 --- a/python/plotting/calc_statistic_sfc_CN.py +++ /dev/null @@ -1,548 +0,0 @@ -""" -# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration -# for surface measurements -# compare models and CPC measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -# import matplotlib.pyplot as plt -import os -import glob -import numpy as np -import scipy.stats -from time_format_change import yyyymmdd2cday, 
cday2mmdd, timeunit2cday -from read_ARMdata import read_cpc,read_uhsas -from read_netcdf import read_E3SM -from quality_control import qc_remove_neg, qc_mask_qcflag_cpc,qc_mask_qcflag -from specific_data_treatment import avg_time_1d - - -#%% settings - -from settings import campaign, cpcsfcpath, cpcusfcpath, uhsassfcpath, Model_List, \ - IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_statistics - -# set time range you want to average -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') -year0 = start_date[0:4] - -# # in calendar day -# if campaign=='ACEENA': -# if IOP=='IOP1': -# time_range = [172,212] -# elif IOP=='IOP2': -# time_range = [1,59] -# elif campaign=='HiScale': -# if IOP=='IOP1': -# time_range = [115,157] -# elif IOP=='IOP2': -# time_range = [238,268] - - -if not os.path.exists(figpath_sfc_statistics): - os.makedirs(figpath_sfc_statistics) - -missing_value = -999999. -missing_value = np.nan - - -#%% read in obs data -if campaign=='ACEENA': - # cpc - if IOP=='IOP1': - lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.2017062*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201707*') - elif IOP=='IOP2': - lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201801*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201802*') - lst.sort() - t_cpc=np.empty(0) - cpc=np.empty(0) - for filename in lst: - (time,data,qc,timeunit,cpcunit)=read_cpc(filename) - data = qc_mask_qcflag(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for consistent comparison with model - time2=np.arange(0,86400,3600) - data2 = avg_time_1d(np.array(time),np.array(data),time2) - t_cpc=np.hstack((t_cpc, cday+time2/86400)) - cpc=np.hstack((cpc, data2)) - # fill missing days - t_cpc2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
- cpc2=avg_time_1d(t_cpc,cpc,t_cpc2) - cpc=cpc2 - t_cpc=t_cpc2 - cpc = qc_remove_neg(cpc) - # no cpcu - t_cpcu = np.array(np.nan) - cpcu = np.array(np.nan) - - # uhsas - if IOP=='IOP1': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') - elif IOP=='IOP2': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') - lst.sort() - t_uhsas=np.empty(0) - uhsas=np.empty(0) - for filename in lst: - (time,dmin,dmax,data,timeunit,uhsasunit,long_name)=read_uhsas(filename) - # sum up uhsas data for size >100nm - data=np.ma.filled(data,np.nan) - idx100 = dmin>=100 - data1=np.nansum(data[:,idx100],1) - # average in time for consistent comparison with model - time2=np.arange(0,86400,3600) - data2 = avg_time_1d(np.array(time),np.array(data1),time2) - t_uhsas=np.hstack((t_uhsas, timeunit2cday(timeunit)+time2/86400)) - uhsas=np.hstack((uhsas, data2)) - # fill missing days - t_uhsas2=np.arange(cday1*24,cday2*24+0.01,1)/24. - uhsas2=avg_time_1d(t_uhsas,uhsas,t_uhsas2) - uhsas=uhsas2 - t_uhsas=t_uhsas2 - - -elif campaign=='HISCALE': - # cpc - if IOP=='IOP1': - lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201604*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201605*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201606*') - elif IOP=='IOP2': - lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201608*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201609*') - lst.sort() - t_cpc=np.empty(0) - cpc=np.empty(0) - if len(lst)==0: - t_cpc = np.array(np.nan) - cpc = np.array(np.nan) - else: - for filename in lst: - (time,data,qc,timeunit,cpcunit)=read_cpc(filename) - data = qc_mask_qcflag_cpc(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_cpc= np.hstack((t_cpc,cday+time/86400)) - cpc=np.hstack((cpc,data)) - # average in time for consistent comparison with model - t_cpc2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
- cpc2=avg_time_1d(t_cpc,cpc,t_cpc2) - cpc=cpc2 - t_cpc=t_cpc2 - cpc = qc_remove_neg(cpc) - - # cpcu - if IOP=='IOP1': - lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201604*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201605*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201606*') - elif IOP=='IOP2': - lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201608*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201609*') - lst.sort() - t_cpcu=np.empty(0) - cpcu=np.empty(0) - if len(lst)==0: - t_cpcu = np.array(np.nan) - cpcu = np.array(np.nan) - else: - for filename in lst: - (time,data,qc,timeunit,cpcuunit)=read_cpc(filename) - data = qc_mask_qcflag_cpc(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # t_cpcu= np.hstack((t_cpcu,cday+time/86400)) - # cpcu=np.hstack((cpcu,data)) - # average in time for consistent comparison with model - time2=np.arange(0,86400,3600) - data2 = avg_time_1d(np.array(time),np.array(data),time2) - t_cpcu=np.hstack((t_cpcu, cday+time2/86400)) - cpcu=np.hstack((cpcu, data2)) - cpcu = qc_remove_neg(cpcu) - # # average in time for consistent comparison with model - # t_cpcu2=np.arange(t_cpcu[0]*24,t_cpcu[-1]*24,1)/24. 
- # cpcu2=avg_time_1d(t_cpcu,cpcu,t_cpcu2) - # cpcu=cpcu2 - # t_cpcu=t_cpcu2 - - # uhsas - if IOP=='IOP1': - lst = glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201604*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201605*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201606*') - elif IOP=='IOP2': - lst = glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201608*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201609*') - lst.sort() - t_uhsas=np.empty(0) - uhsas=np.empty(0) - for filename in lst: - (time,dmin,dmax,data,timeunit,uhsasunit,long_name)=read_uhsas(filename) - # sum up uhsas data for size >100nm - data=np.ma.filled(data,np.nan) - idx100 = dmin>=100 - data1=np.nansum(data[:,idx100],1) - # average in time for consistent comparison with model - time2=np.arange(0,86400,3600) - data2 = avg_time_1d(np.array(time),np.array(data1),time2) - t_uhsas=np.hstack((t_uhsas, timeunit2cday(timeunit)+time2/86400)) - uhsas=np.hstack((uhsas, data2)) - # fill missing days - t_uhsas2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
- uhsas2=avg_time_1d(t_uhsas,uhsas,t_uhsas2) - uhsas=uhsas2 - t_uhsas=t_uhsas2 - - -#%% read in models -ncn100_m = [] -ncn_m = [] -nucn_m = [] -nmodels = len(Model_List) -for mm in range(nmodels): - tmp_ncn100=np.empty(0) - tmp_ncn=np.empty(0) - tmp_nucn=np.empty(0) - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCN') - (time,nucn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NUCN') - (time,ncnall,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCNall') - - timem = np.hstack((timem,time)) - tmp_ncn = np.hstack((tmp_ncn,ncn*1e-6)) - tmp_nucn = np.hstack((tmp_nucn,nucn*1e-6)) - tmp_ncn100 = np.hstack((tmp_ncn100, np.sum(ncnall[100:,:],0)*1e-6)) - - ncn100_m.append(tmp_ncn100) - ncn_m.append(tmp_ncn) - nucn_m.append(tmp_nucn) - -#%% calculate statistics - -# only choose the prescribed time range -idx = np.logical_and(t_cpc>=cday1, t_cpc<=cday2) -cpc=cpc[idx] -t_cpc=t_cpc[idx] -idx = np.logical_and(t_cpcu>=cday1, t_cpcu<=cday2) -cpcu=cpcu[idx] -t_cpcu=t_cpcu[idx] -idx = np.logical_and(t_uhsas>=cday1, t_uhsas<=cday2) -uhsas=uhsas[idx] -t_uhsas=t_uhsas[idx] -idx = np.logical_and(timem>=cday1, timem<=cday2) -for mm in range(nmodels): - ncn100_m[mm]=ncn100_m[mm][idx] - ncn_m[mm]=ncn_m[mm][idx] - nucn_m[mm]=nucn_m[mm][idx] -timem=timem[idx] - - - -# select only valid data in obs and the corresponding data in models -idx100 = ~np.isnan(uhsas) -idx10 = ~np.isnan(cpc) -idx3 = ~np.isnan(cpcu) - -mean100 = [None]*(nmodels+1) -mean10 = [None]*(nmodels+1) -mean3 = [None]*(nmodels+1) -std100 = [None]*(nmodels+1) -std10 = [None]*(nmodels+1) -std3 = [None]*(nmodels+1) -bias100 = [None]*(nmodels) -bias10 = [None]*(nmodels) -bias3 = [None]*(nmodels) -corr100 = [None]*(nmodels) -corr10 = [None]*(nmodels) -corr3 = [None]*(nmodels) -rmse100 = 
[None]*(nmodels) -rmse10 = [None]*(nmodels) -rmse3 = [None]*(nmodels) -p10_100 = [None]*(nmodels+1) -p10_10 = [None]*(nmodels+1) -p10_3 = [None]*(nmodels+1) -p25_100 = [None]*(nmodels+1) -p25_10 = [None]*(nmodels+1) -p25_3 = [None]*(nmodels+1) -p75_100 = [None]*(nmodels+1) -p75_10 = [None]*(nmodels+1) -p75_3 = [None]*(nmodels+1) -p90_100 = [None]*(nmodels+1) -p90_10 = [None]*(nmodels+1) -p90_3 = [None]*(nmodels+1) - -if len(idx100)==0 or sum(idx100)/len(idx100)<0.1: # two few observation available - # for obs - mean100[nmodels] = missing_value - std100[nmodels] = missing_value - p10_100[nmodels] = missing_value - p25_100[nmodels] = missing_value - p75_100[nmodels] = missing_value - p90_100[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) - std100[mm] = np.nanstd(ncn100_m[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) - bias100[mm] = missing_value - corr100[mm] = [missing_value, missing_value] - rmse100[mm] = missing_value -else: - # for obs - mean100[nmodels] = np.nanmean(uhsas[idx100]) - std100[nmodels] = np.nanstd(uhsas[idx100]) - p10_100[nmodels] = np.nanpercentile(uhsas[idx100],10) - p25_100[nmodels] = np.nanpercentile(uhsas[idx100],25) - p75_100[nmodels] = np.nanpercentile(uhsas[idx100],75) - p90_100[nmodels] = np.nanpercentile(uhsas[idx100],90) - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) - std100[mm] = np.nanstd(ncn100_m[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) - bias100[mm] = mean100[mm] - mean100[nmodels] - c100 = 
scipy.stats.pearsonr(ncn100_m[mm][idx100],uhsas[idx100]) - corr100[mm] = [c100[0],c100[1]] - rmse100[mm] = np.sqrt(((ncn100_m[mm][idx100]-uhsas[idx100])**2).mean()) - -if len(idx10)==0 or sum(idx10)/len(idx10)<0.1: # two few observation available - # for obs - mean10[nmodels] = missing_value - std10[nmodels] = missing_value - p10_10[nmodels] = missing_value - p25_10[nmodels] = missing_value - p75_10[nmodels] = missing_value - p90_10[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn_m[mm][idx10]) - std10[mm] = np.nanstd(ncn_m[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn_m[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn_m[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn_m[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn_m[mm][idx10],90) - bias10[mm] = missing_value - corr10[mm] = [missing_value, missing_value] - rmse10[mm] = missing_value -else: - # for obs - mean10[nmodels] = np.nanmean(cpc[idx10]) - std10[nmodels] = np.nanstd(cpc[idx10]) - p10_10[nmodels] = np.nanpercentile(cpc[idx10],10) - p25_10[nmodels] = np.nanpercentile(cpc[idx10],25) - p75_10[nmodels] = np.nanpercentile(cpc[idx10],75) - p90_10[nmodels] = np.nanpercentile(cpc[idx10],90) - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn_m[mm][idx10]) - std10[mm] = np.nanstd(ncn_m[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn_m[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn_m[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn_m[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn_m[mm][idx10],90) - bias10[mm] = mean10[mm] - mean10[nmodels] - c10 = scipy.stats.pearsonr(ncn_m[mm][idx10],cpc[idx10]) - corr10[mm] = [c10[0],c10[1]] - rmse10[mm] = np.sqrt(((ncn_m[mm][idx10]-cpc[idx10])**2).mean()) - -if len(idx3)==0 or sum(idx3)/len(idx3)<0.1: # two few observation available - # for obs - mean3[nmodels] = missing_value - std3[nmodels] = missing_value - p10_3[nmodels] = missing_value - p25_3[nmodels] = missing_value - p75_3[nmodels] = 
missing_value - p90_3[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean3[mm] = np.nanmean(nucn_m[mm][idx3]) - std3[mm] = np.nanstd(nucn_m[mm][idx3]) - p10_3[mm] = np.nanpercentile(nucn_m[mm][idx3],10) - p25_3[mm] = np.nanpercentile(nucn_m[mm][idx3],25) - p75_3[mm] = np.nanpercentile(nucn_m[mm][idx3],75) - p90_3[mm] = np.nanpercentile(nucn_m[mm][idx3],90) - bias3[mm] = missing_value - corr3[mm] = [missing_value, missing_value] - rmse3[mm] = missing_value -else: - # for obs - mean3[nmodels] = np.nanmean(cpcu[idx3]) - std3[nmodels] = np.nanstd(cpcu[idx3]) - p10_3[nmodels] = np.nanpercentile(cpcu[idx3],10) - p25_3[nmodels] = np.nanpercentile(cpcu[idx3],25) - p75_3[nmodels] = np.nanpercentile(cpcu[idx3],75) - p90_3[nmodels] = np.nanpercentile(cpcu[idx3],90) - # for models - for mm in range(nmodels): - mean3[mm] = np.nanmean(nucn_m[mm][idx3]) - std3[mm] = np.nanstd(nucn_m[mm][idx3]) - p10_3[mm] = np.nanpercentile(nucn_m[mm][idx3],10) - p25_3[mm] = np.nanpercentile(nucn_m[mm][idx3],25) - p75_3[mm] = np.nanpercentile(nucn_m[mm][idx3],75) - p90_3[mm] = np.nanpercentile(nucn_m[mm][idx3],90) - bias3[mm] = mean3[mm] - mean3[nmodels] - c3 = scipy.stats.pearsonr(nucn_m[mm][idx3],cpcu[idx3]) - corr3[mm] = [c3[0],c3[1]] - rmse3[mm] = np.sqrt(((nucn_m[mm][idx3]-cpcu[idx3])**2).mean()) - - -#%% write out files - -outfile = figpath_sfc_statistics+'statistics_CN10nm_'+campaign+'_'+IOP+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). sample size '+format(sum(idx10))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean10)): - f.write(format(mean10[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std10)): - f.write(format(std10[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_10)): - f.write(format(p10_10[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_10)): - f.write(format(p25_10[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_10)): - f.write(format(p75_10[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_10)): - f.write(format(p90_10[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias10)): - f.write(format(bias10[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse10)): - f.write(format(rmse10[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][1],'10.2f')+', ') - - -outfile = figpath_sfc_statistics+'statistics_CN3nm_'+campaign+'_'+IOP+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with CPC(>3nm). sample size '+format(sum(idx3))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean3)): - f.write(format(mean3[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std3)): - f.write(format(std3[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_3)): - f.write(format(p10_3[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_3)): - f.write(format(p25_3[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_3)): - f.write(format(p75_3[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_3)): - f.write(format(p90_3[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias3)): - f.write(format(bias3[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse3)): - f.write(format(rmse3[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse3)): - f.write(format(corr3[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse3)): - f.write(format(corr3[ii][1],'10.2f')+', ') - - -outfile = figpath_sfc_statistics+'statistics_CN100nm_'+campaign+'_'+IOP+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with UHSAS (>100nm). sample size '+format(sum(idx100))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean100)): - f.write(format(mean100[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std100)): - f.write(format(std100[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_100)): - f.write(format(p10_100[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_100)): - f.write(format(p25_100[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_100)): - f.write(format(p75_100[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_100)): - f.write(format(p90_100[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias100)): - f.write(format(bias100[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse100)): - f.write(format(rmse100[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][1],'10.2f')+', ') - \ No newline at end of file diff --git a/python/plotting/calc_statistic_ship_CN.py b/python/plotting/calc_statistic_ship_CN.py deleted file mode 100644 index 3b64284..0000000 --- a/python/plotting/calc_statistic_ship_CN.py +++ /dev/null @@ -1,391 +0,0 @@ -""" -# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration -# for ship measurements -# compare models and CPC/UHSAS measurements -""" -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import numpy as np -import scipy.stats -from read_ARMdata import read_cpc, read_uhsas -from read_netcdf import read_E3SM -from time_format_change import cday2mmdd -from specific_data_treatment import mask_model_ps, avg_time_1d -from quality_control import qc_mask_qcflag, qc_remove_neg - - -#%% settings - -from settings import campaign, Model_List, shipcpcpath, shipmetpath, shipuhsaspath, E3SM_ship_path, figpath_ship_statistics - -if not 
os.path.exists(figpath_ship_statistics): - os.makedirs(figpath_ship_statistics) -missing_value = np.nan - -lst = glob.glob(E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') -lst.sort() - -nmodels = len(Model_List) -cpcall = np.empty(0) -uhsasall = np.empty(0) -ncn10all = [] -ncn100all = [] -for mm in range(nmodels): - ncn10all.append(np.empty(0)) - ncn100all.append(np.empty(0)) - -for ll in range(len(lst)): - - if campaign=='MAGIC': - legnum=lst[ll][-5:-3] - elif campaign=='MARCUS': - legnum=lst[ll][-4] - else: - raise ValueError('please check campaign name: '+campaign) - print('legnum '+format(legnum)) - - #%% read in model - datam = list() - databins = list() - for mm in range(nmodels): - filenamem = E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' - - (timem,NCNall,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCNall') - (timem,data,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCN') - - datam.append(data*1e-6) # change unit from 1/m3 to 1/cm3 - databins.append(NCNall*1e-6) # change unit from 1/m3 to 1/cm3 - - # mask data where model grid is not at ocean surface (Ps is too different than obs) - filenamem = E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' - (timem,psm,timeunitx,psmunit,psmlongname)=read_E3SM(filenamem,'PS') - datamask = mask_model_ps(timem,0.01*psm,legnum,campaign,shipmetpath) - - datam[mm][datamask]=np.nan - - year0 = str(int(timeunitm.split()[2][0:4])+1) - - #%% read in observations - # find the days related to the ship leg - day = [int(a) for a in timem] - day = list(set(day)) - day.sort() - - # CPC - t_cpc=np.empty(0) - cpc=np.empty(0) - for dd in day: - - if campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - 
filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*') - if len(filenameo)==0: - continue # some days may be missing - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - if len(filenameo)==0: - continue # some days may be missing - - - (time,obs,qc,timeunit,dataunit)=read_cpc(filenameo[0]) - obs = qc_mask_qcflag(obs,qc) - t_cpc=np.hstack((t_cpc, dd+time/86400)) - cpc=np.hstack((cpc, obs)) - - # if time expands two years, add 365 days to the second year - if t_cpc[0]>t_cpc[-1]: - t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 - - # UHSAS - t_uh=np.empty(0) - uhsas=np.empty(0) - for dd in day: - - if campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - - if len(filenameo)==0: - continue # some days may be missing - if len(filenameo)>1: - raise ValueError('find too many files: '+filenameo) - - 
(time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) - obs=np.ma.filled(obs) - obs=qc_remove_neg(obs) - uhsas=np.hstack((uhsas, np.nansum(obs,1))) - t_uh = np.hstack((t_uh,time/86400+dd)) - - # if no obs available, fill one data with NaN - if len(t_uh)==0: - t_uh=[timem[0],timem[1]] - uhsas=np.full((2),np.nan) - - # if time expands two years, add 365 days to the second year - if t_uh[0]>t_uh[-1]: - t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 - - - #%% Calculate model aerosol number concentration for UHSAS size range - b1 = int(dmin[0]) - b2 = int(dmax[-1]) - datam2=list() - for mm in range(nmodels): - datam2.append(np.nansum(databins[mm][b1-1:b2,:],0)) - datam2[mm][datamask]=np.nan - - #%% average into 1hr resolution - time0 = np.arange(timem[0],timem[-1],1./24) - cpc = avg_time_1d(t_cpc,cpc,time0) - uhsas = avg_time_1d(t_uh,uhsas,time0) - for mm in range(nmodels): - datam[mm] = avg_time_1d(timem,datam[mm],time0) - datam2[mm] = avg_time_1d(timem,datam2[mm],time0) - - #%% - cpcall = np.hstack((cpcall,cpc)) - uhsasall = np.hstack((uhsasall,uhsas)) - for mm in range(nmodels): - ncn10all[mm] = np.hstack((ncn10all[mm],datam[mm])) - ncn100all[mm] = np.hstack((ncn100all[mm],datam2[mm])) - - -#%% calculate statistics - -if ncn10all[0].shape != cpcall.shape or ncn100all[0].shape != uhsasall.shape: - raise ValueError('observation and model dimensions are inconsitent ') - -# select only valid data in obs and the corresponding data in models (all data are not NAN) -idx10 = sum(np.vstack((~np.isnan(ncn10all),~np.isnan(cpcall))))==nmodels+1 -idx100 = sum(np.vstack((~np.isnan(ncn100all),~np.isnan(uhsasall))))==nmodels+1 - -mean10 = [None]*(nmodels+1) -mean100 = [None]*(nmodels+1) -std10 = [None]*(nmodels+1) -std100 = [None]*(nmodels+1) -bias10 = [None]*(nmodels) -bias100 = [None]*(nmodels) -corr10 = [None]*(nmodels) -corr100 = [None]*(nmodels) -rmse10 = [None]*(nmodels) -rmse100 = [None]*(nmodels) -p10_100 = [None]*(nmodels+1) -p10_10 = [None]*(nmodels+1) 
-p25_100 = [None]*(nmodels+1) -p25_10 = [None]*(nmodels+1) -p75_100 = [None]*(nmodels+1) -p75_10 = [None]*(nmodels+1) -p90_100 = [None]*(nmodels+1) -p90_10 = [None]*(nmodels+1) - - -if len(idx10)==0 or sum(idx10)/len(idx10)<0.1: # two few observation available - # for obs - mean10[nmodels] = missing_value - std10[nmodels] = missing_value - p10_10[nmodels] = missing_value - p25_10[nmodels] = missing_value - p75_10[nmodels] = missing_value - p90_10[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn10all[mm][idx10]) - std10[mm] = np.nanstd(ncn10all[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn10all[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn10all[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn10all[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn10all[mm][idx10],90) - bias10[mm] = missing_value - corr10[mm] = [missing_value, missing_value] - rmse10[mm] = missing_value -else: - # for obs - mean10[nmodels] = np.nanmean(cpcall[idx10]) - std10[nmodels] = np.nanstd(cpcall[idx10]) - p10_10[nmodels] = np.nanpercentile(cpcall[idx10],10) - p25_10[nmodels] = np.nanpercentile(cpcall[idx10],25) - p75_10[nmodels] = np.nanpercentile(cpcall[idx10],75) - p90_10[nmodels] = np.nanpercentile(cpcall[idx10],90) - # for models - for mm in range(nmodels): - mean10[mm] = np.nanmean(ncn10all[mm][idx10]) - std10[mm] = np.nanstd(ncn10all[mm][idx10]) - p10_10[mm] = np.nanpercentile(ncn10all[mm][idx10],10) - p25_10[mm] = np.nanpercentile(ncn10all[mm][idx10],25) - p75_10[mm] = np.nanpercentile(ncn10all[mm][idx10],75) - p90_10[mm] = np.nanpercentile(ncn10all[mm][idx10],90) - bias10[mm] = mean10[mm] - mean10[nmodels] - c10 = scipy.stats.pearsonr(ncn10all[mm][idx10],cpcall[idx10]) - corr10[mm] = [c10[0],c10[1]] - rmse10[mm] = np.sqrt(((ncn10all[mm][idx10]-cpcall[idx10])**2).mean()) - -if len(idx100)==0 or sum(idx100)/len(idx100)<0.1: # two few observation available - # for obs - mean100[nmodels] = missing_value - std100[nmodels] = 
missing_value - p10_100[nmodels] = missing_value - p25_100[nmodels] = missing_value - p75_100[nmodels] = missing_value - p90_100[nmodels] = missing_value - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100all[mm][idx100]) - std100[mm] = np.nanstd(ncn100all[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100all[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100all[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100all[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100all[mm][idx100],90) - bias100[mm] = missing_value - corr100[mm] = [missing_value, missing_value] - rmse100[mm] = missing_value -else: - # for obs - mean100[nmodels] = np.nanmean(uhsasall[idx100]) - std100[nmodels] = np.nanstd(uhsasall[idx100]) - p10_100[nmodels] = np.nanpercentile(uhsasall[idx100],10) - p25_100[nmodels] = np.nanpercentile(uhsasall[idx100],25) - p75_100[nmodels] = np.nanpercentile(uhsasall[idx100],75) - p90_100[nmodels] = np.nanpercentile(uhsasall[idx100],90) - # for models - for mm in range(nmodels): - mean100[mm] = np.nanmean(ncn100all[mm][idx100]) - std100[mm] = np.nanstd(ncn100all[mm][idx100]) - p10_100[mm] = np.nanpercentile(ncn100all[mm][idx100],10) - p25_100[mm] = np.nanpercentile(ncn100all[mm][idx100],25) - p75_100[mm] = np.nanpercentile(ncn100all[mm][idx100],75) - p90_100[mm] = np.nanpercentile(ncn100all[mm][idx100],90) - bias100[mm] = mean100[mm] - mean100[nmodels] - c100 = scipy.stats.pearsonr(ncn100all[mm][idx100],uhsasall[idx100]) - corr100[mm] = [c100[0],c100[1]] - rmse100[mm] = np.sqrt(((ncn100all[mm][idx100]-uhsasall[idx100])**2).mean()) - - -#%% write out files - -outfile = figpath_ship_statistics+'statistics_CN10nm_'+campaign+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). 
sample size '+format(sum(idx10))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean10)): - f.write(format(mean10[ii],'10.2f')+', ') - # write std - f.write('\n std. dev.,') - for ii in range(len(std10)): - f.write(format(std10[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_10)): - f.write(format(p10_10[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_10)): - f.write(format(p25_10[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_10)): - f.write(format(p75_10[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_10)): - f.write(format(p90_10[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias10)): - f.write(format(bias10[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse10)): - f.write(format(rmse10[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse10)): - f.write(format(corr10[ii][1],'10.2f')+', ') - - -outfile = figpath_ship_statistics+'statistics_CN100nm_'+campaign+'.txt' -print('write statistics to file '+outfile) - -with open(outfile, 'w') as f: - f.write('statistics of Aerosol Number Concentration comparing with UHSAS100(>100nm). sample size '+format(sum(idx100))+'\n') - line1 = list(Model_List) - line1.insert(0,' --- ') - line1.append('OBS') - for ii in range(len(line1)): - f.write(format(line1[ii],'10s')+', ') - # write mean - f.write('\n mean,\t') - for ii in range(len(mean100)): - f.write(format(mean100[ii],'10.2f')+', ') - # write std - f.write('\n std. 
dev.,') - for ii in range(len(std100)): - f.write(format(std100[ii],'10.2f')+', ') - # write percentiles - f.write('\n 10% percentile: ') - for ii in range(len(p10_100)): - f.write(format(p10_100[ii],'10.2f')+', ') - f.write('\n 25% percentile: ') - for ii in range(len(p25_100)): - f.write(format(p25_100[ii],'10.2f')+', ') - f.write('\n 75% percentile: ') - for ii in range(len(p75_100)): - f.write(format(p75_100[ii],'10.2f')+', ') - f.write('\n 90% percentile: ') - for ii in range(len(p90_100)): - f.write(format(p90_100[ii],'10.2f')+', ') - # write bias - f.write('\n bias,\t') - for ii in range(len(bias100)): - f.write(format(bias100[ii],'10.2f')+', ') - # write rmse - f.write('\n RMSE,\t') - for ii in range(len(rmse100)): - f.write(format(rmse100[ii],'10.2f')+', ') - # write correlation - f.write('\n corrcoef,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][0],'10.4f')+', ') - # write p value of correlation - f.write('\n P_corr,\t') - for ii in range(len(rmse100)): - f.write(format(corr100[ii][1],'10.2f')+', ') - - - - diff --git a/python/plotting/contour_flight_timeseries_AerosolSize.py b/python/plotting/contour_flight_timeseries_AerosolSize.py deleted file mode 100644 index c289691..0000000 --- a/python/plotting/contour_flight_timeseries_AerosolSize.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -# plot aircraft track data -# timeseries of aerosol size distribution -# compare models and aircraft measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_RF_NCAR -from specific_data_treatment import lwc2cflag, avg_time_2d -# from time_format_change import yyyymmdd2cday, hhmmss2sec -from read_netcdf import read_merged_size,read_extractflight - - -#%% settings - -from settings import campaign, Model_List, E3SM_aircraft_path, figpath_aircraft_timeseries - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, 
merged_size_path -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('please check campaign name: '+campaign) - -if not os.path.exists(figpath_aircraft_timeseries): - os.makedirs(figpath_aircraft_timeseries) - -#%% find files for flight information -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -# dN/dlnDp for model -dlnDp_m = np.empty((3000)) -for bb in range(3000): - dlnDp_m[bb]=np.log((bb+2)/(bb+1)) - -for date in alldates: - - #%% read in Models - nmodels=len(Model_List) - data_m = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') - datam=datam*1e-6 # #/m3 to #/cm3 - # average in time for quicker plot - time2 = np.arange(timem[0],timem[-1],60) - data2 = avg_time_2d(timem,datam.T,time2) - datam = data2.T - # change to dN/dlnDp - for tt in range(len(time2)): - datam[:,tt]=datam[:,tt]/dlnDp_m - data_m.append(datam) - - # timem = (np.array(timem)-int(timem[0]))*24 - timem = time2/3600. 
- - - #%% read observation - if campaign in ['HISCALE', 'ACEENA']: - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - if campaign=='HISCALE': - filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' - elif campaign=='ACEENA': - filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' - #% read in flight information - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') - (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') - (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') - (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') - (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') - time=np.ma.compressed(time) - size=size*1000. - - elif campaign in ['CSET', 'SOCRATES']: - filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - # cloud flag - (time,lwc,timeunit,lwcunit,lwclongname,size,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') - # calculate cloud flag based on LWC - cflag=lwc2cflag(lwc,lwcunit) - if campaign=='CSET': - (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_RWOOU') - elif campaign=='SOCRATES': - # there are two variables: CUHSAS_CVIU and CUHSAS_LWII - (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_LWII') - merge = uhsas[:,0,:] - size=size*1000. - sizeh = size - sizel = np.hstack((2*size[0]-size[1], size[0:-1])) - - # merge=merge.T - # time=time/3600. - ## average in time for quicker plot - time2=np.arange(time[0],time[-1],60) - data2 = avg_time_2d(time,merge,time2) - merge = data2.T - time=time2/3600. 
- - # change to dN/dlnDp - for bb in range(len(size)): - dlnDp=np.log(sizeh[bb]/sizel[bb]) - merge[bb,:]=merge[bb,:]/dlnDp - - - - #%% make plot - - figname = figpath_aircraft_timeseries+'AerosolSize_'+campaign+'_'+date+'.png' - print('plotting figures to '+figname) - - #fig = plt.figure() - fig,ax = plt.subplots(nmodels+1,1,figsize=(8,2*(nmodels+1))) # figsize in inches - plt.tight_layout(h_pad=1.1) #pad=0.4, w_pad=0.5, h_pad=1.0 - plt.subplots_adjust(right=0.9,bottom=0.1) - - leveltick=[0.1,1,10,100,1000,10000] - levellist=np.arange(np.log(leveltick[0]),11,.5) - - merge[merge<0.01]=0.01 - h1 = ax[0].contourf(time,size,np.log(merge),levellist,cmap=plt.get_cmap('jet')) - - d_mam=np.arange(1,3001) - h2=[] - for mm in range(nmodels): - datam = data_m[mm] - datam[datam<0.01]=0.01 - h_m = ax[mm+1].contourf(timem,d_mam,np.log(datam),levellist,cmap=plt.get_cmap('jet')) - h2.append(h_m) - - # colorbar - cax = plt.axes([0.95, 0.2, 0.02, 0.6]) - cbar=fig.colorbar(h2[0], cax=cax, ticks=np.log(leveltick)) - cbar.ax.set_yticklabels(leveltick, fontsize=14) - - # set axis - for ii in range(nmodels+1): - ax[ii].set_xlim(timem[0],timem[-1]) - ax[ii].set_yscale('log') - ax[ii].set_ylim(3, 5000) - ax[ii].set_yticks([10,100,1000]) - ax[ii].tick_params(color='k',labelsize=14) - if ii==0: - ax[ii].text(0.01, 0.94, 'OBS', fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') - else: - ax[ii].text(0.01, 0.94, Model_List[ii-1], fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') - - ax[1].set_ylabel('Diameter (nm)',fontsize=14) - ax[0].set_title('Size Distribution (#/dlnDp, cm-3)',fontsize=15) - ax[nmodels].set_xlabel('time (hour UTC) in '+date,fontsize=14) - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - plt.close() - - \ No newline at end of file diff --git a/python/plotting/contour_sfc_diurnalcycle_AerosolSize.py b/python/plotting/contour_sfc_diurnalcycle_AerosolSize.py deleted file mode 100644 index 3aacb10..0000000 --- 
a/python/plotting/contour_sfc_diurnalcycle_AerosolSize.py +++ /dev/null @@ -1,230 +0,0 @@ -""" -# plot surface diurnal cycle of aerosol size distribution -# compare models and surface measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday, cday2mmdd -from read_surface import read_smpsb_pnnl,read_smps_bin -from read_ARMdata import read_uhsas, read_smps_bnl -from read_netcdf import read_E3SM -from specific_data_treatment import avg_time_2d -from quality_control import qc_mask_qcflag, qc_remove_neg,qc_correction_nanosmps - -#%% settings - -from settings import campaign, Model_List, IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_timeseries -if campaign=='ACEENA': - from settings import uhsassfcpath -elif campaign=='HISCALE': - if IOP=='IOP1': - from settings import smps_bnl_path, nanosmps_bnl_path - elif IOP=='IOP2': - from settings import smps_pnnl_path - -if not os.path.exists(figpath_sfc_timeseries): - os.makedirs(figpath_sfc_timeseries) - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -#%% read in obs data -if campaign=='ACEENA': - if IOP=='IOP1': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') - elif IOP=='IOP2': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') - lst.sort() - t_uhsas=np.empty(0) - uhsas=np.empty((0,99)) - for filename in lst: - (time,dmin,dmax,data,timeunit,dataunit,long_name) = read_uhsas(filename) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker plot - time2=np.arange(300,86400,600) - data2 = avg_time_2d(time,data,time2) - t_uhsas=np.hstack((t_uhsas, cday+time2/86400)) - uhsas=np.vstack((uhsas, data2)) - size_u = (dmin+dmax)/2 - uhsas=qc_remove_neg(uhsas) - # change to dN/dlogDp - dlnDp_u=np.empty(99) - for bb in range(len(size_u)): - dlnDp_u[bb]=np.log10(dmax[bb]/dmin[bb]) - uhsas[:,bb]=uhsas[:,bb]/dlnDp_u[bb] - - time0 = np.array(t_uhsas) - size = np.array(size_u) - obs = np.array(uhsas.T) - -elif campaign=='HISCALE': - if IOP=='IOP1': - lst = glob.glob(smps_bnl_path+'*.nc') - lst.sort() - t_smps=np.empty(0) - smps=np.empty((0,192)) - for filename in lst: - (time,size,flag,timeunit,dataunit,smps_longname)=read_smps_bnl(filename,'status_flag') - (time,size,data,timeunit,smpsunit,smps_longname)=read_smps_bnl(filename,'number_size_distribution') - data=qc_mask_qcflag(data,flag) - data=qc_remove_neg(data) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_smps=np.hstack((t_smps, cday+time/86400)) - smps=np.vstack((smps, data)) - smps=smps.T - # combine with nanoSMPS - lst2 = glob.glob(nanosmps_bnl_path+'*.nc') - lst2.sort() - t_nano=np.empty(0) - nanosmps=np.empty((0,192)) - for filename2 in lst2: - (timen,sizen,flagn,timenunit,datanunit,long_name)=read_smps_bnl(filename2,'status_flag') - 
(timen,sizen,datan,timenunit,nanounit,nanoname)=read_smps_bnl(filename2,'number_size_distribution') - datan=qc_mask_qcflag(datan,flagn) - datan=qc_remove_neg(datan) - timestr=timenunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_nano=np.hstack((t_nano, cday+timen/86400)) - nanosmps=np.vstack((nanosmps, datan)) - # nanosmps is overcounting, adjust nanosmps value for smooth transition to SMPS - nanosmps=qc_correction_nanosmps(nanosmps.T) - for tt in range(smps.shape[1]): - if any(t_nano==t_smps[tt]): - smps[0:80,tt]=nanosmps[0:80,t_nano==t_smps[tt]].reshape(80) - - elif IOP=='IOP2': - data=read_smpsb_pnnl(smps_pnnl_path+'HiScaleSMPSb_SGP_20160827_R1.ict') - size=read_smps_bin(smps_pnnl_path+'NSD_column_size_chart.txt') - time=data[0,:] - smps=data[1:-1,:] - flag=data[-1,:] - cday=yyyymmdd2cday('2016-08-27','noleap') - t_smps=cday+time/86400 - smps=qc_mask_qcflag(smps.T,flag).T - - time0 = np.array(t_smps) - size = np.array(size) - obs = np.array(smps) - - # SMPS is already divided by log10 - -else: - raise ValueError('please check campaign name: '+campaign) - -# only choose the time period between start_date and end_date -obs=obs[:,np.logical_and(time0>=cday1, time0=cday1, time0=days[dd], timemtimem0[-1]: - timem0[timem0<=timem0[-1]]=timem0[timem0<=timem0[-1]]+365 - - # average in time for quicker plot - timem=np.arange(timem0[0]-0.1,timem0[-1]+0.1,1/24.) 
- data2 = avg_time_2d(timem0,data.T,timem) - data2 = data2.T - - # change to dN/dlnDp - for bb in range(3000): - dlnDp=np.log((bb+2)/(bb+1)) - data2[bb,:]=data2[bb,:]/dlnDp - datam.append(data2*1e-6) # change unit from 1/m3 to 1/cm3 - - year0 = str(int(timeunitm.split()[2][0:4])+1) - - #%% read in observations - # find the days related to the ship leg - day = [int(a) for a in timem] - day = list(set(day)) - day.sort() - - nbins = 99 # for UHSAS at MAGIC - t_uh=np.empty(0) - uhsasall=np.empty((0,nbins)) - for dd in day: - if campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - - if len(filenameo)==0: - continue # some days may be missing - if len(filenameo)>1: - raise ValueError('find too many files: ' + filenameo) - - - (time,dmin,dmax,uhsas,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) - - uhsas=np.ma.filled(uhsas) - uhsas=qc_remove_neg(uhsas) - - # average in time for quicker plot - time2=np.arange(1800,86400,3600) - data2 = avg_time_2d(time,uhsas,time2) - uhsasall=np.vstack((uhsasall, data2)) - t_uh = np.hstack((t_uh,time2/86400+dd)) - - # if no obs available, fill one data with NaN - if len(t_uh)==0: - t_uh=[timem[0],timem[1]] - uhsasall=np.full((2,nbins),np.nan) - - # if time expands two years, add 365 days to the second year 
- if t_uh[0]>t_uh[-1]: - t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 - - size_u = (dmin+dmax)/2 - dsize_u = dmax-dmin - - uhsasall=qc_remove_neg(uhsasall) - - # change to dN/dlnDp - dlnDp_u=np.empty(nbins) - for bb in range(len(size_u)): - dlnDp_u[bb]=np.log(dmax[bb]/dmin[bb]) - uhsasall[:,bb]=uhsasall[:,bb]/dlnDp_u[bb] - - #%% make plot - - figname = figpath_ship_timeseries+'timeseries_AerosolSize_'+campaign+'_ship'+legnum+'.png' - print('plotting figures to '+figname) - - #fig = plt.figure() - fig,ax = plt.subplots(nmodels+1,1,figsize=(8,2*(nmodels+1))) # figsize in inches - plt.tight_layout(h_pad=1.1) #pad=0.4, w_pad=0.5, h_pad=1.0 - plt.subplots_adjust(right=0.9,bottom=0.1) - - leveltick=[0.1,1,10,100,1000,10000,100000] - levellist=np.arange(np.log(leveltick[0]),12,.5) - - uhsasall[uhsasall<0.01]=0.01 - h1 = ax[0].contourf(t_uh,size_u,np.log(uhsasall.T),levellist,cmap=plt.get_cmap('jet')) - - size_m=np.arange(1,3001) - h2=[] - for mm in range(nmodels): - data = datam[mm] - data[data<0.01]=0.01 - h_m = ax[mm+1].contourf(timem,size_m,np.log(data),levellist,cmap=plt.get_cmap('jet')) - h2.append(h_m) - - # colorbar - cax = plt.axes([0.92, 0.2, 0.02, 0.6]) - cbar=fig.colorbar(h2[0], cax=cax, ticks=np.log(leveltick)) - cbar.ax.set_yticklabels(leveltick, fontsize=14) - - # set axis - for ii in range(nmodels+1): - ax[ii].set_xlim(timem[0],timem[-1]) - ax[ii].set_yscale('log') - ax[ii].set_ylim(5, 3000) - ax[ii].set_yticks([10,100,1000]) - ax[ii].tick_params(color='k',labelsize=14) - if ii==0: - ax[ii].text(0.01, 0.94, 'OBS', fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') - else: - ax[ii].text(0.01, 0.94, Model_List[ii-1], fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') - - ax[1].set_ylabel('Diameter (nm)',fontsize=14) - ax[0].set_title('Size Distribution (#/dlnDp, cm-3)',fontsize=15) - ax[nmodels].set_xlabel('Calendar Day in '+year0,fontsize=14) - - fig.text(.08, .97,'ship leg '+legnum, fontsize=12) - - 
fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - \ No newline at end of file diff --git a/python/plotting/plot_flight_pdf_AerosolSize.py b/python/plotting/plot_flight_pdf_AerosolSize.py deleted file mode 100644 index 3089688..0000000 --- a/python/plotting/plot_flight_pdf_AerosolSize.py +++ /dev/null @@ -1,241 +0,0 @@ -""" -# plot mean aerosol size ditribution for aircraft track data -# average for each IOP -# compare models and aircraft measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_RF_NCAR -from specific_data_treatment import lwc2cflag -# from time_format_change import yyyymmdd2cday, hhmmss2sec -from read_netcdf import read_merged_size,read_extractflight - -from specific_data_treatment import avg_time_2d -from quality_control import qc_mask_cloudflag, qc_uhsas_RF_NCAR,qc_remove_neg,qc_mask_takeoff_landing - -#%% settings - -from settings import campaign, Model_List, color_model, \ - E3SM_aircraft_path, figpath_aircraft_statistics - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, merged_size_path -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - - -#%% find files for flight information - -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif 
IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -print('reading '+format(len(alldates))+' files to calculate mean aerosol pdf: ') - -nmodels=len(Model_List) -pdfall_m = [np.empty((3000,0)) for mm in range(nmodels)] -size_m = np.zeros(3000) -pdf_model = [size_m for mm in range(nmodels)] -if 'pdf_obs' in locals(): - del pdf_obs - -# number of valid timesteps -n_o = 0 -n_m = [0 for mm in range(nmodels)] - - -# dN/dlnDp for model -dlnDp_m = np.empty((3000)) -for bb in range(3000): - dlnDp_m[bb]=np.log((bb+2)/(bb+1)) - -for date in alldates[:]: - print(date) - - #%% read in Models - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') - datam=datam*1e-6 # #/m3 to #/cm3 - - # average in time for quicker plot - time2=np.arange(300,86400,600) - data2 = avg_time_2d(timem,datam.T,time2) - datam=data2.T - timem=time2 - - for tt in range(len(timem)): - datam[:,tt]=datam[:,tt]/dlnDp_m - - pdfall_m[mm] = np.column_stack((pdfall_m[mm],datam)) - for tt in range(len(timem)): - if ~np.isnan(datam[0,tt]): - pdf_model[mm] = pdf_model[mm]+datam[:,tt] - n_m[mm]=n_m[mm]+1 - - #%% read observation - if campaign in ['HISCALE', 'ACEENA']: - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - if campaign=='HISCALE': - filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' - elif campaign=='ACEENA': - filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' - - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') - (time,size,legnum,timeunit,zunit,long_name)=read_merged_size(filename,'leg_number') - 
(time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') - (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') - (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') - time=np.ma.compressed(time) - size=size*1000. - merge = qc_mask_cloudflag(merge,cflag) - - # average in time for quicker plot - time2=np.arange(300,86400,600) - data2 = avg_time_2d(time,merge,time2) - merge = data2.T - time=time2/3600. - - - elif campaign in ['CSET', 'SOCRATES']: - filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - # cloud flag - (time,lwc,timeunit,lwcunit,lwclongname,size,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') - if campaign=='CSET': - (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_RWOOU') - elif campaign=='SOCRATES': - # there are two variables: CUHSAS_CVIU and CUHSAS_LWII - (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_LWII') - uhsas=uhsas[:,0,:] - # calculate cloud flag based on LWC - cflag=lwc2cflag(lwc,lwcunit) - uhsas = qc_mask_cloudflag(uhsas,cflag) - uhsas= qc_uhsas_RF_NCAR(uhsas) - - # average in time for quicker plot - time2=np.arange(300,86400,600) - data2 = avg_time_2d(time,uhsas,time2) - merge = data2.T - time0 = np.array(time) - time=time2/3600. - - size=size*1000. 
- sizeh = size - sizel = np.hstack((2*size[0]-size[1], size[0:-1])) - - # change to dN/dlnDp - for bb in range(len(size)): - dlnDp=np.log(sizeh[bb]/sizel[bb]) - merge[bb,:]=merge[bb,:]/dlnDp - - merge=qc_remove_neg(merge) - - # exclude 30min after takeoff and before landing - merge = qc_mask_takeoff_landing(time2,merge) - - # fig,ax=plt.subplots() - # ax.plot(merge[9,:]) - # ax.set_title(date) - # error - - if ('pdf_obs' in locals()) == False: - pdf_obs = np.zeros(len(size)) - pdfall_o = np.empty((len(size),0)) - idx_valid = ~np.isnan(np.mean(merge,0)) - pdf_obs = pdf_obs + np.sum(merge[:,idx_valid],1) - pdfall_o = np.hstack((pdfall_o,np.array(merge[:,idx_valid]))) - n_o = n_o + np.sum(idx_valid) - - -#%% calculate mean pdf - -pdf_obs[pdf_obs<1e-3]=np.nan -pdf_obs=pdf_obs/n_o -for mm in range(nmodels): - pdf_model[mm]=pdf_model[mm]/n_m[mm] - -#%% -pdfall_o[pdfall_o<0]=np.nan -pct1_o = [np.nanpercentile(pdfall_o[i,:],10) for i in range(len(size))] -pct2_o = [np.nanpercentile(pdfall_o[i,:],90) for i in range(len(size))] -pct1_m = [[] for mm in range(nmodels)] -pct2_m = [[] for mm in range(nmodels)] -for mm in range(nmodels): - pct1_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:],10) for i in range(3000)] - pct2_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:],90) for i in range(3000)] - -#%% make plot - -if campaign in ['HISCALE', 'ACEENA']: - figname = figpath_aircraft_statistics+'pdf_AerosolSize_'+campaign+'_'+IOP+'.png' -else: - figname = figpath_aircraft_statistics+'pdf_AerosolSize_'+campaign+'.png' - -print('plotting figures to '+figname) - -#fig = plt.figure() -fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches - -ax.plot(size,pdf_obs,color='k',label='Obs') -for mm in range(nmodels): - ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - -ax.fill_between(size,pct1_o,pct2_o, alpha=0.5, facecolor='gray') -for mm in range(nmodels): - ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], alpha=0.2, 
facecolor=color_model[mm]) - -ax.legend(loc='upper right', shadow=False, fontsize='medium') -ax.tick_params(color='k',labelsize=12) -ax.set_xscale('log') -ax.set_yscale('log') -ax.set_ylim(0.01,1e4) -ax.set_xlim(0.67,4500) -ax.set_xlabel('Diameter (nm)',fontsize=13) -ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) - -if campaign in ['HISCALE', 'ACEENA']: - ax.set_title(campaign+' '+IOP,fontsize=14) -else: - ax.set_title(campaign,fontsize=14) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() - diff --git a/python/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py b/python/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py deleted file mode 100644 index 4641a12..0000000 --- a/python/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py +++ /dev/null @@ -1,415 +0,0 @@ -""" -# plot_flight_pdf_percentile_SeparateCloud_aceena.py -# plot pdf and percentiles in several aerosol size bins for aircraft data -# separated by observed PBLH -""" -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -# from time_format_change import hhmmss2sec,yyyymmdd2cday -from read_aircraft import read_cpc -from read_netcdf import read_merged_size,read_extractflight -from quality_control import qc_remove_neg - -#%% settings - -from settings import campaign, cpcpath,merged_size_path, \ - Model_List, color_model, IOP, E3SM_aircraft_path, figpath_aircraft_statistics - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - -# set final bin sizes -binl = np.array([3, 15, 70, 300, 1000]) -binh = np.array([10, 70, 300, 1000, 3000]) -binm = (binl+binh)/2 - -d_mam=np.arange(1,3001) -blen = len(binm) - -# numbers of bins in merged size data -b2len=67 - -#%% find files for flight information - -lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') -lst.sort() - -if len(lst)==0: - raise ValueError('cannot find any file') 
- -# choose files for specific IOP -if campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() -else: - raise ValueError('this code is only for ACEENA, check the campaign settings') - -if len(lst)==0: - raise ValueError('cannot find any file') - -#%% read all data - -# pdf average for legs -pdf_sfc_obs=np.zeros([b2len,0]) -pdf_near_obs=np.zeros([b2len,0]) -pdf_above_obs=np.zeros([b2len,0]) - -cpcdiff_sfc=np.zeros([0]) -cpcdiff_near=np.zeros([0]) -cpcdiff_above=np.zeros([0]) - -nmodels=len(Model_List) -pdf_sfc_model=[] -pdf_near_model=[] -pdf_above_model=[] -for mm in range(nmodels): - pdf_sfc_model.append(np.zeros([3000,0])) - pdf_near_model.append(np.zeros([3000,0])) - pdf_above_model.append(np.zeros([3000,0])) - -# pdf for the final bin sizes -p2_sfc_obs = [] -p2_near_obs = [] -p2_above_obs = [] -p2_sfc_model=[] -p2_near_model=[] -p2_above_model=[] -for mm in range(nmodels): - p2_sfc_model.append([]) - p2_near_model.append([]) - p2_above_model.append([]) - -print('reading '+format(len(lst))+' files to calculate the statistics: ') - -for filename in lst: - - # get date info: - date=filename[-12:-3] - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - print(date) - - #%% read aerosol size distribution - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') - (time,size,legnum,timeunit,cunit,long_name)=read_merged_size(filename,'leg_number') - (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') - (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') - (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') - (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') - 
time=np.ma.compressed(time) - timelen = len(time) - time=time/3600. - size=np.ma.compressed(size)*1000 # um to nm - sizel=sizel*1000 - sizeh=sizeh*1000 - merge=qc_remove_neg(merge) - - - - #%% read in CPC measurements - - if campaign=='ACEENA': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') - else: - raise ValueError('this code is only for ACEENA, check the campaign settings') - filename_c.sort() - # read in data - if len(filename_c)==1 or len(filename_c)==2: # some days have two flights - (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) - elif np.logical_and(campaign=='HiScale', date=='20160425a'): - cpc=np.insert(cpc,0,cpc[:,0],axis=1) - cpc[0,0]=cpc[0,0]-1 - time_cpc = cpc[0,:]/3600 - cpc10 = cpc[1,:] - cpc3 = cpc[2,:] - elif len(filename_c)==0: - time_cpc=time - cpc10=np.nan*np.empty([len(time)]) - cpc3=np.nan*np.empty([len(time)]) - else: - raise ValueError('find too many files in ' + filename_c) - - cpcdiff = cpc3-cpc10 - cpcdiff=qc_remove_neg(cpcdiff) - - #%% read in Models - datam2 = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') - datam2.append(datam*1e-6) # #/m3 to #/cm3 - - timem = (timem - int(timem[0]))*24 - - if len(timem)!=len(time) or len(time)!=len(time_cpc): - raise ValueError('time dimension for obs and/or model are not consistent') - - #%% get leg information near surface, near cloud base and above cloud - # leg_sfc = np.ma.compressed (np.unique(legnum[height<=200])[1:]) - leg_sfc = list() - leg_near = list() - leg_above = list() - leg_toomuchcld = list() - leg_nocld = list() - leg_nodata = list() - - for ii in range(1,max(legnum)+1): - # for ii in range(5,7): - idx_l = legnum==ii - # if any(cflag[idx_l]==1): - # make sure 
cloud flag less than 10% of time and FIMS is not all missing (e.g., TD mode) - if np.sum(cflag[idx_l])/len(cflag[idx_l]) >= 0.1: - # if np.sum(cflag[idx_l]) > 1: - leg_toomuchcld.append(ii) - continue - if all(np.isnan(merge[idx_l,10])): - leg_nodata.append(ii) - continue - - legheight = np.mean(height[idx_l]) - # if legheight<=250: # leg number near surface - # leg_sfc.append(ii) - - # find the mean cloud height within 1hr of the leg - i = np.argwhere(legnum==ii) - i_start = max(i[0][0]-3600, 0) - i_end = min(i[-1][0]+3600, len(cflag)) - if all(cflag[i_start:i_end]!=1): - leg_nocld.append(ii) - if legheight>2500: - leg_above.append(ii) - elif legheight<=250: # leg number near surface - leg_sfc.append(ii) - continue - idx_c = cflag[i_start:i_end]==1 - cldheight = np.mean(height[i_start:i_end][idx_c]) - cldmax = np.max(height[i_start:i_end][idx_c]) - cldmin = np.min(height[i_start:i_end][idx_c]) - # if (legheight-cldheight)<=200 and (legheight-cldheight)>=-400: - if legheight>=max(cldmin,250) and legheight<=cldmax: - leg_near.append(ii) - elif legheight500: - elif legheight>cldmax: - leg_above.append(ii) - - #%% calculate all pdfs - for ii in range(len(leg_sfc)): - idx = legnum==leg_sfc[ii] - tmp_obs = np.nanmean(merge[idx,:],0) - tmp_obs[tmp_obs==0]=np.nan - pdf_sfc_obs = np.hstack((pdf_sfc_obs, np.reshape(tmp_obs,(b2len,1)))) - cpcdiff_sfc = np.hstack((cpcdiff_sfc, np.nanmean(cpcdiff[idx]))) - for mm in range(nmodels): - tmp_model = np.nanmean(datam2[mm][:,idx],1) - tmp_model[tmp_model==0]=np.nan - pdf_sfc_model[mm] = np.hstack((pdf_sfc_model[mm], np.reshape(tmp_model,(3000,1)))) - - for ii in range(len(leg_near)): - idx = legnum==leg_near[ii] - tmp_obs = np.nanmean(merge[idx,:],0) - tmp_obs[tmp_obs==0]=np.nan - pdf_near_obs = np.hstack((pdf_near_obs, np.reshape(tmp_obs,(b2len,1)))) - cpcdiff_near = np.hstack((cpcdiff_near, np.nanmean(cpcdiff[idx]))) - for mm in range(nmodels): - tmp_model = np.nanmean(datam2[mm][:,idx],1) - tmp_model[tmp_model==0]=np.nan - 
pdf_near_model[mm] = np.hstack((pdf_near_model[mm], np.reshape(tmp_model,(3000,1)))) - - for ii in range(len(leg_above)): - idx = legnum==leg_above[ii] - tmp_obs = np.nanmean(merge[idx,:],0) - tmp_obs[tmp_obs==0]=np.nan - pdf_above_obs = np.hstack((pdf_above_obs, np.reshape(tmp_obs,(b2len,1)))) - cpcdiff_above = np.hstack((cpcdiff_above, np.nanmean(cpcdiff[idx]))) - for mm in range(nmodels): - tmp_model = np.nanmean(datam2[mm][:,idx],1) - tmp_model[tmp_model==0]=np.nan - pdf_above_model[mm] = np.hstack((pdf_above_model[mm], np.reshape(tmp_model,(3000,1)))) - - -#%% change to the pre-defined size bins - -for bb in range(blen): - idx_m = np.logical_and(d_mam>=binl[bb], d_mam<=binh[bb]) - for mm in range(nmodels): - p2_sfc_model[mm].append(np.nansum(pdf_sfc_model[mm][idx_m,:],0)) - p2_near_model[mm].append(np.nansum(pdf_near_model[mm][idx_m,:],0)) - p2_above_model[mm].append(np.nansum(pdf_above_model[mm][idx_m,:],0)) - - if bb==0: - p2_sfc_obs.append(cpcdiff_sfc[~np.isnan(cpcdiff_sfc)]) - p2_near_obs.append(cpcdiff_near[~np.isnan(cpcdiff_near)]) - p2_above_obs.append(cpcdiff_above[~np.isnan(cpcdiff_above)]) - else: - idx_o = np.logical_and(sizel>=binl[bb], sizeh<=binh[bb]) - if any(idx_o): - tmp_sfc = np.nansum(pdf_sfc_obs[idx_o,:],0) - tmp_near = np.nansum(pdf_near_obs[idx_o,:],0) - tmp_above = np.nansum(pdf_above_obs[idx_o,:],0) - p2_sfc_obs.append(tmp_sfc[tmp_sfc!=0]) - p2_near_obs.append(tmp_near[tmp_near!=0]) - p2_above_obs.append(tmp_above[tmp_above!=0]) - else: - raise ValueError("no sample is found in the size bin") - -#%% calculate dlnDp for dN/dlnDp -d_mam=np.arange(1,3001) -dlnDp_m=np.full(3000,np.nan) -for bb in range(3000): - dlnDp_m[bb]=np.log((bb+2)/(bb+1)) -dlnDp_o=np.empty(len(size)) -for bb in range(len(size)): - dlnDp_o[bb]=np.log(sizeh[bb]/sizel[bb]) - - -#%% plot entire pdf below and above PBL -figname = figpath_aircraft_statistics+'SeparateCloud_pdf_AerosolSize_ACEENA_'+IOP+'.png' -print('plotting PDF figures to '+figname) - -fig,(ax1,ax2,ax3) = 
plt.subplots(3,1,figsize=(6,8)) - -ax1.plot(size,np.nanmedian(pdf_above_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax1.plot(d_mam,np.nanmedian(pdf_above_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax1.tick_params(color='k',labelsize=14) -ax1.set_xscale('log') -ax1.set_yscale('log') - -ax2.plot(size,np.nanmedian(pdf_near_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax2.plot(d_mam,np.nanmedian(pdf_near_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax2.tick_params(color='k',labelsize=14) -ax2.set_xscale('log') -ax2.set_yscale('log') - -ax3.plot(size,np.nanmedian(pdf_sfc_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax3.plot(d_mam,np.nanmedian(pdf_sfc_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax3.tick_params(color='k',labelsize=14) -ax3.set_xscale('log') -ax3.set_yscale('log') - -# ax0.set_xlim(5,4000) -# ax1.set_xlim(5,4000) -ax1.set_ylim(1e-3,1e5) -ax2.set_ylim(1e-3,1e5) -ax3.set_ylim(1e-3,1e5) - -ax2.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=14) -ax3.set_xlabel('Diameter (nm)',fontsize=14) -l=ax3.legend(loc='lower center', shadow=False, fontsize='medium') - -ax1.set_title('size distribution for ACEENA '+IOP,fontsize=15) - -ax3.text(200,3000,'Near Surface ('+str(pdf_sfc_obs.shape[1])+' legs)',fontsize=12) -ax2.text(200,3000,'Near Clouds ('+str(pdf_near_obs.shape[1])+' legs)',fontsize=12) -ax1.text(200,3000,'Above Clouds ('+str(pdf_above_obs.shape[1])+' legs)',fontsize=12) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() - -#%% plot percentile on sizes - -figname = figpath_aircraft_statistics+'SeparateCloud_percentile_AerosolSize_ACEENA_'+IOP+'.png' -print('plotting percentile figures to '+figname) - -# set position shift so that models and obs are not overlapped -p_shift = np.arange(nmodels+1) -p_shift = (p_shift - 
p_shift.mean())*0.2 - -fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(6,8)) - -ax1.boxplot(p2_above_obs,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box -for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(p2_above_model[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - -ax2.boxplot(p2_near_obs,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box -for mm in range(nmodels): - c = color_model[mm] - ax2.boxplot(p2_near_model[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - -ax3.boxplot(p2_sfc_obs,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box -for mm in range(nmodels): - c = color_model[mm] - 
ax3.boxplot(p2_sfc_model[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - -ax3.tick_params(color='k',labelsize=12) -ax2.tick_params(color='k',labelsize=12) -ax1.tick_params(color='k',labelsize=12) -ax3.set_yscale('log') -ax2.set_yscale('log') -ax1.set_yscale('log') -ax1.set_xlim(-.5,blen-.5) -ax2.set_xlim(-.5,blen-.5) -ax3.set_xlim(-.5,blen-.5) - -ax3.set_xlabel('Diameter (nm)',fontsize=14) -ax2.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) -ax1.set_title('percentile for ACEENA '+IOP,fontsize=15) - -ax3.text(2.4,4000,'Near Surface ('+str(pdf_sfc_obs.shape[1])+' legs)',fontsize=12) -ax2.text(2.4,4000,'Near Clouds ('+str(pdf_near_obs.shape[1])+' legs)',fontsize=12) -ax1.text(2.4,4000,'Above Clouds ('+str(pdf_above_obs.shape[1])+' legs)',fontsize=12) - -xlabel=[str(binl[x])+'-'+str(binh[x]) for x in range(blen)] -ax1.set_xticks(range(len(binm))) -ax1.set_xticklabels(xlabel) -ax2.set_xticks(range(len(binm))) -ax2.set_xticklabels(xlabel) -ax3.set_xticks(range(len(binm))) -ax3.set_xticklabels(xlabel) -ax1.set_ylim(1e-3,1e5) -ax2.set_ylim(1e-3,1e5) -ax3.set_ylim(1e-3,1e5) - -# plot temporal lines for label -ax3.plot([],c='k',label='Obs') -for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) -ax3.legend(loc='lower left', shadow=False, fontsize='medium') -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() - diff --git a/python/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py b/python/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py deleted file mode 100644 index 3639357..0000000 --- a/python/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py +++ /dev/null @@ -1,383 +0,0 @@ -""" -# plot pdf and percentiles in 
several aerosol size bins for aircraft data -# separated by observed PBLH -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import hhmmss2sec,yyyymmdd2cday -from read_ARMdata import read_pblhtmpl1 -from read_surface import read_dl_pblh -from read_aircraft import read_cpc -from read_netcdf import read_merged_size,read_extractflight -from quality_control import qc_remove_neg - -#%% settings - -from settings import campaign, cpcpath,merged_size_path, pblhpath, dlpath, \ - Model_List, color_model, IOP, E3SM_aircraft_path, figpath_aircraft_statistics - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - -# set final bin sizes -binl = np.array([3, 15, 70, 400, 1000]) -binh = np.array([10, 70, 400, 1000, 3000]) -binm = (binl+binh)/2 - -# set a range around PBLH (PBLH +/- heightdiff) that only data outside of the range are counted -heightdiff = 100 - -#%% read in doppler lidar data. 
this is all days in one file -dl=read_dl_pblh(dlpath+'sgpdlC1_mlh_0.08.txt') - -mlh_dl = dl[6,:]*1000 -day_dl = np.array(mlh_dl[:]) -time_dl = np.array(mlh_dl[:]) -for tt in range(len(time_dl)): - yyyymmdd=format(int(dl[0,tt]),'04d')+format(int(dl[1,tt]),'02d')+format(int(dl[2,tt]),'02d') - hhmmss=format(int(dl[3,tt]),'02d')+':'+format(int(dl[4,tt]),'02d')+':'+format(int(dl[5,tt]),'02d') - day_dl[tt]=yyyymmdd2cday(yyyymmdd) - time_dl[tt]=hhmmss2sec(hhmmss) -mlh_dl=qc_remove_neg(mlh_dl) - - -#%% find files for flight information - -lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') -lst.sort() - -if len(lst)==0: - raise ValueError('cannot find any file') - -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() -else: - raise ValueError('this code is only for HISCALE, check the campaign settings') - -if len(lst)==0: - raise ValueError('cannot find any file') - -#%% read all data - -# pdf average for legs -pdf_below_obs=np.full([44,len(lst)*10],np.nan) -pdf_above_obs=np.full([44,len(lst)*10],np.nan) - -cpcdiff_above=np.full([len(lst)*10],np.nan) -cpcdiff_below=np.full([len(lst)*10],np.nan) - -nmodels=len(Model_List) -pdf_below_model=[] -pdf_above_model=[] -for mm in range(nmodels): - pdf_below_model.append(np.full([3000,len(lst)*10],np.nan)) - pdf_above_model.append(np.full([3000,len(lst)*10],np.nan)) - -n_below=0 -n_above=0 -n_total=0 - -print('reading '+format(len(lst))+' files to calculate the statistics: ') - -for filename in lst: - - # get date info: - date=filename[-12:-3] - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - print(date) - - #%% read aerosol size distribution - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') - 
(time,size,legnum,timeunit,cunit,long_name)=read_merged_size(filename,'leg_number') - (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') - (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') - (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') - (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') - time=np.ma.compressed(time) - timelen = len(time) - time=time/3600. - size=np.ma.compressed(size)*1000 # um to nm - sizel=sizel*1000 - sizeh=sizeh*1000 - merge=qc_remove_neg(merge) - merge=merge.T - - #%% read in CPC measurements - - if campaign=='HISCALE': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') - else: - raise ValueError('this code is only for HISCALE, check the campaign settings') - filename_c.sort() - # read in data - if len(filename_c)==1 or len(filename_c)==2: # some days have two flights - (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) - elif np.logical_and(campaign=='HISCALE', date=='20160425a'): - cpc=np.insert(cpc,0,cpc[:,0],axis=1) - cpc[0,0]=cpc[0,0]-1 - time_cpc = cpc[0,:]/3600 - cpc10 = cpc[1,:] - cpc3 = cpc[2,:] - elif len(filename_c)==0: - time_cpc=time - cpc10=np.nan*np.empty([len(time)]) - cpc3=np.nan*np.empty([len(time)]) - else: - raise ValueError('find too many files') - - cpcdiff = cpc3-cpc10 - cpcdiff=qc_remove_neg(cpcdiff) - - - #%% read in PBLH data from MPL - filename_mpl=glob.glob(pblhpath+'sgppblhtmpl1sawyerliC1*'+date[0:8]+'*.nc') - # read in data - if len(filename_mpl)==1: - (time_pblh,timeunit,mpl) = read_pblhtmpl1(filename_mpl[0]) - elif len(filename_mpl)==0: - print('no pblh file in this day. skip...') - continue - else: - raise ValueError('find too many files: ' + filename_mpl) - time_pblh=time_pblh/3600 - - #%% choose the same time of DL. 
get pblh - cday0=yyyymmdd2cday(date[0:8]) - idx_dl = day_dl==cday0 - time_dl2 = time_dl[idx_dl]/3600 - mlh_dl2 = mlh_dl[idx_dl] - - #%% read in Models - datam2 = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') - datam2.append(datam*1e-6) # #/m3 to #/cm3 - - timem = timem/3600 - - if len(timem)!=len(time) or len(time)!=len(time_cpc): - raise ValueError('time dimension for obs and/or model are not consistent') - - #%% get pdf for legs below and above PBLH - - for ii in range(max(legnum)): - idx_leg = [legnum==ii+1] - # get the mean pblh for this leg - time_leg=time[legnum==ii+1] - cflag_leg=cflag[legnum==ii+1] - if np.sum(cflag_leg==1)>1: #0.01*len(cflag_leg): - continue # don't use legs with >10% cloud flag - - idx_dl2 = np.logical_and(time_dl2>=time_leg[0], time_dl2<=time_leg[-1]) - if idx_dl2.any()==False: - idx_dl2 = np.logical_and(time_dl2>=time_leg[0]-2, time_dl2<=time_leg[-1]+2) # extend time range - if idx_dl2.any(): - pblh = np.nanmean(mlh_dl2[idx_dl2]) - else:# use MPL pblh - idx_mpl = np.logical_and(time_pblh>=time_leg[0], time_pblh=time_leg[0]-2, time_pblhpblh+heightdiff: - pdf_above_obs[:,n_above] = np.nanmean(merge[:,legnum==ii+1],1) - cpcdiff_above[n_above] = np.nanmean(cpcdiff[legnum==ii+1]) - for mm in range(nmodels): - pdf_above_model[mm][:,n_above] = np.nanmean(datam2[mm][:,legnum==ii+1],1) - n_above=n_above+1 - - -#%% change to the pre-defined size bins - -d_model=np.arange(1,3001) -blen = len(binm) -p2_below_obs = list() -p2_above_obs = list() -p2_above_model = list() -p2_below_model = list() -for mm in range(nmodels): - p2_above_model.append([]) - p2_below_model.append([]) - -for bb in range(blen): - idx_m = np.logical_and(d_model>=binl[bb], d_model<=binh[bb]) - for mm in range(nmodels): - data_below = np.nansum(pdf_below_model[mm][idx_m,:],0) - data_above = 
np.nansum(pdf_above_model[mm][idx_m,:],0) - # exclude pre-assigned data space that are not used - p2_below_model[mm].append(data_below[range(n_below)]) - p2_above_model[mm].append(data_above[range(n_above)]) - if bb==0: - p2_below_obs.append(cpcdiff_below[~np.isnan(cpcdiff_below)]) - p2_above_obs.append(cpcdiff_above[~np.isnan(cpcdiff_above)]) - else: - idx_o = np.logical_and(sizel>=binl[bb], sizeh<=binh[bb]) - if any(idx_o): - tmp_below = np.nansum(pdf_below_obs[idx_o,:],0) - tmp_above = np.nansum(pdf_above_obs[idx_o,:],0) - # exclude not used or not detected (0 value) data - p2_below_obs.append(tmp_below[tmp_below!=0]) - p2_above_obs.append(tmp_above[tmp_above!=0]) - else: - p2_below_obs.append(np.full([n_below],np.nan)) - p2_above_obs.append(np.full([n_above],np.nan)) - -#%% change to dN/dlnDp -# model -dlnDp=np.empty(3000) -for bb in range(3000): - dlnDp[bb]=np.log((bb+2)/(bb+1)) -for nn in range(n_below): - for mm in range(nmodels): - pdf_below_model[mm][:,nn]=pdf_below_model[mm][:,nn]/dlnDp -for nn in range(n_above): - for mm in range(nmodels): - pdf_above_model[mm][:,nn]=pdf_above_model[mm][:,nn]/dlnDp - -# Obs -dlnDp=np.empty(len(size)) -for bb in range(len(size)): - dlnDp[bb]=np.log(sizeh[bb]/sizel[bb]) -for nn in range(n_below): - pdf_below_obs[:,nn]=pdf_below_obs[:,nn]/dlnDp -for nn in range(n_above): - pdf_above_obs[:,nn]=pdf_above_obs[:,nn]/dlnDp - - -#%% plot entire pdf below and above PBL -figname = figpath_aircraft_statistics+'SeparatePBLH_pdf_AerosolSize_HISCALE_'+IOP+'.png' -print('plotting PDF figures to '+figname) - -fig,(ax0,ax1) = plt.subplots(2,1,figsize=(8,6)) -idx_v=range(n_above) -h3=ax0.plot(size,np.nanmean(pdf_above_obs[:,idx_v],1),color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax0.plot(np.arange(1,3001),np.nanmean(pdf_above_model[mm][:,idx_v],1),color=color_model[mm],linewidth=1, label=Model_List[mm]) -# ax0.legend(loc='lower center', shadow=False, fontsize='large') -ax0.tick_params(color='k',labelsize=14) 
-ax0.set_xscale('log') -ax0.set_yscale('log') - -idx_v=range(n_below) -h3=ax1.plot(size,np.nanmean(pdf_below_obs[:,idx_v],1),color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax1.plot(np.arange(1,3001),np.nanmean(pdf_below_model[mm][:,idx_v],1),color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax1.legend(loc='lower left', shadow=False, fontsize='large') -ax1.tick_params(color='k',labelsize=14) -ax1.set_xscale('log') -ax1.set_yscale('log') - -# ax0.set_xlim(5,4000) -# ax1.set_xlim(5,4000) -ax0.set_ylim(1e-3,1e5) -ax1.set_ylim(1e-3,1e5) -ax1.set_xlabel('Diameter (nm)',fontsize=14) -ax0.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=13) -ax1.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=13) -ax0.set_title('size distribution for Hi-Scale '+IOP,fontsize=15) -fig.text(.65,.83,'Above PBL ('+str(n_above)+' legs)',fontsize=12) -fig.text(.65,.43,'Below PBL ('+str(n_below)+' legs)',fontsize=12) -# fig.text(.68,.83,'Above PBL ('+format(n_above/n_total*100,'.1f')+'%)',fontsize=12) -# fig.text(.68,.43,'Below PBL ('+format(n_below/n_total*100,'.1f')+'%)',fontsize=12) -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() - -#%% plot percentile on sizes - -figname = figpath_aircraft_statistics+'SeparatePBLH_percentile_AerosolSize_HISCALE_'+IOP+'.png' -print('plotting percentile figures to '+figname) - -# set position shift so that models and obs are not overlapped -p_shift = np.arange(nmodels+1) -p_shift = (p_shift - p_shift.mean())*0.2 - -fig,(ax0,ax1) = plt.subplots(2,1,figsize=(8,6)) - -ax0.boxplot(p2_above_obs,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box -for mm in range(nmodels): - c = color_model[mm] - 
ax0.boxplot(p2_above_model[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - -ax1.boxplot(p2_below_obs,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box -for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(p2_below_model[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(blen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - -ax0.tick_params(color='k',labelsize=12) -ax1.tick_params(color='k',labelsize=14) -# ax0.set_xscale('log') -# ax1.set_xscale('log') -ax0.set_yscale('log') -ax1.set_yscale('log') -ax0.set_xlim(-1,blen) -ax1.set_xlim(-1,blen) -ax1.set_xlabel('Diameter (nm)',fontsize=14) -ax0.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) -ax1.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) -ax0.set_title('percentile for Hi-Scale '+IOP,fontsize=15) -fig.text(.66,.83,'Above PBL ('+str(n_above)+' legs)',fontsize=12) -fig.text(.66,.43,'Below PBL ('+str(n_below)+' legs)',fontsize=12) - -xlabel=[str(binl[x])+'-'+str(binh[x]) for x in range(blen)] -ax0.set_xticks(range(len(binm))) -ax0.set_xticklabels(xlabel) -ax1.set_xticks(range(len(binm))) -ax1.set_xticklabels(xlabel) -# ax0.set_yticks([1,3,5,7,9,11,12,13,14,15,16]) -# ax0.set_yticklabels(range(400,4100,400)) -ax0.set_ylim(1e-3,1e5) 
-ax1.set_ylim(1e-3,1e5) - -# plot temporal lines for label -ax1.plot([],c='k',label='Obs') -for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) -ax1.legend(loc='lower left', shadow=False, fontsize='large') -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() \ No newline at end of file diff --git a/python/plotting/plot_flight_percentile_lat_CCN.py b/python/plotting/plot_flight_percentile_lat_CCN.py deleted file mode 100644 index 738da39..0000000 --- a/python/plotting/plot_flight_percentile_lat_CCN.py +++ /dev/null @@ -1,433 +0,0 @@ -""" -# plot percentile of aerosol number concentration binned by different latitudes -# separated by below-cloud, near-cloud and above-cloud -# for aircraft measurements in CSET or SOCRATES -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import glob -import os -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_ccn_socrates, read_RF_NCAR -from read_netcdf import read_extractflight -from quality_control import qc_remove_neg - -#%% settings - -plot_method = 'all' # 'height': separate by height. 'all': all heights below 5km - -from settings import campaign, Model_List, color_model, \ - latbin, E3SM_aircraft_path, figpath_aircraft_statistics - -if campaign in ['CSET', 'SOCRATES']: - from settings import ccnpath, RFpath -else: - raise ValueError('This code is only for CSET or SOCRATES. 
check campaign setting: '+campaign) - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - -dlat = latbin[1]-latbin[0] -latmin = latbin-dlat/2 -latmax = latbin+dlat/2 -latlen = len(latbin) - -nmodels=len(Model_List) - -#%% find files for flight information - -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -#%% define variables by latitude bins below, near and above clouds - -ccna_below_lat = [] -ccna_near_lat = [] -ccna_above_lat = [] -ccnb_below_lat = [] -ccnb_near_lat = [] -ccnb_above_lat = [] -for bb in range(latlen): - ccna_below_lat.append(np.empty(0)) - ccna_near_lat.append(np.empty(0)) - ccna_above_lat.append(np.empty(0)) - ccnb_below_lat.append(np.empty(0)) - ccnb_near_lat.append(np.empty(0)) - ccnb_above_lat.append(np.empty(0)) - -ccn3_below_lat = [] -ccn3_near_lat = [] -ccn3_above_lat = [] -ccn5_below_lat = [] -ccn5_near_lat = [] -ccn5_above_lat = [] -for mm in range(nmodels): - ccn3_below_lat.append([np.empty(0) for bb in range(latlen)]) - ccn3_near_lat.append([np.empty(0) for bb in range(latlen)]) - ccn3_above_lat.append([np.empty(0) for bb in range(latlen)]) - ccn5_below_lat.append([np.empty(0) for bb in range(latlen)]) - ccn5_near_lat.append([np.empty(0) for bb in range(latlen)]) - ccn5_above_lat.append([np.empty(0) for bb in range(latlen)]) - -print('reading '+format(len(alldates))+' files to calculate the statistics: ') - -for date in alldates: - print(date) - - #%% read in Models - - ccn3=[] - ccn5=[] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,ccn3_tmp,timeunitm,ccn3_unit,ccn3_longname)=read_extractflight(filename_m,'CCN3') - (timem,heightm,ccn5_tmp,timeunitm,ccn5_unit,ccn5_longname)=read_extractflight(filename_m,'CCN5') - 
ccn3.append(ccn3_tmp) - ccn5.append(ccn5_tmp) - - # get supersaturation - SS3 = ccn3_longname.split('=')[-1] - SS5 = ccn5_longname.split('=')[-1] - - #%% read in observations for CSET and SOCRATES - # CSET does not have observed CCN - if campaign=='CSET': - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - - # SOCRATES - elif campaign=='SOCRATES': - filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') - if len(filename_ccn)==1: - (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) - time_ccn = data0[0,:] - ccn = data0[1,:] - SS = data0[3,:] - ccn=qc_remove_neg(ccn) - timea=time_ccn - timeb=time_ccn - ccna=np.array(ccn) - ccnb=np.array(ccn) - idxa=np.logical_and(SS>0.05, SS<0.15) - ccna[idxa==False]=np.nan - SSa=np.full((len(timea)),0.1) - idxb=np.logical_and(SS>0.45, SS<0.55) - ccnb[idxb==False]=np.nan - SSb=np.full((len(timeb)),0.5) - elif len(filename_ccn)==0: - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files: ' + filename_ccn) - - if any(timea!=timeb): - raise ValueError('inconsitent time dimension') - - - # need latitude from RF file - lst = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - if len(lst)==1 or len(lst)==2: # SOCRATES has two flights in 20180217, choose the later one - filename=lst[-1] - else: - raise ValueError('find no file or too many files: ' + lst) - (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') - - # exclude NaNs - idx = np.logical_or(~np.isnan(ccna), ~np.isnan(ccnb)) - ccna=ccna[idx] - ccnb=ccnb[idx] - SSa=SSa[idx] - SSb=SSb[idx] - - # for interpolation of model results - timea=timea[idx] - timeb=timeb[idx] - time=timea - # interpolate model results into observational time - for mm in 
range(nmodels): - ccn3[mm] = (np.interp(timea,timem,ccn3[mm])) - ccn5[mm] = (np.interp(timeb,timem,ccn5[mm])) - height = np.interp(timeb,timem,heightm) - lat = np.interp(timeb,timem,lat) - - - #%% separate data by cloud or height - flag_below = np.zeros(len(time)) - flag_near = np.zeros(len(time)) - flag_above = np.zeros(len(time)) - - if plot_method == 'height': - for ii in range(len(time)): - if height[ii]>5000: - continue # exclude measurements above 5km - elif height[ii]<2000: - flag_below[ii]=1 - elif height[ii]>=2000: - flag_above[ii]=1 - - # option 3: use all heights below 5km - elif plot_method == 'all': - for ii in range(len(time)): - if height[ii]<=5000: # exclude measurements above 5km - flag_below[ii]=1 - - for bb in range(latlen): - idx = np.logical_and(lat>=latmin[bb], lat25,lat<28)])) - - #%% separate data by cloud or height - flag_below = np.zeros(len(time)) - flag_near = np.zeros(len(time)) - flag_above = np.zeros(len(time)) - - # option 1: separate data by cloud and put in each latitude bin - if plot_method == 'cloud': - for ii in range(len(time)): - if height[ii]>5000: - continue # exclude measurements above 5km - # check if there is cloud within 1hr window - i_start = max(ii-1800, 0) - i_end = min(ii+1800, len(time)) - if any(cldflag[i_start:i_end]==1): - cheight=height[i_start:i_end][cldflag[i_start:i_end]==1] - cldmax = np.max(cheight) - cldmin = np.min(cheight) - if height[ii]=cldmin and height[ii]<=cldmax: - flag_near[ii]=1 - elif height[ii]>max(cldmax,1000): - flag_above[ii]=1 - - # option 2: separate data by height - elif plot_method == 'height': - for ii in range(len(time)): - if height[ii]>5000: - continue # exclude measurements above 5km - # check if there is cloud within 1hr window - i_start = max(ii-1800, 0) - i_end = min(ii+1800, len(time)) - if any(cldflag[i_start:i_end]==1): - cheight=height[i_start:i_end][cldflag[i_start:i_end]==1] - cldmax = np.max(cheight) - cldmin = np.min(cheight) - if height[ii]max(cldmax,2000): - 
flag_above[ii]=1 - else: - if height[ii]<2000: - flag_below[ii]=1 - elif height[ii]>=2000: - flag_above[ii]=1 - - # option 3: use all heights below 5km - elif plot_method == 'all': - for ii in range(len(time)): - if height[ii]<=5000: # exclude measurements above 5km - flag_below[ii]=1 - - for bb in range(latlen): - idx = np.logical_and(lat>=latmin[bb], lat10nm) - figname = figpath_aircraft_statistics+'percentile_lat_CN10nm_bycldheight_'+campaign+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(8,6)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.boxplot(cpc_above_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(ncn10_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax1.tick_params(color='k',labelsize=15) - # ax1.set_yscale('log') - ax1.set_xlim(-1,latlen) - ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax1.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) - - ax2.boxplot(cpc_near_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # 
need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax2.boxplot(ncn10_near_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax2.tick_params(color='k',labelsize=15) - # ax2.set_yscale('log') - ax2.set_xlim(-1,latlen) - ax2.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax2.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax2.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax3.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax3.tick_params(color='k',labelsize=15) - # ax3.set_yscale('log') - ax3.set_xlim(-1,latlen) - ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - # plot temporal lines for label - ax3.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.set_xlabel('Latitude',fontsize=16) - - ax1.set_xticklabels([]) - ax2.set_xticklabels([]) - ax3.set_xticklabels([]) - ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) - ax1.set_title('Percentile of CN (>10nm) # 
(cm$^{-3}$) '+campaign,fontsize=17) - fig.text(0.1,0.95,'Above Clouds',fontsize=15) - fig.text(0.1,0.6,'Near Clouds',fontsize=15) - fig.text(0.1,0.25,'Below Cloud',fontsize=15) - - ax2.legend(loc='upper right', shadow=False, fontsize='x-large') - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - - #%% plot for UHSAS (>100nm) - figname = figpath_aircraft_statistics+'percentile_lat_CN100nm_bycldheight_'+campaign+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(8,6)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.boxplot(uhsas_above_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(ncn100_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax1.tick_params(color='k',labelsize=15) - # ax1.set_yscale('log') - ax1.set_xlim(-1,latlen) - ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax1.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) - - ax2.boxplot(uhsas_near_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need 
patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax2.boxplot(ncn100_near_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax2.tick_params(color='k',labelsize=15) - # ax2.set_yscale('log') - ax2.set_xlim(-1,latlen) - ax2.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax2.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax2.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax3.tick_params(color='k',labelsize=15) - # ax3.set_yscale('log') - ax3.set_xlim(-1,latlen) - ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - # plot temporal lines for label - ax3.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.set_xlabel('Latitude',fontsize=16) - - ax1.set_xticklabels([]) - ax2.set_xticklabels([]) - ax3.set_xticklabels([]) - ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) - ax1.set_title('Percentile of CN 
(>100nm) # (cm$^{-3}$) '+campaign,fontsize=17) - fig.text(0.1,0.95,'Above Clouds',fontsize=15) - fig.text(0.1,0.6,'Near Clouds',fontsize=15) - fig.text(0.1,0.25,'Below Cloud',fontsize=15) - - ax2.legend(loc='upper right', shadow=False, fontsize='x-large') - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - -elif plot_method == 'height': - #%% for CPC (>10nm) - figname = figpath_aircraft_statistics+'percentile_lat_CN10nm_byheight_'+campaign+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.boxplot(cpc_above_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(ncn10_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax1.tick_params(color='k',labelsize=15) - # ax1.set_yscale('log') - ax1.set_xlim(-1,latlen) - ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - if campaign=='SOCRATES': - ax1.set_ylim(-100,4000) - elif campaign=='CSET': - ax1.set_ylim(-20,1200) - ax1.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', 
color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax3.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax3.tick_params(color='k',labelsize=15) - # ax3.set_yscale('log') - ax3.set_xlim(-1,latlen) - ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - if campaign=='SOCRATES': - ax3.set_ylim(-100,2000) - elif campaign=='CSET': - ax3.set_ylim(-50,4000) - # plot temporal lines for label - ax3.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.set_xlabel('Latitude',fontsize=16) - - ax1.set_xticklabels([]) - ax3.set_xticklabels([]) - ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) - ax1.set_title('Percentile of CN (>10nm) # (cm$^{-3}$) '+campaign,fontsize=17) - fig.text(0.1,0.9,'2-5km',fontsize=15) - fig.text(0.1,0.4,'0-2km',fontsize=15) - - ax1.legend(loc='upper right', shadow=False, fontsize='x-large') - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - - #%% plot for UHSAS (>100nm) - figname = figpath_aircraft_statistics+'percentile_lat_CN100nm_byheight_'+campaign+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.boxplot(uhsas_above_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - 
medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(ncn100_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax1.tick_params(color='k',labelsize=15) - # ax1.set_yscale('log') - ax1.set_xlim(-1,latlen) - ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax1.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax3.tick_params(color='k',labelsize=15) - # ax3.set_yscale('log') - ax3.set_xlim(-1,latlen) - ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - ax3.set_ylim(-10,400) - # plot temporal lines for label - ax3.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.set_xlabel('Latitude',fontsize=16) - - ax1.set_xticklabels([]) - 
ax3.set_xticklabels([]) - ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) - ax1.set_title('Percentile of CN (>100nm) # (cm$^{-3}$) '+campaign,fontsize=17) - fig.text(0.1,0.9,'2-5km',fontsize=15) - fig.text(0.1,0.4,'0-2km',fontsize=15) - - ax1.legend(loc='upper right', shadow=False, fontsize='x-large') - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -#%% - -elif plot_method == 'all': - #%% for CPC (>10nm) - figname = figpath_aircraft_statistics+'percentile_lat_CN_'+campaign+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax1.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax1.tick_params(color='k',labelsize=15) - # ax1.set_yscale('log') - ax1.set_xlim(-1,latlen) - ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - if campaign=='SOCRATES': - ax1.set_ylim(-100,4000) - elif campaign=='CSET': - ax1.set_ylim(-20,2500) - ax1.plot([],c='k',label='CPC') - for mm in range(nmodels): - ax1.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[-1],widths=0.15, - 
boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), - vert=True, patch_artist=True) # need patch_artist to fill color in box - for mm in range(nmodels): - c = color_model[mm] - ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, - positions=np.array(range(latlen))+p_shift[mm],widths=0.15, - boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), - medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), - vert=True, patch_artist=True) # need patch_artist to fill color in box - ax3.tick_params(color='k',labelsize=15) - # ax3.set_yscale('log') - ax3.set_xlim(-1,latlen) - ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) - if campaign=='SOCRATES': - ax3.set_ylim(-10,400) - elif campaign=='CSET': - ax3.set_ylim(-10,1000) - # plot temporal lines for label - ax3.plot([],c='k',label='UHSAS100') - for mm in range(nmodels): - ax3.plot([],c=color_model[mm],label=Model_List[mm]) - - ax3.set_xlabel('Latitude',fontsize=16) - - ax1.set_xticklabels([]) - ax3.set_xticklabels([]) - ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) - ax1.set_title('Percentile of CN # (cm$^{-3}$) '+campaign,fontsize=17) - - ax1.legend(loc='upper right', shadow=False, fontsize='x-large') - ax3.legend(loc='upper right', shadow=False, fontsize='x-large') - fig.text(0.1,0.9,'>10nm',fontsize=15) - fig.text(0.1,0.4,'>100nm',fontsize=15) - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -#%% -else: - raise ValueError('does not recognize plot_method: '+plot_method) \ No newline at end of file diff --git a/python/plotting/plot_flight_percentile_lat_cldfreq.py b/python/plotting/plot_flight_percentile_lat_cldfreq.py deleted file mode 100644 index 01d0051..0000000 --- a/python/plotting/plot_flight_percentile_lat_cldfreq.py +++ /dev/null @@ -1,164 +0,0 @@ -"""# plot percentile of meteorological variables binned by different latitudes -# 
for aircraft measurements in CSET or SOCRATES -# only select a certain height ranges for warm clouds (the height range needs to be further tuned) -""" -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_RF_NCAR -from read_netcdf import read_extractflight -from specific_data_treatment import lwc2cflag -from quality_control import qc_mask_takeoff_landing - -#%% settings - -from settings import campaign, Model_List, color_model, \ - latbin, E3SM_aircraft_path, figpath_aircraft_statistics - -if campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('This code is only for CSET or SOCRATES. check campaign setting: '+campaign) - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - -dlat = latbin[1]-latbin[0] -latmin = latbin-dlat/2 -latmax = latbin+dlat/2 -latlen = len(latbin) - -nmodels=len(Model_List) - -#%% find files for flight information - -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot fine any file') -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - - -#%% define variables by latitude bins - -height_lat = [] -cbheight = [] # cloud base height -cflag_lat = [] -cloudo_lat = [] # cloud fraction by flag - -for bb in range(latlen): - height_lat.append(np.empty(0)) - cbheight.append(np.empty(0)) - cflag_lat.append(np.empty(0)) - cloudo_lat.append(np.empty(0)) - -cloudm_lat = [] -for mm in range(nmodels): - cloudm_lat.append(list(cloudo_lat)) - -print('reading '+format(len(alldates))+' files to calculate the statistics: ') - -for date in alldates: - print(date) - - #%% read in Models - cloudm = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - 
(timem,heightm,cloud,timeunitm,clunit,cllongname)=read_extractflight(filename_m,'CLOUD') - cloudm.append(cloud) - - #%% read in observations - # note that it is only for CSET and SOCRATES - lst = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - if len(lst)==1 or len(lst)==2: # SOCRATES has two flights in 20180217, choose the later one - filename=lst[-1] - else: - raise ValueError('find no file or too many files: '+lst) - (time,height,timeunit,hunit,hlongname,cellsize,cellunit)=read_RF_NCAR(filename,'ALT') - (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') - (time,lon,timeunit,lonunit,lonlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LON') - (time,lwc,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename,'PLWCC') - - # exclude 30min after takeoff and before landing - height=qc_mask_takeoff_landing(time,height) - lat=qc_mask_takeoff_landing(time,lat) - lon=qc_mask_takeoff_landing(time,lon) - lwc=qc_mask_takeoff_landing(time,lwc) - timem=qc_mask_takeoff_landing(time,timem) - for mm in range(nmodels): - cloudm[mm]=qc_mask_takeoff_landing(time,cloudm[mm]) - - # calculate cloud flag based on LWC - cldflag=lwc2cflag(lwc,lwcunit) - - - #%% put data in each latitude bin - for bb in range(latlen): - idx = np.logical_and(lat>=latmin[bb], lat=zmin[zz], heighttimea[0]: - cflag = np.insert(cflag,np.full(int(time[0]-timea[0]),0), -9999) - time = np.insert(time,np.full(int(time[0]-timea[0]),0), -9999) - if time[-1]timea[-1]: - cflag = cflag[0:np.where(time==timea[-1])[0][0]+1] - time = time[0:np.where(time==timea[-1])[0][0]+1] - ccna = qc_mask_cloudflag(ccna,cflag) - ccnb = qc_mask_cloudflag(ccnb,cflag) - - # CSET does not have observed CCN - elif campaign=='CSET': - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - - # SOCRATES - elif campaign=='SOCRATES': - 
filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') - if len(filename_ccn)==1: - (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) - time_ccn = data0[0,:] - ccn = data0[1,:] - SS = data0[3,:] - ccn=qc_remove_neg(ccn) - timea=time_ccn - timeb=time_ccn - ccna=np.array(ccn) - ccnb=np.array(ccn) - idxa=np.logical_and(SS>0.05, SS<0.15) - ccna[idxa==False]=np.nan - SSa=np.full((len(timea)),0.1) - idxb=np.logical_and(SS>0.45, SS<0.55) - ccnb[idxb==False]=np.nan - SSb=np.full((len(timeb)),0.5) - elif len(filename_ccn)==0: - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files: '+filename_ccn) - - if any(timea!=timeb): - raise ValueError('time dimension is inconsistent') - - # exclude NaNs - idx = np.logical_or(~np.isnan(ccna), ~np.isnan(ccnb)) - ccna_all.append(ccna[idx]) - ccnb_all.append(ccnb[idx]) - SSa_all=np.append(SSa_all,SSa[idx]) - SSb_all=np.append(SSb_all,SSb[idx]) - - height2=np.interp(timea,timem,heightm) - height_all.append(height2[idx]) - - # for interpolation of model results - timea=timea[idx] - timeb=timeb[idx] - - # interpolate model results into observational time - for mm in range(nmodels): - ccn3_all[mm].append(np.interp(timea,timem,ccn3[mm])) - ccn5_all[mm].append(np.interp(timeb,timem,ccn5[mm])) - -#%% calculate percentiles for each height bin - -ccna_z = list() -ccnb_z = list() -ccn3_z = [] -ccn5_z = [] -nmodels=len(Model_List) -for mm in range(nmodels): - ccn3_z.append([]) - ccn5_z.append([]) -for zz in range(zlen): - ccna_z.append(np.empty(0)) - ccnb_z.append(np.empty(0)) - for mm in range(nmodels): - ccn3_z[mm].append(np.empty(0)) - ccn5_z[mm].append(np.empty(0)) - -ndays=len(height_all) -for dd in range(ndays): - height = height_all[dd] - ccna = ccna_all[dd] - ccnb = ccnb_all[dd] - for zz in range(zlen): - idx = np.logical_and(height>=zmin[zz], 
height=zmin[zz], height=zmin[zz], height=zmin[zz], height=0] - if len(data)>0: - cldfreq_flag[zz] = sum(data==1)/len(data) - for mm in range(nmodels): - data = cldm_z[mm][zz] - data = data[~np.isnan(data)] - if len(data)>0: - cldfreq_m[mm][zz] = np.mean(data) - -#%% plot frequency -if campaign in ['HISCALE', 'ACEENA']: - figname = figpath_aircraft_statistics+'profile_height_CldFreq_'+campaign+'_'+IOP+'.png' -else: - figname = figpath_aircraft_statistics+'profile_height_CldFreq_'+campaign+'.png' -print('plotting figures to '+figname) - -fig,ax = plt.subplots(figsize=(4,8)) - -ax.plot(cldfreq_flag,z,color='k',linewidth=1,linestyle='-',label='Obs') -for mm in range(nmodels): - ax.plot(cldfreq_m[mm],z,color=color_model[mm],linewidth=1,label=Model_List[mm]) - -ax.tick_params(color='k',labelsize=12) -# ax.set_ylim(-1,zlen) -# ax.set_yticks(range(zlen)) -# ax.set_yticks(z[0:-1:2]) -ax.set_ylabel('Height (m MSL)',fontsize=12) -ax.legend(loc='upper right', fontsize='large') -ax.set_xlabel('Cloud Frequency',fontsize=12) -if campaign in ['HISCALE', 'ACEENA']: - ax.set_title(IOP,fontsize=15) -else: - ax.set_title(campaign,fontsize=15) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/python/plotting/plot_flight_profile_z_LWC.py b/python/plotting/plot_flight_profile_z_LWC.py deleted file mode 100644 index 4cef45d..0000000 --- a/python/plotting/plot_flight_profile_z_LWC.py +++ /dev/null @@ -1,199 +0,0 @@ -""" -# plot vertical profile of cloud fraction for all flights in each IOP -# compare models and aircraft measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_wcm, read_RF_NCAR -from read_netcdf import read_extractflight -from quality_control import qc_mask_qcflag,qc_remove_neg - -#%% settings - -from settings import campaign, Model_List, color_model, \ - height_bin, E3SM_aircraft_path, 
figpath_aircraft_statistics - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, wcmpath -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_statistics): - os.makedirs(figpath_aircraft_statistics) - - -#%% -z=height_bin -dz = z[1]-z[0] -zmin=z-np.insert((z[1:]-z[0:-1])/2,0,dz) -zmax=z+np.append((z[1:]-z[0:-1])/2,dz) - -zlen=len(z) - -#%% find files for flight information - -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -#%% read all data - -heightall=[] -lwcobsall=[] -lwcmall=[] - -nmodels=len(Model_List) -for mm in range(nmodels): - lwcmall.append([]) - -print('reading '+format(len(alldates))+' files to calculate the statistics: ') - -for date in alldates: - print(date) - - #%% read in obs - if campaign in ['HISCALE', 'ACEENA']: - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - filename_wcm = glob.glob(wcmpath+'WCM_G1_'+date[0:8]+'*') - filename_wcm.sort() - if len(filename_wcm)==0: - print('skip this date: '+date) - continue - (wcm,wcmlist)=read_wcm(filename_wcm[flightidx-1]) - time0=wcm[0,:] - flag=wcm[-1,:] - lwcobs=wcm[2,:] - lwcobs=qc_remove_neg(lwcobs) - lwcobs=qc_mask_qcflag(lwcobs,flag) - - elif campaign in ['CSET', 'SOCRATES']: - 
filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') - if len(filename)==1 or len(filename)==2: # SOCRATES has two flights in 20180217, choose the later one - (time,lwcobs,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') - lwcobs=qc_remove_neg(lwcobs) - - lwcobsall.append(lwcobs) - - #%% read in models - - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,lwc,timeunit,cldunit,cldname)=read_extractflight(filename_m,'LWC') - - # change E3SM unit from kg/m3 to g/m3 - lwcmall[mm].append(lwc*1000) - - heightall.append(heightm) - -#%% calculate percentiles for each height bin - -lwcobs_z = list() -lwcm_z = [] -for mm in range(nmodels): - lwcm_z.append([]) -for zz in range(zlen): - lwcobs_z.append(np.empty(0)) - for mm in range(nmodels): - lwcm_z[mm].append(np.empty(0)) - -ndays=len(heightall) -# ndays=1; -for dd in range(ndays): - height = heightall[dd] - lwcobs = lwcobsall[dd] - for zz in range(zlen): - idx = np.logical_and(height>=zmin[zz], height=zmin[zz], height0: - lwcmean_o[zz] = np.mean(data) - std_lwc_o[zz] = np.std(data)/np.sqrt(len(data)) - for mm in range(nmodels): - data = lwcm_z[mm][zz] - data = data[~np.isnan(data)] - if len(data)>0: - lwcmean_m[mm][zz] = np.mean(data) - -#%% plot frequency -if campaign in ['HISCALE', 'ACEENA']: - figname = figpath_aircraft_statistics+'profile_height_LWC_'+campaign+'_'+IOP+'.png' -else: - figname = figpath_aircraft_statistics+'profile_height_LWC_'+campaign+'.png' -print('plotting figures to '+figname) - -fig,ax = plt.subplots(figsize=(3,8)) - -ax.plot(lwcmean_o,z,color='k',linewidth=1,linestyle='-',label='Obs') -ax.fill_betweenx(z,lwcmean_o-std_lwc_o,lwcmean_o+std_lwc_o,facecolor='k',alpha=0.2) - -for mm in range(nmodels): - ax.plot(lwcmean_m[mm],z,color=color_model[mm],linewidth=1,label=Model_List[mm]) - -ax.tick_params(color='k',labelsize=16) -# ax.set_ylim(-1,zlen) -# ax.set_yticks(range(zlen)) -if 
campaign=='HISCALE': - ax.set_ylim(0,4500) -ax.set_yticks(z) -ax.set_ylabel('Height (m MSL)',fontsize=16) -ax.legend(loc='upper right', fontsize='large') -ax.set_xlabel('LWC (g/m3)',fontsize=16) -if campaign in ['HISCALE', 'ACEENA']: - ax.set_title(IOP,fontsize=18) -else: - ax.set_title(campaign,fontsize=18) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - - diff --git a/python/plotting/plot_flight_timeseries_AerosolComposition.py b/python/plotting/plot_flight_timeseries_AerosolComposition.py deleted file mode 100644 index 5b6c72e..0000000 --- a/python/plotting/plot_flight_timeseries_AerosolComposition.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -# plot aircraft track data -# timeseries of aerosol composition (SO4 and total organic) concentration -# compare models and aircraft measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import hhmmss2sec -from read_aircraft import read_ams,read_iwg1 -from read_netcdf import read_merged_size,read_extractflight -from quality_control import qc_mask_qcflag,qc_remove_neg - -#%% settings - -from settings import campaign, Model_List, color_model, \ - E3SM_aircraft_path, figpath_aircraft_timeseries - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, merged_size_path, amspath, iwgpath -elif campaign in ['CSET', 'SOCRATES']: - raise ValueError('CSET and SOCRATES do not have composition data') -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_timeseries): - os.makedirs(figpath_aircraft_timeseries) - -#%% find files for flight information -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - 
lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -# for each flight -for date in alldates: - - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - #% read in flight information - if campaign=='HISCALE': - filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' - elif campaign=='ACEENA': - filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') - (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') - time=np.ma.compressed(time) - - #%% read T and P from iwg - filename_i=glob.glob(iwgpath+'aaf.iwg*.'+date+'*txt') - filename_i.sort() - # read in data - if len(filename_i)==1: - (iwg,iwgvars)=read_iwg1(filename_i[0]) - timelen = len(iwg) - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - iwg.insert(1403,list(iwg[1403])) - tstr=iwg[1403][1] - tstr=tstr[0:-1]+str(int(tstr[-1])-1) - iwg[1403][1]=tstr - del iwg[-1] - # get variables - time_iwg=np.empty(timelen) - T_iwg=np.empty(timelen) - P_iwg=np.empty(timelen) - for t in range(timelen): - T_iwg[t]=float(iwg[t][20])+273.15 - P_iwg[t]=float(iwg[t][23])*100 - timestr=iwg[t][1].split(' ') - time_iwg[t]=hhmmss2sec(timestr[1]) - else: - raise ValueError('find no file or multiple files: ' + filename_i) - - #%% read aerosol composition in AMS - - filename_ams=glob.glob(amspath+'*'+date[0:8]+'*') - filename_ams.sort() - - if len(filename_ams)==1 or len(filename_ams)==2: - 
(ams,amslist)=read_ams(filename_ams[flightidx-1]) - time_ams=ams[0,:] - flag=ams[-1,:] - orgaaf=ams[1,:] - so4aaf=ams[5,:] - # flag=1 is also good data but behind CVI inlet. currently only use good data behind isokinetic inlet (flag=0) - orgaaf=qc_mask_qcflag(orgaaf,flag) - so4aaf=qc_mask_qcflag(so4aaf,flag) - elif len(filename_ams)==0: - time_ams = time_iwg - orgaaf = np.full(len(time_ams),np.nan) - so4aaf = np.full(len(time_ams),np.nan) - else: - raise ValueError('find too many files') - - # change values from standardize condition to ambient condition - T_ams = np.interp(time_ams,time,T_iwg) - P_ams = np.interp(time_ams,time,P_iwg) - so4aaf = so4aaf * (296.15/T_ams) * (P_ams/101325.) - orgaaf = orgaaf * (296.15/T_ams) * (P_ams/101325.) - - # some quality check: - orgaaf=qc_remove_neg(orgaaf) - so4aaf=qc_remove_neg(so4aaf) - - - #%% read in Models - nmodels=len(Model_List) - so4_m = [] - org_m = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,soa_a1,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a1') - (timem,heightm,soa_a2,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a2') - (timem,heightm,soa_a3,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a3') - (timem,heightm,so4_a1,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a1') - (timem,heightm,so4_a2,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a2') - (timem,heightm,so4_a3,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a3') - (timem,heightm,pom_a1,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a1') - (timem,heightm,pom_a3,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a3') - (timem,heightm,pom_a4,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a4') - (timem,heightm,mom_a1,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a1') - 
(timem,heightm,mom_a2,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a2') - (timem,heightm,mom_a3,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a3') - (timem,heightm,mom_a4,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a4') - - # add nucleation mode if available - try: - (timem,heightm,soa_a5,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a5') - model_org = soa_a1+soa_a2+soa_a3+soa_a5 + pom_a1+pom_a3+pom_a4 + mom_a1+mom_a2+mom_a3+mom_a4 - except: - model_org = soa_a1+soa_a2+soa_a3 + pom_a1+pom_a3+pom_a4 + mom_a1+mom_a2+mom_a3+mom_a4 - try: - (timem,heightm,so4_a5,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a5') - model_so4 = so4_a1+so4_a2+so4_a3+so4_a5 - except: - model_so4 = so4_a1+so4_a2+so4_a3 - - # change E3SM unit from kg/kg to ug/m3 - rho = P_iwg/T_iwg/287.06 - model_so4=model_so4*1e9*rho - model_org=model_org*1e9*rho - - so4_m.append(model_so4) - org_m.append(model_org) - - timem2 = timem/3600 - - #%% make plot - - figname = figpath_aircraft_timeseries+'AerosolComposition_'+campaign+'_'+date+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.plot(time_ams/3600,so4aaf,color='k',linewidth=1,label='OBS') - for mm in range(nmodels): - ax1.plot(timem2, so4_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax1.set_yscale('log') - ax1.tick_params(color='k',labelsize=12) - ylim1 = ax1.get_ylim() - - ax2.plot(time_ams/3600,orgaaf,color='k',linewidth=1,label='OBS') - for mm in range(nmodels): - ax2.plot(timem2, org_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax2.set_yscale('log') - ax2.tick_params(color='k',labelsize=12) - ylim2 = ax2.get_ylim() - - ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - ax2.legend(loc='center right', shadow=False, 
fontsize='large',bbox_to_anchor=(1.25, .5)) - - ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) - - ax1.set_title('Aerosol Sulfate Concentration ($\mu$g/m$^3$)',fontsize=13) - ax2.set_title('Aerosol Organic Concentration ($\mu$g/m$^3$)',fontsize=13) - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - plt.close() \ No newline at end of file diff --git a/python/plotting/plot_flight_timeseries_CCN.py b/python/plotting/plot_flight_timeseries_CCN.py deleted file mode 100644 index 0c89dd8..0000000 --- a/python/plotting/plot_flight_timeseries_CCN.py +++ /dev/null @@ -1,221 +0,0 @@ -""" -# plot aircraft track data -# timeseries of CCN number concentration -# compare models and aircraft measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_ccn_hiscale,read_ccn_socrates -from read_ARMdata import read_ccn -from read_netcdf import read_extractflight -from quality_control import qc_mask_qcflag,qc_remove_neg - -#%% settings - -from settings import campaign, Model_List, color_model, \ - E3SM_aircraft_path, figpath_aircraft_timeseries - -if campaign in ['HISCALE', 'ACEENA']: - from settings import ccnpath, IOP -elif campaign in ['CSET', 'SOCRATES']: - from settings import ccnpath -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_timeseries): - os.makedirs(figpath_aircraft_timeseries) - - -#%% find files for flight information -lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') -lst.sort() -if len(lst)==0: - raise ValueError('cannot find any file') -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if 
IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+Model_List[0]+'_') - lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') - lst.sort() - -alldates = [x.split('_')[-1].split('.')[0] for x in lst] - -# for each flight -for date in alldates: - - #%% read in Models - nmodels=len(Model_List) - ccn3_m = [] - ccn5_m = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,ccn3,timeunitm,ccn3_unit,ccn3_longname)=read_extractflight(filename_m,'CCN3') - (timem,heightm,ccn5,timeunitm,ccn5_unit,ccn5_longname)=read_extractflight(filename_m,'CCN5') - - ccn3_m.append(ccn3) - ccn5_m.append(ccn5) - - # get supersaturation - SS3 = ccn3_longname.split('=')[-1] - SS5 = ccn5_longname.split('=')[-1] - timem2 = timem/3600 - - #%% read in flight data (for HISCALE) - if campaign=='HISCALE': - filename_ccn=glob.glob(ccnpath+'CCN_G1_'+date[0:8]+'*R2_HiScale001s.*') - filename_ccn.sort() - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - # read in data - if len(filename_ccn)==1 or len(filename_ccn)==2: - (data0,ccnlist)=read_ccn_hiscale(filename_ccn[flightidx-1]) - # only choose data quality is good (flag=0) - flag = data0[7,:] - time_ccn = data0[0,:] - ccna = data0[10,:] - ccnb = data0[11,:] - SSa = data0[2,:] - SSb = data0[5,:] - ccna=qc_mask_qcflag(ccna,flag) - ccnb=qc_mask_qcflag(ccnb,flag) - elif len(filename_ccn)==0: - time_ccn=timem - ccna=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - SSa=0.24 - SSb=0.46 - else: - raise ValueError('find too many files') - timea=time_ccn - timeb=time_ccn - - elif campaign=='ACEENA': - filename_ccna=glob.glob(ccnpath+'enaaafccn2colaF1.b1.'+date[0:8]+'*.nc') - filename_ccnb=glob.glob(ccnpath+'enaaafccn2colbF1.b1.'+date[0:8]+'*.nc') - # read in data - if len(filename_ccna)==1: - (timea,timeunita,ccna,qcflag,ccnunit,SSa)=read_ccn(filename_ccna[0]) - 
ccna=qc_mask_qcflag(ccna,qcflag) - ccna=qc_remove_neg(ccna) - SSa=qc_remove_neg(SSa) - elif len(filename_ccna)==0: - # print('no CCN data found. set as NaN') - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files') - if len(filename_ccnb)==1: - (timeb,timeunitb,ccnb,qcflag,ccnunit,SSb)=read_ccn(filename_ccnb[0]) - ccnb=qc_mask_qcflag(ccnb,qcflag) - ccnb=qc_remove_neg(ccnb) - SSb=qc_remove_neg(SSb) - elif len(filename_ccnb)==0: - # print('no CCN data found. set as NaN') - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files') - - # CSET does not have observed CCN - elif campaign=='CSET': - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - - # SOCRATES - elif campaign=='SOCRATES': - filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') - if len(filename_ccn)==1: - (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) - time_ccn = data0[0,:] - ccn = data0[1,:] - SS = data0[3,:] - ccn=qc_remove_neg(ccn) - timea=time_ccn - timeb=time_ccn - ccna=np.array(ccn) - ccnb=np.array(ccn) - idxa=np.logical_and(SS>0.05, SS<0.15) - ccna[idxa==False]=np.nan - SSa=0.1 - idxb=np.logical_and(SS>0.45, SS<0.55) - ccnb[idxb==False]=np.nan - SSb=0.5 - elif len(filename_ccn)==0: - timea=timem - SSa=np.nan*np.empty([len(timem)]) - ccna=np.nan*np.empty([len(timem)]) - timeb=timem - SSb=np.nan*np.empty([len(timem)]) - ccnb=np.nan*np.empty([len(timem)]) - else: - raise ValueError('find too many files') - - - - #%% make plot - - figname = figpath_aircraft_timeseries+'CCN_'+campaign+'_'+date+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - 
ax1.plot(timea/3600,ccna,'k.',linewidth=1,label='OBS') - for mm in range(nmodels): - ax1.plot(timem2, ccn3_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax1.set_yscale('log') - ax1.tick_params(color='k',labelsize=12) - ylim1 = ax1.get_ylim() - - ax2.plot(timeb/3600,ccnb,'k.',linewidth=1,label='OBS') - for mm in range(nmodels): - ax2.plot(timem2, ccn5_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax2.set_yscale('log') - ax2.tick_params(color='k',labelsize=12) - ylim2 = ax2.get_ylim() - - # set ylimit consistent in subplots - ax1.set_ylim([ylim1[0], ylim2[1]]) - ax2.set_ylim([ylim1[0], ylim2[1]]) - - ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - - # supersaturation - fig.text(0.67,0.9,'SS_obs='+format(np.nanmean(SSa),'.2f')+'%, SS_model='+SS3) - fig.text(0.67,0.4,'SS_obs='+format(np.nanmean(SSb),'.2f')+'%, SS_model='+SS5) - - ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) - ax1.set_title('CCN Number Concentration (cm$^{-3}$)',fontsize=15) - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - plt.close() - \ No newline at end of file diff --git a/python/plotting/plot_flight_timeseries_CN.py b/python/plotting/plot_flight_timeseries_CN.py deleted file mode 100644 index 490902f..0000000 --- a/python/plotting/plot_flight_timeseries_CN.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -# plot aircraft track data -# timeseries of aerosol number concentration (CN) -# compare models and CPC measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_aircraft import read_cpc, read_RF_NCAR -from read_netcdf import read_merged_size,read_extractflight -from quality_control import qc_cpc_air - -#%% settings - -from settings import campaign, Model_List, color_model, \ - E3SM_aircraft_path, 
figpath_aircraft_timeseries - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, cpcpath,merged_size_path -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_timeseries): - os.makedirs(figpath_aircraft_timeseries) - - -#%% find files for flight information -if campaign in ['HISCALE', 'ACEENA']: - lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') -elif campaign in ['CSET', 'SOCRATES']: - lst = glob.glob(RFpath+'RF*.PNI.nc') -else: - raise ValueError('campaign name is not recognized: '+campaign) -lst.sort() - -if len(lst)==0: - raise ValueError('cannot find any file') - -# choose files for specific IOP -if campaign=='HISCALE': - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() - -# for each flight -for filename in lst: - - #%% read in flight data (for HISCALE and ACEENA) - if campaign in ['HISCALE', 'ACEENA']: - # get date info: - date=filename[-12:-3] - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - #% read in flight information - (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') - time=np.ma.compressed(time) - if campaign=='HISCALE': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') - elif campaign=='ACEENA': - filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') - filename_c.sort() - # read in data - if len(filename_c)==1 or len(filename_c)==2: # some days have two flights - (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) - if np.logical_and(campaign=='ACEENA', 
date=='20180216a'): - cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) - time_cpc = cpc[0,:] - cpc10 = cpc[1,:] - cpc3 = cpc[2,:] - elif len(filename_c)==0: - time_cpc=time - cpc10=np.nan*np.empty([len(time)]) - cpc3=np.nan*np.empty([len(time)]) - else: - raise ValueError('find too many files') - # some quality checks - (cpc3,cpc10) = qc_cpc_air(cpc3,cpc10) - - #%% read in flight data (for CSET and SOCRATES) - elif campaign in ['CSET', 'SOCRATES']: - fname=filename.split('.') - date=fname[-4] - (time_cpc,cpc10,timeunit,cpc10unit,cpc10longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCN') - if campaign=='CSET': - (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCU100_RWOOU') - elif campaign=='SOCRATES': - # there are two variables: CONCU100_CVIU and CONCU100_LWII - (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCU100_LWII') - - #%% read in Models - nmodels=len(Model_List) - cpc100_m = [] - cpc10_m = [] - cpc3_m = [] - for mm in range(nmodels): - filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,heightm,cpc_m,timeunitm,ncn_unit,ncn_longname)=read_extractflight(filename_m,'NCN') - (timem,heightm,cpcu_m,timeunitm,ncnu_unit,ncnu_longname)=read_extractflight(filename_m,'NUCN') - (timem,heightm,ncnall,timeunitm,ncnall_unit,ncnall_longname)=read_extractflight(filename_m,'NCNall') - # if len(cpc_m)!=cpc.shape[1]: - # print('CPC and MAM have different dimensions! 
check') - # print(cpc.shape,cpc_m.shape) - # errors - cpc100_m.append(np.sum(ncnall[100:,:],0)*1e-6) # #/m3 to #/cm3 - cpc10_m.append(cpc_m*1e-6) # #/m3 to #/cm3 - cpc3_m.append(cpcu_m*1e-6) # #/m3 to #/cm3 - - timem2 = timem/3600 - - #%% make plot - - figname = figpath_aircraft_timeseries+'CN_'+campaign+'_'+date+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.plot(time_cpc/3600,cpc10,color='k',linewidth=1,label='CPC(>10nm)') - for mm in range(nmodels): - ax1.plot(timem2, cpc10_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - ax1.set_yscale('log') - ax1.tick_params(color='k',labelsize=12) - ylim1 = ax1.get_ylim() - - if campaign in ['HISCALE', 'ACEENA']: - ax2.plot(time_cpc/3600,cpc3,color='k',linewidth=1,label='CPC(>3nm)') - for mm in range(nmodels): - ax2.plot(timem2, cpc3_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - elif campaign in ['CSET', 'SOCRATES']: - ax2.plot(time_cpc/3600,uhsas100,color='k',linewidth=1,label='UHSAS(>100nm)') - for mm in range(nmodels): - ax2.plot(timem2, cpc100_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - ax2.set_yscale('log') - ax2.tick_params(color='k',labelsize=12) - ylim2 = ax2.get_ylim() - - # set ylimit consistent in subplots - ax1.set_ylim([max(1,min(ylim1[0],ylim2[0])), max(ylim1[1],ylim2[1])]) - ax2.set_ylim([max(1,min(ylim1[0],ylim2[0])), max(ylim1[1],ylim2[1])]) - - ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - - ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) - ax1.set_title('Aerosol Number Concentration (cm$^{-3}$)',fontsize=15) - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - # plt.close() \ No newline at end of file diff --git 
a/python/plotting/plot_flight_track_height.py b/python/plotting/plot_flight_track_height.py deleted file mode 100644 index 24eb9b0..0000000 --- a/python/plotting/plot_flight_track_height.py +++ /dev/null @@ -1,250 +0,0 @@ -""" -# plot aircraft information -# plot 1: plot flight track location (lat/lon) with height in color -# plot 2: plot timeseries of flight height with cloud and CVI flags -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_ARMdata import read_cvi_aceena -from specific_data_treatment import lwc2cflag -from time_format_change import hhmmss2sec -from read_aircraft import read_iwg1, read_cvi_hiscale, read_RF_NCAR - - -#%% settings - -from settings import campaign, lat0, lon0, figpath_aircraft_timeseries - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, iwgpath, cvipath -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('campaign name is not recognized: '+campaign) - -if not os.path.exists(figpath_aircraft_timeseries): - os.makedirs(figpath_aircraft_timeseries) - - -#%% find all flight data - -if campaign=='HISCALE': - lst = glob.glob(iwgpath+'*a2.txt') - lst.sort() - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') -elif campaign=='ACEENA': - lst = glob.glob(iwgpath+'*a2.txt') - lst.sort() - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') -elif campaign in ['CSET', 'SOCRATES']: - lst = glob.glob(RFpath+'RF*.PNI.nc') -else: - raise ValueError('campaign name is not recognized: '+campaign) -lst.sort() - -#%% read in data and make plot -for filename in lst: - - # get date info: - fname=filename.split('.') - - #%% read in flight data (for HISCALE and 
ACEENA) - if campaign in ['HISCALE', 'ACEENA']: - date=fname[-3] - if date[-1]=='a': - flightidx=1 - else: - flightidx=2 - - # read in IWG data - (iwg,iwgvars)=read_iwg1(filename) - timelen = len(iwg) - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - iwg.insert(1403,list(iwg[1403])) - tstr=iwg[1403][1] - tstr=tstr[0:-1]+str(int(tstr[-1])-1) - iwg[1403][1]=tstr - del iwg[-1] - # get lat, lon, height, time - lon=np.empty(timelen) - lat=np.empty(timelen) - height=np.empty(timelen) - time=np.empty(timelen) - cldflag=np.empty(timelen) - legnum=np.full(timelen,0) - for t in range(timelen): - lat[t]=float(iwg[t][2]) - lon[t]=float(iwg[t][3]) - height[t]=float(iwg[t][4]) - cldflag[t]=int(iwg[t][35]) - legnum[t]=int(iwg[t][-1]) - timestr=iwg[t][1].split(' ') - time[t]=hhmmss2sec(timestr[1]) - datestr=timestr[0] - - # read in CVI - if campaign=='HISCALE': - filename_c=glob.glob(cvipath+'CVI_G1_'+date[0:8]+'*R4_HISCALE_001s.ict.txt') - filename_c.sort() - # read in data - if len(filename_c)==1 or len(filename_c)==2: - (cvi,cvilist)=read_cvi_hiscale(filename_c[flightidx-1]) - time_cvi = cvi[0,:] - cvi_inlet=cvi[-1,:] - if all(time_cvi==time)==False: - raise ValueError('time dimension is incosistent') - elif len(filename_c)==0: - time_cvi=time - cvi_inlet=np.nan*np.empty([len(time)]) - else: - raise ValueError('find too many files: '+filename_c) - - elif campaign=='ACEENA': - filename_c=glob.glob(cvipath+'enaaafinletcviF1.c1.'+date[0:8]+'*.nc') - filename_c.sort() - # read in data - if len(filename_c)==1: - (time_c,lon_c,lat_c,alt_c,timeunit_c,cvimode,cvi_inlet,enhance_factor,dilution_factor)=read_cvi_aceena(filename_c[0]) - if date=='20180216a': - time_c=np.insert(time_c,1403,(time_c[1402]+time_c[1403])/2) - cvi_inlet=np.insert(cvi_inlet,1403,cvi_inlet[1403]) - if all(time_c==time)==False: - raise ValueError('time dimension is incosistent') - elif len(filename_c)==0: - time_cvi=time - cvi_inlet=np.nan*np.empty([len(time)]) - else: - raise ValueError('find too many 
files: '+filename_c) - # cvi_inlet[cvi_inlet==-9]=1 # if cvi_inlet is unfunctional, use fims as good data - - else: - raise ValueError('do not recognize this campaign: '+campaign) - - #%% read in flight data (for CSET and SOCRATES) - elif campaign in ['CSET', 'SOCRATES']: - date=fname[-4] - print('input data for '+date) - (time,height,timeunit,hunit,hlongname,cellsize,cellunit)=read_RF_NCAR(filename,'ALT') - (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') - (time,lon,timeunit,lonunit,lonlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LON') - (time,lwc,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename,'PLWCC') - # lon[lon<0]=lon[lon<0]+360 - # calculate cloud flag based on LWC - cldflag=lwc2cflag(lwc,lwcunit) - if campaign=='SOCRATES': - (time,cvi_inlet,timeunit,cviunit,cvilongname,cellsize,cellunit)=read_RF_NCAR(filename,'CVINLET') - else: - cvi_inlet=np.nan*np.empty([len(time)]) - - - #%% plot flight tracks: - lat[lat<-9000]=np.nan - lon[lon<-9000]=np.nan - height[height<-9000]=np.nan - - # change longitude to [-180, 180] - if lon0>180: - lon0=lon0-360 - - try: - # os.environ['PROJ_LIB'] = r'c:\Users\tang357\Anaconda3\pkgs\basemap-1.3.0-py38ha7665c8_0\Library\share' - # from mpl_toolkits.basemap import Basemap - # figname = figpath_aircraft_timeseries + 'flighttrack_'+campaign+'_'+date+'.png' - # print('plot flight track to '+figname) - # fig,ax = plt.subplots(figsize=(8,5)) # figsize in inches - # plt.tight_layout(pad=0.1, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - # if campaign in ['CSET', 'SOCRATES']: - # m = Basemap(llcrnrlon=min(np.floor(min(lon)),np.floor(lon0))-2,llcrnrlat=min(np.floor(min(lat)),np.floor(lat0))-2,\ - # urcrnrlon=max(np.ceil(max(lon)),np.ceil(lon0))+2,urcrnrlat=max(np.ceil(max(lat)),np.ceil(lat0))+2,\ - # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=np.min(lat),lon_0=np.min(lon)) #,lat_ts=5.) 
- # m.drawparallels(np.arange(-90,90,5),labels=[1,0,0,0]) - # m.drawmeridians(np.arange(-180,180,5),labels=[0,0,0,1]) - # m.drawcoastlines() - # m.fillcontinents() - # elif campaign=='HISCALE': - # m = Basemap(llcrnrlon=-99,llcrnrlat=35,urcrnrlon=-95,urcrnrlat=38,\ - # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=lat0,lon_0=lon0) #,lat_ts=5.) - # m.drawparallels(np.arange(30,40,1),labels=[1,0,0,0]) - # m.drawmeridians(np.arange(-110,-90,1),labels=[0,0,0,1]) - # m.drawstates() - # x2,y2=m(lon0,lat0) - # m.scatter(x2,y2,s=100,marker='*',color='k') - # elif campaign=='ACEENA': - # m = Basemap(llcrnrlon=-30,llcrnrlat=37,urcrnrlon=-25,urcrnrlat=41,\ - # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=lat0,lon_0=lon0) #,lat_ts=5.) - # m.drawparallels(np.arange(30,42,1),labels=[1,0,0,0]) - # m.drawmeridians(np.arange(-30,-20,1),labels=[0,0,0,1]) - # m.drawcoastlines() - # m.fillcontinents() - # x2,y2=m(lon0,lat0) - # m.scatter(x2,y2,s=100,marker='*',color='k') - # x, y = m(lon,lat) - # h=m.scatter(x,y,s=1,c=height,cmap='jet') - # ax.set_title('Flight track '+date,fontsize=15) - # cbar=fig.colorbar(h) - # except: - figname = figpath_aircraft_timeseries + 'flighttrack_'+campaign+'_'+date+'.png' - print('plot flight track to '+figname) - fig,ax = plt.subplots(figsize=(8,5)) # figsize in inches - # plot the location of the campaign site: - ax.plot([lon0,lon0],[lat0-50, lat0+50],':',color=[.8,.8,.8]) - ax.plot([lon0-50, lon0+50],[lat0,lat0],':',color=[.8,.8,.8]) - # plot flight track - h=ax.scatter(lon,lat,s=1,c=height,cmap='jet',vmin=0,vmax=max(height)) #vmin/vmax: color range - ax.set_xlim(min(np.floor(min(lon)),np.floor(lon0)), max(np.ceil(max(lon)),np.ceil(lon0))) - ax.set_ylim(min(np.floor(min(lat)),np.floor(lat0)), max(np.ceil(max(lat)),np.ceil(lat0))) - ax.tick_params(color='k',labelsize=14) - ax.set_xlabel('longitude',fontsize=14) - ax.set_ylabel('latitude',fontsize=14) - ax.set_title('Flight track '+date,fontsize=15) 
- cbar=fig.colorbar(h) - fig.text(0.81,0.91, 'm MSL') - except: - raise ValueError("cannot make flight track plot") - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - plt.close() - - #%% plot flight height and flag/leg timeseries - figname = figpath_aircraft_timeseries + 'flightheight_'+campaign+'_'+date+'.png' - print('plot flight height timeseries to '+figname) - - fig,ax1 = plt.subplots(figsize=(8,2)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - h11=ax1.plot(time/3600,height/1000,color='k',linewidth=1) - if campaign in ['HISCALE', 'ACEENA']: - for ll in range(1,max(legnum)+1): - idx=legnum==ll - ax1.plot(time[idx]/3600,height[idx]/1000,color='b',linewidth=2) - h12=ax1.plot(time/3600,time*0+max(height)*0.00105,color='k',linewidth=.2) - cvi2=0.0*cvi_inlet - cvi2[cvi_inlet==1]=np.nan - cvi2=cvi2+max(height)*0.00105 - h13=ax1.plot(time/3600,cvi2,color='k',linewidth=2) - h14=ax1.vlines(time[cldflag==1]/3600,0,max(height)*0.0011,color='silver',linewidth=0.1) - # ax1.set_xlim(time[0]/3600-0.3, time[-1]/3600+0.3) - ax1.set_ylim(0,max(height)*0.0011) - ax1.set_ylabel('height (km)',fontsize=12) - ax1.set_xlabel('time (hour UTC) '+date,fontsize=12) - ax1.set_title('thin black: flight track. blue: flight legs. gray vertical lines: cloud flag. 
thick black: CVI mode', fontsize=10) - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - plt.close() - \ No newline at end of file diff --git a/python/plotting/plot_profile_cloud.py b/python/plotting/plot_profile_cloud.py deleted file mode 100644 index 44d99e4..0000000 --- a/python/plotting/plot_profile_cloud.py +++ /dev/null @@ -1,120 +0,0 @@ -""" -# plot vertical profile of cloud fraction -# for each day of selected IOP -# compare models and surface measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import timeunit2cday,yyyymmdd2cday,cday2mmdd -from read_ARMdata import read_armbe -from read_netcdf import read_E3SM_z - -#%% settings - -from settings import campaign, armbepath, Model_List, IOP, start_date, end_date, \ - lon0, E3SM_profile_path, figpath_profile_timeseries - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if not os.path.exists(figpath_profile_timeseries): - os.makedirs(figpath_profile_timeseries) - - -#%% read in obs data -if campaign=='ACEENA': - if IOP=='IOP1': - filename_armbe = armbepath+'enaarmbecldradC1.c1.20170101.003000.nc' - year='2017' - elif IOP=='IOP2': - filename_armbe = armbepath+'enaarmbecldradC1.c1.20180101.003000.nc' - year='2018' -elif campaign=='HISCALE': - filename_armbe = armbepath+'sgparmbecldradC1.c1.20160101.003000.nc' - year='2016' - -(time0,height0,cld0,time0unit,cld0unit) = read_armbe(filename_armbe,'cld_frac') - - -time0=time0/86400.+timeunit2cday(time0unit) -if campaign=='HISCALE': - # observation is leap year. change the time for comparison with noleap model output. 
- # note that it is not suitable for January and February - time0=time0-1 - -#%% read in model - -cldm = [] -nmodels = len(Model_List) -for mm in range(nmodels): - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_profile_path+'Profile_vars_'+campaign+'_'+Model_List[mm]+'.'+date+'.nc' - (time,height,data,timemunit,dataunit,long_name)=read_E3SM_z(filename_input,'CLOUD') - - timem = np.hstack((timem,time)) - if cday==cday1: - datam=data*100 - else: - datam = np.vstack((datam,data*100)) - - data=data*100. - dataunit='%' - cldm.append(datam) - -# change to local solar time -timeshift = lon0/360*24 -if timeshift>12: - timeshift=timeshift-24 -time0 = time0+timeshift/24. -timem = timem+timeshift/24. - -#%% plot cloud for each day in time_range - - -for cday in range(cday1,cday2+1): - idxo=np.logical_and(time0>cday-0.1, time0cday-0.1, timem1: # not NaN - for dd in range(len(days)): - for tt in range(len(time_dc)): - time_tmp = days[dd]+time_dc[tt]/1440. - idx = np.abs(t_obs-time_tmp).argmin() - if (t_obs[idx]-time_tmp)*1440 <= 30: - so4_o_dc[tt,dd] = so4_obs[idx] -if len(org_obs)>1: - for dd in range(len(days)): - for tt in range(len(time_dc)): - time_tmp = days[dd]+time_dc[tt]/1440. 
- idx = np.abs(t_obs-time_tmp).argmin() - if (t_obs[idx]-time_tmp)*1440 <= 30: - org_o_dc[tt,dd] = org_obs[idx] -so4_o_dc = np.nanmean(so4_o_dc,1) -org_o_dc = np.nanmean(org_o_dc,1) - -# for E3SM data -so4_m_dc = [] -org_m_dc = [] -for mm in range(nmodels): - tmp_so4 = np.full((24,len(days)),np.nan) - tmp_org = np.full((24,len(days)),np.nan) - for dd in range(len(days)): - idx=np.logical_and(timem2>=days[dd], timem20.05, SS<0.15) - t_ccna = t_ccn[idx] - ccna = ccn[idx] - SSa = 0.1 - # SS=0.5% - idx = np.logical_and(SS>0.4, SS<0.6) - t_ccnb = t_ccn[idx] - ccnb = ccn[idx] - SSb = 0.5 - -elif campaign=='HISCALE': - if IOP=='IOP1': - (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP1(ccnsfcpath) - sssfc=[int(x*10) for x in sssfc] - sssfc=np.array(sssfc)/10. - times_ccn=np.array(times_ccn) - ccnsfc=np.array(ccnsfc) - elif IOP=='IOP2': - (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP2(ccnsfcpath) - sssfc=[int(x*10) for x in sssfc] - sssfc=np.array(sssfc)/10. - times_ccn=np.array(times_ccn) - ccnsfc=np.array(ccnsfc) - # find the nearest Supersaturation in Obs comparing to model - # 0.1% - idx = sssfc==0.1 - ccna = ccnsfc[idx] - t_ccna = times_ccn[idx] - SSa = 0.1 - # 0.5% - idx = sssfc==0.5 - ccnb = ccnsfc[idx] - t_ccnb = times_ccn[idx] - SSb = 0.5 - -#%% read in models -ccna_m = [] -ccnb_m = [] -nmodels = len(Model_List) -for mm in range(nmodels): - tmp_CCN3=np.empty(0) - tmp_CCN5=np.empty(0) - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (time,ccn3,timemunit,dataunit,ccn3_longname)=read_E3SM(filename_input,'CCN3') - (time,ccn5,timemunit,dataunit,ccn5_longname)=read_E3SM(filename_input,'CCN5') - - timem = np.hstack((timem,time)) - tmp_CCN3 = np.hstack((tmp_CCN3,ccn3)) - tmp_CCN5 = np.hstack((tmp_CCN5,ccn5)) - - ccna_m.append(tmp_CCN3) - ccnb_m.append(tmp_CCN5) - - # get supersaturation - SS3 = 
ccn3_longname.split('=')[-1] - SS5 = ccn5_longname.split('=')[-1] - - - -#%% calculate diurnal cycle -days = np.arange(cday1, cday2+1) - -time_dc = np.arange(30,1440.,60) -ccna_o_dc = np.full((len(time_dc),len(days)),np.nan) -ccnb_o_dc = np.full((len(time_dc),len(days)),np.nan) -n_valid = list() -for dd in range(len(days)): - nn=0 - for tt in range(len(time_dc)): - time_tmp = days[dd]+time_dc[tt]/1440. - idx = np.abs(t_ccna-time_tmp).argmin() - if (t_ccna[idx]-time_tmp)*1440 <= 30: - ccna_o_dc[tt,dd] = ccna[idx] - idx = np.abs(t_ccnb-time_tmp).argmin() - if (t_ccnb[idx]-time_tmp)*1440 <= 30: - ccnb_o_dc[tt,dd] = ccnb[idx] -ccna_o_dc = np.nanmean(ccna_o_dc,1) -ccnb_o_dc = np.nanmean(ccnb_o_dc,1) - -# for E3SM data -ccna_m_dc = [] -ccnb_m_dc = [] -for mm in range(nmodels): - tmp_ccna = np.full((24,len(days)),np.nan) - tmp_ccnb = np.full((24,len(days)),np.nan) - for dd in range(len(days)): - idx=np.logical_and(timem>=days[dd], timem1: # not NaN - for dd in range(len(days)): - for tt in range(len(time_dc)): - time_tmp = days[dd]+time_dc[tt]/1440. - idx = np.abs(t_cpc-time_tmp).argmin() - if (t_cpc[idx]-time_tmp)*1440 <= 30: - cpc_o_dc[tt,dd] = cpc[idx] -if len(cpcu)>1: - for dd in range(len(days)): - for tt in range(len(time_dc)): - time_tmp = days[dd]+time_dc[tt]/1440. 
- idx = np.abs(t_cpcu-time_tmp).argmin() - if (t_cpcu[idx]-time_tmp)*1440 <= 30: - cpcu_o_dc[tt,dd] = cpcu[idx] -cpc_o_dc = np.nanmean(cpc_o_dc,1) -cpcu_o_dc = np.nanmean(cpcu_o_dc,1) - -# for E3SM data -ncn_m_dc = [] -nucn_m_dc = [] -for mm in range(nmodels): - tmp_ncn = np.full((24,len(days)),np.nan) - tmp_nucn = np.full((24,len(days)),np.nan) - for dd in range(len(days)): - idx=np.logical_and(timem>=days[dd], timem10nm)') -for mm in range(nmodels): - ax1.plot(time_dc/60, ncn_m_dc[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax1.tick_params(color='k',labelsize=12) -ylim1 = ax1.get_ylim() - -ax2.plot(time_dc/60,cpcu_o_dc,color='k',linewidth=1,label='CPC(>3nm)') -for mm in range(nmodels): - ax2.plot(time_dc/60, nucn_m_dc[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax2.tick_params(color='k',labelsize=12) -ylim2 = ax2.get_ylim() - -ax1.set_xlim(0,24) -ax2.set_xlim(0,24) -ax1.set_xticks(np.arange(0,24,3)) -ax2.set_xticks(np.arange(0,24,3)) - -# set ylimit consistent in subplots -# ax1.set_yticks([10,100,1000,10000,100000]) -# ax2.set_yticks([10,100,1000,10000,100000]) -# ax1.set_yscale('log') -# ax2.set_yscale('log') -# ax1.set_ylim([ylim1[0], ylim2[1]]) -# ax2.set_ylim([ylim1[0], ylim2[1]]) - - -ax1.legend(loc='center right', shadow=False, fontsize='medium',bbox_to_anchor=(1.3, .5)) -ax2.legend(loc='center right', shadow=False, fontsize='medium',bbox_to_anchor=(1.3, .5)) - -ax2.set_xlabel('Hour (UTC)',fontsize=12) -ax1.set_title('Aerosol Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=14) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() \ No newline at end of file diff --git a/python/plotting/plot_sfc_pdf_AerosolSize.py b/python/plotting/plot_sfc_pdf_AerosolSize.py deleted file mode 100644 index 398681b..0000000 --- a/python/plotting/plot_sfc_pdf_AerosolSize.py +++ /dev/null @@ -1,213 +0,0 @@ -""" -# plot mean aerosol size ditribution for surface data -# compare models and surface 
measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday, cday2mmdd -from read_surface import read_smpsb_pnnl,read_smps_bin -from read_ARMdata import read_uhsas, read_smps_bnl -from read_netcdf import read_E3SM -from specific_data_treatment import avg_time_2d -from quality_control import qc_mask_qcflag,qc_correction_nanosmps - - -#%% settings - -from settings import campaign, Model_List, color_model, IOP, start_date, end_date, \ - E3SM_sfc_path, figpath_sfc_statistics - -if campaign=='ACEENA': - from settings import uhsassfcpath -elif campaign=='HISCALE': - if IOP=='IOP1': - from settings import smps_bnl_path, nanosmps_bnl_path - elif IOP=='IOP2': - from settings import smps_pnnl_path - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if not os.path.exists(figpath_sfc_statistics): - os.makedirs(figpath_sfc_statistics) - -#%% read in obs data -if campaign=='ACEENA': - if IOP=='IOP1': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') - elif IOP=='IOP2': - lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') - lst.sort() - t_uhsas=np.empty(0) - uhsas=np.empty((0,99)) - for filename in lst: - (time,dmin,dmax,data,timeunit,dataunit,long_name) = read_uhsas(filename) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker plot - time2=np.arange(1800,86400,3600) - data2 = avg_time_2d(time,data,time2) - t_uhsas=np.hstack((t_uhsas, cday+time2/86400)) - uhsas=np.vstack((uhsas, data2)) - size_u = (dmin+dmax)/2 - # change to dN/dlogDp - dlnDp_u=np.empty(99) - for bb in range(len(size_u)): - dlnDp_u[bb]=np.log10(dmax[bb]/dmin[bb]) - uhsas[:,bb]=uhsas[:,bb]/dlnDp_u[bb] - - time = np.array(t_uhsas) - size = np.array(size_u) - obs = np.array(uhsas.T) - -elif campaign=='HISCALE': - if IOP=='IOP1': - lst = glob.glob(smps_bnl_path+'*.nc') - lst.sort() - t_smps=np.empty(0) - smps=np.empty((0,192)) - for filename in lst: - (time,size,flag,timeunit,dataunit,smps_longname)=read_smps_bnl(filename,'status_flag') - (time,size,data,timeunit,smpsunit,smps_longname)=read_smps_bnl(filename,'number_size_distribution') - data=qc_mask_qcflag(data,flag) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker plot - time2=np.arange(1800,86400,3600) - data2 = avg_time_2d(time,data,time2) - t_smps=np.hstack((t_smps, cday+time2/86400)) - smps=np.vstack((smps, data2)) - smps=smps.T - # combine with nanoSMPS - lst2 = glob.glob(nanosmps_bnl_path+'*.nc') - lst2.sort() - t_nano=np.empty(0) - nanosmps=np.empty((0,192)) - for 
filename2 in lst2: - (timen,sizen,flagn,timenunit,datanunit,long_name)=read_smps_bnl(filename2,'status_flag') - (timen,sizen,datan,timenunit,nanounit,nanoname)=read_smps_bnl(filename2,'number_size_distribution') - datan=qc_mask_qcflag(datan,flagn) - timestr=timenunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker plot - time2=np.arange(1800,86400,3600) - data2 = avg_time_2d(timen,datan,time2) - t_nano=np.hstack((t_nano, cday+time2/86400)) - nanosmps=np.vstack((nanosmps, data2)) - # nanosmps is overcounting, adjust nanosmps value for smooth transition to SMPS - nanosmps=qc_correction_nanosmps(nanosmps.T) - for tt in range(smps.shape[1]): - if any(t_nano==t_smps[tt]): - smps[0:80,tt]=nanosmps[0:80,t_nano==t_smps[tt]].reshape(80) - - elif IOP=='IOP2': - data=read_smpsb_pnnl(smps_pnnl_path+'HiScaleSMPSb_SGP_20160827_R1.ict') - size=read_smps_bin(smps_pnnl_path+'NSD_column_size_chart.txt') - time=data[0,:] - smps=data[1:-1,:] - flag=data[-1,:] - smps=qc_mask_qcflag(smps.T,flag).T - cday=yyyymmdd2cday('2016-08-27') - # average in time for quicker plot - time2=np.arange(time[0],time[-1]+1800,3600) - data2 = avg_time_2d(time,smps.T,time2) - t_smps=cday+time2/86400 - smps=data2.T - - time = np.array(t_smps) - size = np.array(size) - obs = np.array(smps) - - # SMPS is already divided by log10 - -else: - raise ValueError('does not recognize this campaign: '+campaign) - -#%% read in models -model = [] -nmodels = len(Model_List) -for mm in range(nmodels): - tmp_data=np.empty((3000,0)) - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCNall') - - timem = np.hstack((timem,time)) - tmp_data = np.hstack((tmp_data,ncn*1e-6)) - - # change to dN/dlog10Dp - for bb in range(3000): - 
dlnDp=np.log10((bb+2)/(bb+1)) - tmp_data[bb,:]=tmp_data[bb,:]/dlnDp - - model.append(tmp_data) - -#%% calculate mean pdf -pdf_obs=np.nanmean(obs,1) -pdf_model=[None]*nmodels -for mm in range(nmodels): - pdf_model[mm]=np.nanmean(model[mm],1) - -#%% -pct1_o = [np.nanpercentile(obs[i,:],10) for i in range(len(size))] -pct2_o = [np.nanpercentile(obs[i,:],90) for i in range(len(size))] -pct1_m = [[] for mm in range(nmodels)] -pct2_m = [[] for mm in range(nmodels)] -for mm in range(nmodels): - pct1_m[mm] = [np.nanpercentile(model[mm][i,:],10) for i in range(3000)] - pct2_m[mm] = [np.nanpercentile(model[mm][i,:],90) for i in range(3000)] - -# import scipy.stats as stats -# sem_o = np.ma.filled(stats.sem(obs,1,nan_policy='omit'),np.nan) -# sem_m = [[] for mm in range(nmodels)] -# for mm in range(nmodels): -# sem_m[mm] = np.ma.filled(stats.sem(model[mm],1,nan_policy='omit'),np.nan) - -#%% make plot -# not plotting data if the mean value is 0 -pdf_obs[pdf_obs==0] = np.nan - -figname = figpath_sfc_statistics+'pdf_AerosolSize_'+campaign+'_'+IOP+'.png' - -print('plotting figures to '+figname) - -fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches - -ax.plot(size,pdf_obs,color='k',label='Obs') -for mm in range(nmodels): - ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - -ax.fill_between(size,pct1_o,pct2_o, alpha=0.5, facecolor='gray') -for mm in range(nmodels): - ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], alpha=0.2, facecolor=color_model[mm]) - -ax.legend(loc='upper right', shadow=False, fontsize='medium') -ax.tick_params(color='k',labelsize=12) -ax.set_xscale('log') -ax.set_yscale('log') -ax.set_ylim(0.01,1e4) -ax.set_xlim(0.67,4500) -ax.set_xlabel('Diameter (nm)',fontsize=13) -ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) -ax.set_title(campaign+' '+IOP,fontsize=14) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) -# plt.close() - \ No newline at end of file diff --git 
a/python/plotting/plot_sfc_pie_AerosolComposition.py b/python/plotting/plot_sfc_pie_AerosolComposition.py deleted file mode 100644 index c738928..0000000 --- a/python/plotting/plot_sfc_pie_AerosolComposition.py +++ /dev/null @@ -1,174 +0,0 @@ -""" -# plot surface aerosol composition in a pie plot -# plot models and surface measurements separately -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday,cday2mmdd -from read_ARMdata import read_acsm -from read_netcdf import read_E3SM -from quality_control import qc_remove_neg,qc_acsm_org_max - -#%% settings - -from settings import campaign, acsmpath, Model_List, \ - IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_statistics - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') -year0 = start_date[0:4] - -import os -if not os.path.exists(figpath_sfc_statistics): - os.makedirs(figpath_sfc_statistics) - - -#%% read in obs data -if campaign=='ACEENA': - if IOP=='IOP1': - lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201706*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201707*') - elif IOP=='IOP2': - lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201801*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201802*') - lst.sort() -elif campaign=='HISCALE': - if IOP=='IOP1': - lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201604*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201605*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201606*') - elif IOP=='IOP2': - lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201608*.cdf') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201609*.cdf') - lst.sort() -else: - raise ValueError('surface aerosol composition is only available in HISCALE or ACEENA. 
check: '+campaign) - -t_obs=np.empty(0) -so4_obs=np.empty(0) -org_obs=np.empty(0) -nh4_obs=np.empty(0) -no3_obs=np.empty(0) -chl_obs=np.empty(0) -for filename in lst: - (times_obs,so4sfc,timeunit,so4sfcunit)=read_acsm(filename,'sulfate') - (times_obs,orgsfc,timeunit,orgsfcunit)=read_acsm(filename,'total_organics') - (times_obs,nh4sfc,timeunit,nh4sfcunit)=read_acsm(filename,'ammonium') - (times_obs,no3sfc,timeunit,no3sfcunit)=read_acsm(filename,'nitrate') - (times_obs,chlsfc,timeunit,chlsfcunit)=read_acsm(filename,'chloride') - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - so4_obs=np.hstack((so4_obs, so4sfc)) - org_obs=np.hstack((org_obs, orgsfc)) - nh4_obs=np.hstack((nh4_obs, nh4sfc)) - no3_obs=np.hstack((no3_obs, no3sfc)) - chl_obs=np.hstack((chl_obs, chlsfc)) -so4_obs=qc_remove_neg(so4_obs) -nh4_obs=qc_remove_neg(nh4_obs) -no3_obs=qc_remove_neg(no3_obs) -chl_obs=qc_remove_neg(chl_obs) -org_obs=qc_remove_neg(org_obs) -org_obs=qc_acsm_org_max(org_obs) - -#%% read in models -nmodels = len(Model_List) -model_org = list() -model_so4 = list() -model_bc = list() -model_dst = list() -model_ncl = list() - -for mm in range(nmodels): - bcvarname=['bc_a1','bc_a3','bc_a4'] - dstvarname=['dst_a1','dst_a3'] - nclvarname=['ncl_a1','ncl_a2','ncl_a3'] - so4varname=['so4_a1','so4_a2','so4_a3'] - orgvarname=['soa_a1','soa_a2','soa_a3','pom_a1','pom_a3','pom_a4',\ - 'mom_a1','mom_a2','mom_a3','mom_a4'] - if Model_List[mm]=='NucSoaCond': - so4varname.append('so4_a5') - orgvarname.append('soa_a5') - - timem2 = np.array([]) - tmp_so4 = np.empty(0) - tmp_org = np.empty(0) - tmp_bc = np.empty(0) - tmp_dst = np.empty(0) - tmp_ncl = np.empty(0) - ps = np.empty(0) - ts = np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,so4all,timeunitm,so4unit,so4name)=read_E3SM(filename_input,so4varname) - 
(timem,orgall,timeunitm,orgunit,orgname)=read_E3SM(filename_input,orgvarname) - (timem,bcall,timeunitm,bcunit,bcname)=read_E3SM(filename_input,bcvarname) - (timem,dstall,timeunitm,dstunit,dstname)=read_E3SM(filename_input,dstvarname) - (timem,nclall,timeunitm,nclunit,nclname)=read_E3SM(filename_input,nclvarname) - (timem,[psm,tsm],timeunitm,varunit,varlongname)=read_E3SM(filename_input,['PS','T']) - - tmp_so4 = np.hstack((tmp_so4,sum(so4all))) - tmp_org = np.hstack((tmp_org,sum(orgall))) - tmp_bc = np.hstack((tmp_bc,sum(bcall))) - tmp_dst = np.hstack((tmp_dst,sum(dstall))) - tmp_ncl = np.hstack((tmp_ncl,sum(nclall))) - ps = np.hstack((ps,psm)) - ts = np.hstack((ts,tsm)) - timem2 = np.hstack((timem2,timem)) - - model_so4.append(tmp_so4) - model_org.append(tmp_org) - model_bc.append(tmp_bc) - model_dst.append(tmp_dst) - model_ncl.append(tmp_ncl) - -# change E3SM unit from kg/kg to ug/m3 -rho = ps/287.06/ts - -for mm in range(nmodels): - model_so4[mm]=model_so4[mm]*1e9*rho - model_org[mm]=model_org[mm]*1e9*rho - model_bc[mm]=model_bc[mm]*1e9*rho - model_dst[mm]=model_dst[mm]*1e9*rho - model_ncl[mm]=model_ncl[mm]*1e9*rho - -#%% Pie plot - -figname = figpath_sfc_statistics+'Pieplot_AerosolComposition_'+campaign+'_'+IOP+'.png' -print('plotting figures to '+figname) - -fig,ax = plt.subplots(1,nmodels+1,figsize=((nmodels+1)*3.5,3.5)) # figsize in inches -# colors = ['limegreen', 'red', 'b', 'y', 'orange' ] - -colors_o = ['limegreen', 'red', 'orange', 'lightblue', 'yellow'] -labels_o = ['ORG', 'SO4', 'NO3', 'NH4', 'CHL'] -sizeo = [np.nanmean(org_obs),np.nanmean(so4_obs),np.nanmean(no3_obs),np.nanmean(nh4_obs),np.nanmean(chl_obs)] - -colors_m = ['limegreen', 'red', 'k', 'silver','gray'] -labels_m = ['ORG', 'SO4', 'BC', 'DST', 'NCL'] -sizem = [] -for mm in range(nmodels): - sizem.append([np.mean(model_org[mm]),np.mean(model_so4[mm]),np.mean(model_bc[mm]),np.mean(model_dst[mm]),np.mean(model_ncl[mm])]) - -def absolute_value(val): - a=np.round(val*sum(sizeo))/100 - return a 
-ax[0].pie(sizeo/sum(sizeo),labels=labels_o,colors=colors_o, autopct=absolute_value) # autopct='%1.1f%%' -for mm in range(nmodels): - def absolute_valuemm(val): - a=np.round(val*sum(sizem[mm]))/100 - return a - ax[mm+1].pie(sizem[mm]/sum(sizem[mm]),labels=labels_m, colors=colors_m, autopct=absolute_valuemm) - -ax[0].set_title('Obs',fontsize=14) -for mm in range(nmodels): - ax[mm+1].set_title(Model_List[mm],fontsize=14) -fig.text(.5,.15,'unit: $\mu$g/m$^3$') - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/python/plotting/plot_sfc_timeseries_AerosolComposition.py b/python/plotting/plot_sfc_timeseries_AerosolComposition.py deleted file mode 100644 index 87fa173..0000000 --- a/python/plotting/plot_sfc_timeseries_AerosolComposition.py +++ /dev/null @@ -1,142 +0,0 @@ -""" -# plot surface timeseries of aerosol composition -# compare models and surface measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday,cday2mmdd -from read_ARMdata import read_acsm -from read_netcdf import read_E3SM -from specific_data_treatment import avg_time_1d -from quality_control import qc_remove_neg,qc_acsm_org_max - -#%% settings - -from settings import campaign, acsmpath, Model_List, color_model, \ - IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_timeseries - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if not os.path.exists(figpath_sfc_timeseries): - os.makedirs(figpath_sfc_timeseries) - -#%% read in obs data -if campaign=='ACEENA': - if IOP=='IOP1': - lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201706*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201707*') - elif IOP=='IOP2': - lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201801*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201802*') - lst.sort() -elif campaign=='HISCALE': - if IOP=='IOP1': - lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201604*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201605*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201606*') - elif IOP=='IOP2': - lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201608*.cdf') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201609*.cdf') - lst.sort() - -t_obs=np.empty(0) -so4_obs=np.empty(0) -org_obs=np.empty(0) -for filename in lst: - (times_obs,so4sfc,timeunit,so4sfcunit)=read_acsm(filename,'sulfate') - (times_obs,orgsfc,timeunit,orgsfcunit)=read_acsm(filename,'total_organics') - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker plot - time2=np.arange(1800,86400,3600) - so42 = avg_time_1d(np.array(times_obs),np.array(so4sfc),time2) - org2 = avg_time_1d(np.array(times_obs),np.array(orgsfc),time2) - t_obs=np.hstack((t_obs, cday+time2/86400)) - so4_obs=np.hstack((so4_obs, so42)) - org_obs=np.hstack((org_obs, org2)) -so4_obs=qc_remove_neg(so4_obs) -org_obs=qc_remove_neg(org_obs) -org_obs=qc_acsm_org_max(org_obs) - - -#%% read in models -nmodels = len(Model_List) -model_org = list() -model_so4 = list() - -for mm in range(nmodels): - so4varname=['so4_a1','so4_a2','so4_a3'] - orgvarname=['soa_a1','soa_a2','soa_a3','pom_a1','pom_a3','pom_a4',\ - 'mom_a1','mom_a2','mom_a3','mom_a4'] - if Model_List[mm]=='NucSoaCond': - so4varname.append('so4_a5') - orgvarname.append('soa_a5') - - timem2 = np.array([]) - tmp_so4 = np.empty(0) - tmp_org = np.empty(0) - ps = 
np.empty(0) - ts = np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - - (timem,so4all,timeunitm,so4unit,so4name)=read_E3SM(filename_input,so4varname) - (timem,orgall,timeunitm,orgunit,orgname)=read_E3SM(filename_input,orgvarname) - (timem,[psm,tsm],timeunitm,varunit,varlongname)=read_E3SM(filename_input,['PS','T']) - - tmp_so4 = np.hstack((tmp_so4,sum(so4all))) - tmp_org = np.hstack((tmp_org,sum(orgall))) - ps = np.hstack((ps,psm)) - ts = np.hstack((ts,tsm)) - timem2 = np.hstack((timem2,timem)) - - model_org.append(tmp_org) - model_so4.append(tmp_so4) - -# change E3SM unit from kg/kg to ug/m3 -rho = ps/287.06/ts - -for mm in range(nmodels): - model_so4[mm]=model_so4[mm]*1e9*rho - model_org[mm]=model_org[mm]*1e9*rho - -#%% make plot - -figname = figpath_sfc_timeseries+'timeseries_AerosolComposition_'+campaign+'_'+IOP+'.png' -print('plotting figures to '+figname) - -fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches -plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=0.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - -ax1.plot(t_obs,so4_obs,color='k',linewidth=1,label='OBS (SO4)') -for mm in range(nmodels): - ax1.plot(timem2, model_so4[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -# ax1.set_yscale('log') -ax1.tick_params(color='k',labelsize=12) - -ax2.plot(t_obs,org_obs,color='k',linewidth=1,label='OBS (ORG)') -for mm in range(nmodels): - ax2.plot(timem2, model_org[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -# ax2.set_yscale('log') -ax2.tick_params(color='k',labelsize=12) - -ax1.set_xlim(cday1,cday2) -ax2.set_xlim(cday1,cday2) - -ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) -ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - -ax2.set_xlabel('Calendar Day',fontsize=14) - -ax1.set_title('Aerosol Sulfate and 
Organic Concentration ($\mu$g/m$^3$) '+campaign+' '+IOP,fontsize=14) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) diff --git a/python/plotting/plot_sfc_timeseries_CCN.py b/python/plotting/plot_sfc_timeseries_CCN.py deleted file mode 100644 index 7afbec6..0000000 --- a/python/plotting/plot_sfc_timeseries_CCN.py +++ /dev/null @@ -1,161 +0,0 @@ -""" -# plot surface timeseries of CCN size distribution -# compare models and surface measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday,cday2mmdd -from read_ARMdata import read_ccn -from read_surface import read_CCN_hiscale_IOP1, read_CCN_hiscale_IOP2 -from read_netcdf import read_E3SM -from quality_control import qc_remove_neg,qc_mask_qcflag,qc_ccn_max - -#%% settings - -from settings import campaign, ccnsfcpath, Model_List, color_model, \ - IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_timeseries - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if not os.path.exists(figpath_sfc_timeseries): - os.makedirs(figpath_sfc_timeseries) - - -#%% read in obs data -if campaign=='ACEENA': - # cpc - if IOP=='IOP1': - lst = glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201706*')+glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201707*') - elif IOP=='IOP2': - lst = glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201801*')+glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201802*') - lst.sort() - t_ccn=np.empty(0) - ccn=np.empty(0) - SS=np.empty(0) - for filename in lst: - (time,timeunit,data,qc,dataunit,SS0)=read_ccn(filename) - data=qc_mask_qcflag(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_ccn=np.hstack((t_ccn, cday+time/86400)) - ccn=np.hstack((ccn, data)) - SS=np.hstack((SS, SS0)) - ccn=qc_remove_neg(ccn) - ccn=qc_ccn_max(ccn,SS) - # SS=0.1% - idx = np.logical_and(SS>0.05, SS<0.15) - t_ccna = t_ccn[idx] - ccna = ccn[idx] - SSa = 0.1 - # SS=0.5% - idx = np.logical_and(SS>0.4, SS<0.6) - t_ccnb = t_ccn[idx] - ccnb = ccn[idx] - SSb = 0.5 - -elif campaign=='HISCALE': - if IOP=='IOP1': - (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP1(ccnsfcpath) - sssfc=[int(x*10) for x in sssfc] - sssfc=np.array(sssfc)/10. - times_ccn=np.array(times_ccn) - ccnsfc=np.array(ccnsfc) - elif IOP=='IOP2': - (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP2(ccnsfcpath) - sssfc=[int(x*10) for x in sssfc] - sssfc=np.array(sssfc)/10. 
- times_ccn=np.array(times_ccn) - ccnsfc=np.array(ccnsfc) - # find the nearest Supersaturation in Obs comparing to model - # 0.1% - idx = sssfc==0.1 - ccna = ccnsfc[idx] - t_ccna = times_ccn[idx] - SSa = 0.1 - # 0.5% - idx = sssfc==0.5 - ccnb = ccnsfc[idx] - t_ccnb = times_ccn[idx] - SSb = 0.5 - -#%% read in models -ccna_m = [] -ccnb_m = [] -nmodels = len(Model_List) -for mm in range(nmodels): - tmp_CCN3=np.empty(0) - tmp_CCN5=np.empty(0) - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (time,ccn3,timemunit,dataunit,ccn3_longname)=read_E3SM(filename_input,'CCN3') - (time,ccn5,timemunit,dataunit,ccn5_longname)=read_E3SM(filename_input,'CCN5') - - timem = np.hstack((timem,time)) - tmp_CCN3 = np.hstack((tmp_CCN3,ccn3)) - tmp_CCN5 = np.hstack((tmp_CCN5,ccn5)) - - ccna_m.append(tmp_CCN3) - ccnb_m.append(tmp_CCN5) - - # get supersaturation - SS3 = ccn3_longname.split('=')[-1] - SS5 = ccn5_longname.split('=')[-1] - -#%% make plot - -figname = figpath_sfc_timeseries+'timeseries_CCN_'+campaign+'_'+IOP+'.png' -print('plotting figures to '+figname) - -fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches -plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - -ax1.plot(t_ccna,ccna,color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax1.plot(timem, ccna_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax1.set_yscale('log') -ax1.tick_params(color='k',labelsize=12) -ylim1 = ax1.get_ylim() - -ax2.plot(t_ccnb,ccnb,color='k',linewidth=1,label='Obs') -for mm in range(nmodels): - ax2.plot(timem, ccnb_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -ax2.set_yscale('log') -ax2.tick_params(color='k',labelsize=12) -ylim2 = ax2.get_ylim() - -# ax1.set_yticks([10,100,1000,10000,100000]) -# ax2.set_yticks([10,100,1000,10000,100000]) 
-ax1.set_xlim(cday1,cday2) -ax2.set_xlim(cday1,cday2) - -# set ylimit consistent in subplots -ax1.set_ylim([ylim1[0], ylim2[1]]) -ax2.set_ylim([ylim1[0], ylim2[1]]) - -# supersaturation -fig.text(0.67,0.9,'SS_obs='+format(np.nanmean(SSa),'.2f')+'%, SS_model='+SS3) -fig.text(0.67,0.4,'SS_obs='+format(np.nanmean(SSb),'.2f')+'%, SS_model='+SS5) - -ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) -ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - -ax2.set_xlabel('Calendar Day',fontsize=14) -ax1.set_title('CCN Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=15) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) diff --git a/python/plotting/plot_sfc_timeseries_CN.py b/python/plotting/plot_sfc_timeseries_CN.py deleted file mode 100644 index 003df2e..0000000 --- a/python/plotting/plot_sfc_timeseries_CN.py +++ /dev/null @@ -1,167 +0,0 @@ -""" -# plot surface timeseries of aerosol number concentration -# compare models and surface measurements -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from time_format_change import yyyymmdd2cday,cday2mmdd -from read_ARMdata import read_cpc -from read_netcdf import read_E3SM -from specific_data_treatment import avg_time_1d -from quality_control import qc_remove_neg,qc_mask_qcflag_cpc - -#%% settings - -from settings import campaign, cpcsfcpath, cpcusfcpath, Model_List, color_model, \ - IOP, start_date, end_date, E3SM_sfc_path, figpath_sfc_timeseries - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if not os.path.exists(figpath_sfc_timeseries): - os.makedirs(figpath_sfc_timeseries) - -#%% read in obs data -if campaign=='ACEENA': - # cpc - if IOP=='IOP1': - lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.2017062*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201707*') - elif IOP=='IOP2': - lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201801*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201802*') - lst.sort() - t_cpc=np.empty(0) - cpc=np.empty(0) - for filename in lst: - (time,data,qc,timeunit,cpcunit)=read_cpc(filename) - data=qc_mask_qcflag_cpc(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - # average in time for quicker and clearer plot - time2=np.arange(1800,86400,3600) - data2 = avg_time_1d(np.array(time),np.array(data),time2) - t_cpc=np.hstack((t_cpc, cday+time2/86400)) - cpc=np.hstack((cpc, data2)) - # no cpcu - t_cpcu = np.array(np.nan) - cpcu = np.array(np.nan) - -elif campaign=='HISCALE': - # cpc - if IOP=='IOP1': - lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201604*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201605*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201606*') - elif IOP=='IOP2': - lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201608*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201609*') - lst.sort() - t_cpc=np.empty(0) - cpc=np.empty(0) - if len(lst)==0: - t_cpc = np.array(np.nan) - cpc = np.array(np.nan) - else: - for filename in lst: - (time,data,qc,timeunit,cpcunit)=read_cpc(filename) - data=qc_mask_qcflag_cpc(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_cpc=np.hstack((t_cpc, cday+time/86400)) - cpc=np.hstack((cpc, data)) - cpc=qc_remove_neg(cpc) - - # cpcu - if IOP=='IOP1': - lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201604*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201605*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201606*') - elif IOP=='IOP2': - lst = 
glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201608*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201609*') - lst.sort() - t_cpcu=np.empty(0) - cpcu=np.empty(0) - if len(lst)==0: - t_cpcu = np.array(np.nan) - cpcu = np.array(np.nan) - else: - for filename in lst: - (time,data,qc,timeunit,cpcuunit)=read_cpc(filename) - data=qc_mask_qcflag_cpc(data,qc) - timestr=timeunit.split(' ') - date=timestr[2] - cday=yyyymmdd2cday(date,'noleap') - t_cpcu=np.hstack((t_cpcu, cday+time/86400)) - cpcu=np.hstack((cpcu, data)) - cpcu=qc_remove_neg(cpcu) - -#%% read in models -ncn_m = [] -nucn_m = [] -nmodels = len(Model_List) -for mm in range(nmodels): - tmp_ncn=np.empty(0) - tmp_nucn=np.empty(0) - timem=np.empty(0) - for cday in range(cday1,cday2+1): - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' - (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCN') - (time,nucn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NUCN') - - timem = np.hstack((timem,time)) - tmp_ncn = np.hstack((tmp_ncn,ncn*1e-6)) - tmp_nucn = np.hstack((tmp_nucn,nucn*1e-6)) - - ncn_m.append(tmp_ncn) - nucn_m.append(tmp_nucn) - - -#%% make plot - -figname = figpath_sfc_timeseries+'timeseries_CN_'+campaign+'_'+IOP+'.png' -print('plotting figures to '+figname) - -fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches -plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - -ax1.plot(t_cpc,cpc,color='k',linewidth=1,label='CPC(>10nm)') -for mm in range(nmodels): - ax1.plot(timem, ncn_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -# ax1.set_yscale('log') -ax1.tick_params(color='k',labelsize=12) -ylim1 = ax1.get_ylim() - -ax2.plot(t_cpcu,cpcu,color='k',linewidth=1,label='CPC(>3nm)') -for mm in range(nmodels): - ax2.plot(timem, nucn_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) -# ax2.set_yscale('log') 
-ax2.tick_params(color='k',labelsize=12) -ylim2 = ax2.get_ylim() - -# ax1.set_yticks([10,100,1000,10000,100000]) -# ax2.set_yticks([10,100,1000,10000,100000]) -ax1.set_xlim(cday1,cday2) -ax2.set_xlim(cday1,cday2) - -# # set ylimit consistent in subplots -# ax1.set_ylim([ylim1[0], ylim2[1]]) -# ax2.set_ylim([ylim1[0], ylim2[1]]) - - -ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) -ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) - -ax2.set_xlabel('Calendar Day',fontsize=14) -ax1.set_title('Aerosol Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=15) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - diff --git a/python/plotting/plot_ship_latitude_rainLWP.py b/python/plotting/plot_ship_latitude_rainLWP.py deleted file mode 100644 index 46f78eb..0000000 --- a/python/plotting/plot_ship_latitude_rainLWP.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -# plot ship-track meteorological variables binned by different latitudes -""" - -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -import scipy.stats -from read_ship import read_marmet -from read_ARMdata import read_mwr, read_met -from read_netcdf import read_E3SM -from time_format_change import yyyymmdd2cday, cday2mmdd - -#%% settings - -from settings import campaign, latbin, Model_List, color_model, \ - shipmetpath, shipmwrpath, E3SM_ship_path, figpath_ship_statistics - -dlat = latbin[1]-latbin[0] -latmin = latbin-dlat/2 -latmax = latbin+dlat/2 -latlen = len(latbin) - -if not os.path.exists(figpath_ship_statistics): - os.makedirs(figpath_ship_statistics) - - -#%% read in observation - -# initialize variables by latitude bins -lwp_o = list() -rain_o = list() -for bb in range(latlen): - lwp_o.append(np.empty(0)) - rain_o.append(np.empty(0)) - -if campaign=='MAGIC': - lst = glob.glob(shipmetpath+'marmet*.txt') - lst.sort() - - for ll in 
range(len(lst)): - legnum=lst[ll][-6:-4] - - # read in MET - filenameo = shipmetpath+'marmet'+legnum+'.txt' - (shipdata,shipvarlist) = read_marmet(filenameo) - # get variables - lat=np.array([float(a[shipvarlist.index('lat')]) for a in shipdata]) - lon=np.array([float(a[shipvarlist.index('lon')]) for a in shipdata]) - rain=np.array([float(a[shipvarlist.index('org')]) for a in shipdata]) - lat[lat==-999]=np.nan - lon[lon==-999]=np.nan - rain[rain==-999]=np.nan - # rain rate in leg 19 are unrealistic. mask all data - if legnum=='19': - rain=rain*np.nan - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat>=latmin[bb], latt_lwp[-1]: - t_lwp[t_lwp<=t_lwp[-1]]=t_lwp[t_lwp<=t_lwp[-1]]+365 - lat1=np.interp(t_lwp,time0,lat) - lon1=np.interp(t_lwp,time0,lon) - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat1>=latmin[bb], lat1=latmin[bb], lat=latmin[bb], lat1=latmin[bb], lat00)) # number of valid values - data=data*1e-6 # change unit from 1/m3 to 1/cm3 - - # average in time for quicker plot - time0=np.arange(timem[0],timem[-1]+0.04,1./24) - data0 = avg_time_2d(timem,data.T,time0) - pdfall_m[mm] = np.column_stack((pdfall_m[mm],data0.T)) - - meandata=np.nanmean(data,1) - data2.append(meandata) - - # mean pdf - ntotal=sum(ntimes) - data3=[data2[ii]*ntimes[ii]/ntotal for ii in range(len(ntimes))] - pdf_model.append(sum(data3)) - -#%% read in observations - -nbins = 99 # for UHSAS at MAGIC -pdfall_o = np.empty((nbins,0)) - -if campaign=='MAGIC': - startdate='2012-09-22' - enddate='2013-09-26' -elif campaign=='MARCUS': - startdate='2017-10-30' - enddate='2018-03-22' -cday1=yyyymmdd2cday(startdate,'noleap') -cday2=yyyymmdd2cday(enddate,'noleap') -if startdate[0:4]!=enddate[0:4]: - cday2=cday2+365 # cover two years - -uhsasall=list() -ntimes = list() -for cc in range(cday1,cday2+1): - if cc<=365: - yyyymmdd=startdate[0:4]+cday2mmdd(cc) - else: - yyyymmdd=enddate[0:4]+cday2mmdd(cc-365) - - if campaign=='MAGIC': - 
filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.'+yyyymmdd+'*') - elif campaign=='MARCUS': - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.'+yyyymmdd+'*') - if len(filenameo)==0: - continue - elif len(filenameo)>1: - raise ValueError('find too many files') - - print(yyyymmdd) - - (time,dmin,dmax,uhsas,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) - - uhsas=np.ma.filled(uhsas) - # average in time for quicker plot - time0=np.arange(1800,86400,3600) - data0 = avg_time_2d(time,uhsas,time0) - pdfall_o = np.column_stack((pdfall_o,data0.T)) - - # average for each file to reduce computational time - ntimes.append(sum(uhsas[:,0]>=0)) # number of valid values - meandata=np.nanmean(uhsas,0) - meandata[np.isnan(meandata)]=0 - uhsasall.append(meandata) - -size_u = (dmin+dmax)/2 -dsize_u = dmax-dmin - -# mean pdf -ntotal=sum(ntimes) -pdf_obs=sum([uhsasall[ii]*ntimes[ii]/ntotal for ii in range(len(ntimes))]) - -#%% change to dN/dlnDp -dlnDp_u=np.empty(nbins) -for bb in range(len(size_u)): - dlnDp_u[bb]=np.log(dmax[bb]/dmin[bb]) -dlnDp=np.empty(3000) -for bb in range(3000): - dlnDp[bb]=np.log((bb+2)/(bb+1)) -pdf_obs=pdf_obs/dlnDp_u -for mm in range(nmodels): - pdf_model[mm]=pdf_model[mm]/dlnDp - -#%% -pct1_o = [np.nanpercentile(pdfall_o[i,:]/dlnDp_u[i],10) for i in range(nbins)] -pct2_o = [np.nanpercentile(pdfall_o[i,:]/dlnDp_u[i],90) for i in range(nbins)] -pct1_m = [[] for mm in range(nmodels)] -pct2_m = [[] for mm in range(nmodels)] -for mm in range(nmodels): - pct1_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:]/dlnDp[i],10) for i in range(3000)] - pct2_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:]/dlnDp[i],90) for i in range(3000)] - -#%% plot -figname = figpath_ship_statistics+'pdf_AerosolSize_'+campaign+'.png' - -print('plotting figures to '+figname) - -#fig = plt.figure() -fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches - -ax.plot(size_u,pdf_obs,color='k',label='Obs') -for mm in range(nmodels): - 
ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - -ax.fill_between(size_u,pct1_o,pct2_o, alpha=0.5, facecolor='gray') -for mm in range(nmodels): - ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], alpha=0.2, facecolor=color_model[mm]) - -ax.legend(loc='upper right', shadow=False, fontsize='medium') -ax.tick_params(color='k',labelsize=12) -ax.set_xscale('log') -ax.set_yscale('log') -ax.set_ylim(0.01,1e4) -ax.set_xlabel('Diameter (nm)',fontsize=13) -ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) -ax.set_title(campaign,fontsize=14) - -fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) - diff --git a/python/plotting/plot_ship_percentile_lat_CCN.py b/python/plotting/plot_ship_percentile_lat_CCN.py deleted file mode 100644 index 578f7f8..0000000 --- a/python/plotting/plot_ship_percentile_lat_CCN.py +++ /dev/null @@ -1,223 +0,0 @@ -""" -# plot ship-track CCN number concentration binned by different latitudes -""" -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_ARMdata import read_ccn_magic, read_ccn -from read_netcdf import read_E3SM -from time_format_change import cday2mmdd -from quality_control import qc_mask_qcflag,qc_ccn_max - -#%% settings - -from settings import campaign, latbin, Model_List, color_model, \ - shipccnpath, E3SM_ship_path, figpath_ship_statistics - -dlat = latbin[1]-latbin[0] -latmin = latbin-dlat/2 -latmax = latbin+dlat/2 -latlen = len(latbin) - -if not os.path.exists(figpath_ship_statistics): - os.makedirs(figpath_ship_statistics) - -#%% read in model -nmodels=len(Model_List) -ccn3_m = list() -ccn5_m = list() -for mm in range(nmodels): - # initialize variables by latitude bins - ccn3_tmp = list() - ccn5_tmp = list() - for bb in range(latlen): - ccn3_tmp.append(np.empty(0)) - ccn5_tmp.append(np.empty(0)) - - lst = 
glob.glob(E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg*.nc') - - for ll in range(len(lst)): - filenamem = lst[ll] - (timem,varm,timeunitm,varmunit,varmlongname)=read_E3SM(filenamem,['CCN3','CCN5','lat','lon']) - for ii in range(len(varm)): - varm[ii][varm[ii]<-9000] = np.nan - - lat0=varm[2] - lon0=varm[3] - ccn3=varm[0] - ccn5=varm[1] - - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat0>=latmin[bb], lat0timem[-1]: - timem[timem<=timem[-1]]=timem[timem<=timem[-1]]+365 - - # find the days related to the ship leg - day = [int(a) for a in timem] - day = list(set(day)) - day.sort() - - # read in CCN - t_ccn=np.empty(0) - ccn=np.empty(0) - SS=np.empty(0) - for dd in day: - if campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - if len(filenameo)==0: - continue # some days may be missing - (time,timeunit,obs,dataunit,SS0)=read_ccn_magic(filenameo[0]) - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - if len(filenameo)==0: - continue # some days may be missing - (time,timeunit,obs,qc,dataunit,SS0)=read_ccn(filenameo[0]) - obs=qc_mask_qcflag(obs,qc) - t_ccn=np.hstack((t_ccn, dd+time/86400)) - ccn=np.hstack((ccn, obs)) - SS=np.hstack((SS, SS0)) - - ccn=qc_ccn_max(ccn,SS) - - # if time expands two years, add 365 days to the second 
year - if t_ccn[0]>t_ccn[-1]: - t_ccn[t_ccn<=t_ccn[-1]]=t_ccn[t_ccn<=t_ccn[-1]]+365 - # SS=0.1% - idx = np.logical_and(SS>0.05, SS<0.15) - t_ccn1 = t_ccn[idx] - ccn1o = ccn[idx] - SS1 = 0.1 - # SS=0.5% - idx = np.logical_and(SS>0.4, SS<0.6) - t_ccn5 = t_ccn[idx] - ccn5o = ccn[idx] - SS5 = 0.5 - - lat1=np.interp(t_ccn1,timem,lat0) - lon1=np.interp(t_ccn1,timem,lon0) - lat5=np.interp(t_ccn5,timem,lat0) - lon5=np.interp(t_ccn5,timem,lon0) - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat1>=latmin[bb], lat1=latmin[bb], lat5=latmin[bb], lat0t_cpc[-1]: - t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 - lat2=np.interp(t_cpc,timem,lat1) - lon2=np.interp(t_cpc,timem,lon1) - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat2>=latmin[bb], lat21: - raise ValueError('find too many files') - - (time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) - obs=np.ma.filled(obs) - obs=qc_remove_neg(obs) - uhsas=np.hstack((uhsas, np.nansum(obs,1))) - t_uh = np.hstack((t_uh,time/86400+dd)) - - uhsas=qc_cn_max(uhsas,100) - # if no obs available, fill one data with NaN - if len(t_uh)==0: - t_uh=[timem[0],timem[1]] - uhsas=np.full((2),np.nan) - # if time expands two years, add 365 days to the second year - if t_uh[0]>t_uh[-1]: - t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 - lat3=np.interp(t_uh,timem,lat1) - lon3=np.interp(t_uh,timem,lon1) - # separate into latitude bins - for bb in range(latlen): - idx = np.logical_and(lat3>=latmin[bb], lat3=latmin[bb], lat0=latmin[bb], lat1=latmin[bb], lat=latmin[bb], lat=latmin[bb], lat0t_ccn[-1]: - t_ccn[t_ccn<=t_ccn[-1]]=t_ccn[t_ccn<=t_ccn[-1]]+365 - - # ccn[np.logical_or(ccn<0,ccn>1500)]=np.nan - # SS=0.1% - idx = np.logical_and(SS>0.05, SS<0.15) - t_ccn1 = t_ccn[idx] - ccn1o = ccn[idx] - SS1 = 0.1 - # SS=0.5% - idx = np.logical_and(SS>0.4, SS<0.6) - t_ccn5 = t_ccn[idx] - ccn5o = ccn[idx] - SS5 = 0.5 - - #%% make plot - - figname = 
figpath_ship_timeseries+'timeseries_CCN_'+campaign+'_ship'+legnum+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.plot(t_ccn1,ccn1o,color='k',linewidth=1,label='OBS') - for mm in range(nmodels): - ax1.plot(timem, ccn1m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax1.set_yscale('log') - ax1.tick_params(color='k',labelsize=12) - ylim1 = ax1.get_ylim() - - ax2.plot(t_ccn5,ccn5o,color='k',linewidth=1,label='OBS') - for mm in range(nmodels): - ax2.plot(timem, ccn5m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax2.set_yscale('log') - ax2.tick_params(color='k',labelsize=12) - ylim2 = ax2.get_ylim() - - # set ylimit consistent in subplots - # ax1.set_ylim([ylim1[0], ylim2[1]]) - # ax2.set_ylim([ylim1[0], ylim2[1]]) - - ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) - ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) - - # supersaturation - fig.text(0.08,0.9,'SS='+str(SS1)+'%') - fig.text(0.08,0.4,'SS='+str(SS5)+'%') - - ax2.set_xlabel('Calendar Day in '+year0,fontsize=14) - - ax1.set_title('CCN Number Concentration (cm$^{-3}$)',fontsize=15) - - fig.text(.08, .999,'trip # '+legnum, fontsize=12) - - # mask non-ocean model grid (ps is inconsistent with obs) - ax1.vlines(timem[datamask],ylim1[0],ylim1[1],color='lightgray') - ax2.vlines(timem[datamask],ylim2[0],ylim2[1],color='lightgray') - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/python/plotting/plot_ship_timeseries_CN.py b/python/plotting/plot_ship_timeseries_CN.py deleted file mode 100644 index 30034f1..0000000 --- a/python/plotting/plot_ship_timeseries_CN.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -# plot timeseries of surface aerosol number concentration along each ship leg -""" 
-import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_ARMdata import read_cpc, read_uhsas -from read_netcdf import read_E3SM -from time_format_change import cday2mmdd -from specific_data_treatment import mask_model_ps -from quality_control import qc_mask_qcflag,qc_remove_neg,qc_cn_max - -#%% settings - -from settings import campaign, Model_List, color_model, \ - shipcpcpath, shipmetpath, shipuhsaspath, E3SM_ship_path, figpath_ship_timeseries - -if not os.path.exists(figpath_ship_timeseries): - os.makedirs(figpath_ship_timeseries) - -lst = glob.glob(E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') -lst.sort() - -for ll in range(len(lst)): - - if campaign=='MAGIC': - legnum=lst[ll][-5:-3] - elif campaign=='MARCUS': - legnum=lst[ll][-4] - - #%% read in model - nmodels=len(Model_List) - datam = list() - databins = list() - for mm in range(nmodels): - filenamem = E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' - - (timem,NCNall,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCNall') - (timem,data,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCN') - - datam.append(data*1e-6) # change unit from 1/m3 to 1/cm3 - databins.append(NCNall*1e-6) # change unit from 1/m3 to 1/cm3 - - # mask data where model grid is not at ocean surface (Ps is too different than obs) - filenamem = E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' - (timem,psm,timeunitx,psmunit,psmlongname)=read_E3SM(filenamem,'PS') - datamask = mask_model_ps(timem,0.01*psm,legnum,campaign,shipmetpath) - # for mm in range(nmodels): - # datam[mm][datamask]=np.nan - - year0 = str(int(timeunitm.split()[2][0:4])+1) - - #%% read in observations - # find the days related to the ship leg - day = [int(a) for a in timem] - day = list(set(day)) - day.sort() - - # CPC - t_cpc=np.empty(0) - cpc=np.empty(0) - for dd in day: - - if 
campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*') - if len(filenameo)==0: - continue # some days may be missing - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - if len(filenameo)==0: - continue # some days may be missing - - - (time,obs,qc,timeunit,dataunit)=read_cpc(filenameo[0]) - obs=qc_mask_qcflag(obs,qc) - t_cpc=np.hstack((t_cpc, dd+time/86400)) - cpc=np.hstack((cpc, obs)) - - cpc=qc_remove_neg(cpc) - cpc=qc_cn_max(cpc,10) - # if time expands two years, add 365 days to the second year - if t_cpc[0]>t_cpc[-1]: - t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 - - # UHSAS - t_uh=np.empty(0) - uhsas=np.empty(0) - for dd in day: - - if campaign=='MAGIC': - if int(legnum)<=9: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') - else: - filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') - elif campaign=='MARCUS': - if int(legnum)<=2: - if dd<=365: # year 2012 - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') - else: - filenameo = 
glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') - else: - filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') - - if len(filenameo)==0: - continue # some days may be missing - if len(filenameo)>1: - raise ValueError('find too many files') - - (time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) - obs=np.ma.filled(obs) - obs=qc_remove_neg(obs) - uhsas=np.hstack((uhsas, np.nansum(obs,1))) - t_uh = np.hstack((t_uh,time/86400+dd)) - - uhsas=qc_cn_max(uhsas,100) - # if no obs available, fill one data with NaN - if len(t_uh)==0: - t_uh=[timem[0],timem[1]] - uhsas=np.full((2),np.nan) - - # if time expands two years, add 365 days to the second year - if t_uh[0]>t_uh[-1]: - t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 - - #%% Calculate model aerosol number concentration for UHSAS size range - b1 = int(dmin[0]) - b2 = int(dmax[-1]) - datam2=list() - for mm in range(nmodels): - datam2.append(np.nansum(databins[mm][b1-1:b2,:],0)) - # datam2[mm][datamask]=np.nan - - #%% make plot - - figname = figpath_ship_timeseries+'timeseries_CN_'+campaign+'_ship'+legnum+'.png' - print('plotting figures to '+figname) - - fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches - plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 - - ax1.plot(t_cpc,cpc,color='k',linewidth=1,label='CPC') - for mm in range(nmodels): - ax1.plot(timem, datam[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax1.set_yscale('log') - ax1.tick_params(color='k',labelsize=12) - ylim1 = ax1.get_ylim() - - ax2.plot(t_uh,uhsas,color='k',linewidth=1,label='UHSAS') - # ax2.plot(t_uh,uhsas,color='k',linewidth=1,label='UHSAS ('+str(b1)+'-'+str(b2)+'nm)') - for mm in range(nmodels): - ax2.plot(timem, datam2[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) - # ax1.set_yscale('log') - ax2.tick_params(color='k',labelsize=12) - ylim2 = ax2.get_ylim() - - 
ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) - ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) - - ax2.set_xlabel('Calendar Day in '+year0,fontsize=14) - - ax1.set_title('Aerosol Number Concentration (cm$^{-3}$)',fontsize=15) - - fig.text(.08, .999,'trip # '+legnum, fontsize=12) - - # mask non-ocean model grid (ps is inconsistent with obs) - ax1.vlines(timem[datamask],ylim1[0],ylim1[1],color='lightgray') - ax2.vlines(timem[datamask],ylim2[0],ylim2[1],color='lightgray') - - - fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/python/plotting/plot_ship_timeseries_met.py b/python/plotting/plot_ship_timeseries_met.py deleted file mode 100644 index 9c9e9d1..0000000 --- a/python/plotting/plot_ship_timeseries_met.py +++ /dev/null @@ -1,202 +0,0 @@ -""" -# plot timeseries of basic meteorological variables along ship track -""" -import sys -sys.path.insert(1,'../subroutines/') - -import os -import glob -import matplotlib.pyplot as plt -import numpy as np -from read_ship import read_marmet -from read_ARMdata import read_met -from read_netcdf import read_E3SM -from time_format_change import yyyymmdd2cday, cday2mmdd -from specific_data_treatment import avg_time_1d - - -#%% settings - -from settings import campaign, Model_List, color_model, shipmetpath, E3SM_ship_path, figpath_ship_timeseries - -if not os.path.exists(figpath_ship_timeseries): - os.makedirs(figpath_ship_timeseries) - - -lst = glob.glob(E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') -lst.sort() - -for ll in range(len(lst)): - - #%% for MAGIC, read each ship leg - if campaign=='MAGIC': - legnum=lst[ll][-5:-3] - filenameo = shipmetpath+'marmet'+legnum+'.txt' - (shipdata,shipvarlist) = read_marmet(filenameo) - year=[a[1] for a in shipdata] - month=[a[2] for a in shipdata] - day=[a[3] for a in shipdata] - hh=[int(a[4]) for a in shipdata] - mm=[int(a[5]) 
for a in shipdata] - ss=[int(a[6]) for a in shipdata] - yyyymmdd = [year[i]+month[i]+day[i] for i in range(len(year))] # yyyymmdd - # get time in calendar day - time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. - time = np.array([time[i] + yyyymmdd2cday(yyyymmdd[i],'noleap') for i in range(len(time))]) - if time[-1]1e10]=np.nan - aodall = np.vstack((aodall,np.ma.filled(aod,np.nan))) - -aodmean = np.nanmean(aodall,0) -if len(aodmean)!=len(lonm): - print(aodall.shape) - error - - -# %% output extacted file -outputname = 'AOD_mean_global_'+model+'.nc' -print('output to this file: '+E3SM_output_path+outputname) - -# define filename -f = Dataset(E3SM_output_path+outputname, 'w', format='NETCDF4') - -# define dimensions -t = f.createDimension('ncol', None) # unlimited - -# create variable list -lat_o = f.createVariable("lat","f8",("ncol",)) -lon_o = f.createVariable("lon","f8",("ncol",)) -var_o = f.createVariable("AODmean",'f8',("ncol",)) - -# write data -lat_o[:] = latm -lon_o[:] = lonm -var_o[:] = aodmean - -# attributes -lat_o.units = latmunit -lon_o.units = lonmunit -var_o.units = aodunit -var_o.long_name = aodname - -# global attributes -import time as ttt -f.description = model+" extact global mean AOD" -f.history = "Created by Shuaiqi at " + ttt.ctime(ttt.time()) - -f.close() - - - - - diff --git a/python/preprocessing/extract_E3SM_AOD_regionmean.py b/python/preprocessing/extract_E3SM_AOD_regionmean.py deleted file mode 100644 index 2f0fe06..0000000 --- a/python/preprocessing/extract_E3SM_AOD_regionmean.py +++ /dev/null @@ -1,118 +0,0 @@ - -import sys -sys.path.insert(1,'../subroutines/') - -import numpy as np -from time_format_change import yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from netCDF4 import Dataset - - -#%% settings - -from settings import campaign, E3SM_h3_path, E3SM_h3_filehead, Model_List - -E3SM_region_path = '../../figures/' - - -if campaign=='HISCALE': - E3SMdomain_range='260e_to_265e_34n_to_39n' # domain range 
in E3SM regional output - start_date='2016-04-25' - end_date='2016-09-22' -elif campaign=='ACEENA': - E3SMdomain_range='330e_to_335e_37n_to_42n' - start_date='2017-06-20' - end_date='2018-02-19' -elif campaign=='CSET': - E3SMdomain_range='202e_to_240e_19n_to_40n' - start_date='2015-07-01' - end_date='2015-08-15' -elif campaign=='SOCRATES': - E3SMdomain_range='133e_to_164e_42s_to_63s' - start_date='2018-01-15' - end_date='2018-02-24' -elif campaign=='MAGIC': - E3SMdomain_range='202e_to_243e_20n_to_35n' # domain range in E3SM regional output - start_date='2012-10-01' - end_date='2013-09-30' -elif campaign=='MARCUS': - E3SMdomain_range='60e_to_160e_42s_to_70s' - start_date='2017-10-01' - end_date='2018-04-20' -else: - print('ERROR: please specify domain info for '+campaign) - error - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date,'noleap') -cday2 = yyyymmdd2cday(end_date,'noleap') - -year0 = start_date[0:4] -if start_date[0:4]!=end_date[0:4]: - cday2=cday2+365 - -for mm in range(len(Model_List)): - model=Model_List[mm] - #%% process data for each day - for cday in range(cday1,cday2+1): - if cday>365: - mmdd=cday2mmdd(cday-365) - date=end_date[0:4]+'-'+mmdd[0:2]+'-'+mmdd[2:4] - else: - mmdd=cday2mmdd(cday) - date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] - - print(date) - filename_input = E3SM_h3_path[mm]+E3SM_h3_filehead[mm]+'.cam.h3.'+date+'-00000.nc' - - if cday==cday1: - (timem,lonm,timeunitm,lonmunit,lonmname)=read_E3SM(filename_input,'lon_'+E3SMdomain_range) - (timem,latm,timeunitm,latmunit,latmname)=read_E3SM(filename_input,'lat_'+E3SMdomain_range) - aodall = np.empty((0,len(lonm))) - (timem,aod,timeunitm,aodunit,aodname)=read_E3SM(filename_input,'AODVIS_'+E3SMdomain_range) - aod[aod>1e10]=np.nan - aodall = np.vstack((aodall,np.ma.filled(aod,np.nan))) - - aodmean = np.nanmean(aodall,0) - if len(aodmean)!=len(lonm): - print(aodall.shape) - error - - - # %% output extacted file - outputname = 'AOD_mean_'+campaign+'_'+model+'.nc' - print('output 
to this file: '+E3SM_region_path+outputname) - - # define filename - f = Dataset(E3SM_region_path+outputname, 'w', format='NETCDF4') - - # define dimensions - t = f.createDimension('ncol', None) # unlimited - - # create variable list - lat_o = f.createVariable("lat","f8",("ncol",)) - lon_o = f.createVariable("lon","f8",("ncol",)) - var_o = f.createVariable("AODmean",'f8',("ncol",)) - - # write data - lat_o[:] = latm - lon_o[:] = lonm - var_o[:] = aodmean - - # attributes - lat_o.units = latmunit - lon_o.units = lonmunit - var_o.units = aodunit - var_o.long_name = aodname - - # global attributes - import time as ttt - f.description = model+" extact mean AOD during "+campaign - f.history = "Created by Shuaiqi at " + ttt.ctime(ttt.time()) - - f.close() - - - - - diff --git a/python/preprocessing/prep_E3SM_flighttrack_allvars.py b/python/preprocessing/prep_E3SM_flighttrack_allvars.py deleted file mode 100644 index 2f3ce81..0000000 --- a/python/preprocessing/prep_E3SM_flighttrack_allvars.py +++ /dev/null @@ -1,210 +0,0 @@ -""" -# prepare E3SM aerosol variables for flight tracks -# input data is IWG measurements from aircraft and E3SM regional output -# output is aerosol variables for each flight -""" -import sys -sys.path.insert(1, '../subroutines/') - -import glob -import os -import numpy as np -from time_format_change import hhmmss2sec, timeunit2cday -from read_aircraft import read_iwg1, read_RF_NCAR -from read_netcdf import read_E3SM -from netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - - -#%% settings - - -from settings import campaign, E3SM_h3_path, E3SM_h3_filehead, E3SM_aircraft_path, Model_List - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, iwgpath -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('this aircraft campaign is not recognized: ' + campaign) - -if not 
os.path.exists(E3SM_aircraft_path): - os.makedirs(E3SM_aircraft_path) - - -#%% find all flight data -if campaign == 'HISCALE': - lst = glob.glob(iwgpath + '*a2.txt') - lst.sort() - if IOP == 'IOP1': - lst = lst[0:17] - elif IOP == 'IOP2': - lst = lst[17:] - elif IOP[0:4] == '2016': - a = lst[0].split('_' + campaign + '_') - lst = glob.glob(a[0] + '*' + IOP + '*') - lst.sort() -elif campaign == 'ACEENA': - lst = glob.glob(iwgpath + '*a2.txt') - lst.sort() - if IOP == 'IOP1': - lst = lst[0:20] - elif IOP == 'IOP2': - lst = lst[20:] - elif IOP[0:4] == '2017' or IOP[0:4] == '2018': - a = lst[0].split('_' + campaign + '_') - lst = glob.glob(a[0] + '*' + IOP + '*') - lst.sort() -elif campaign in ['CSET', 'SOCRATES']: - lst = glob.glob(RFpath + 'RF*.PNI.nc') - lst.sort() -else: - raise ValueError('this aircraft campaign is not recognized: ' + campaign) - -print('total number of files:' + str(len(lst))) - -for filename in lst: - - fname = filename.split('.') - #%% read in flight data - if campaign in ['HISCALE', 'ACEENA']: - date = fname[-3] - print('input data for ' + date) - year = date[0:4] - month = date[4:6] - - (flight, flightvars) = read_iwg1(filename) - timelen = len(flight) - # get lat, lon, height, time - lon = np.empty(timelen) - lat = np.empty(timelen) - height = np.empty(timelen) - time = np.empty(timelen) - if np.logical_and(campaign == 'ACEENA', date == '20180216a'): - flight.insert(1403, list(flight[1403])) - tstr = flight[1403][1] - tstr = tstr[0:-1] + str(int(tstr[-1])-1) - flight[1403][1] = tstr - del flight[-1] - for t in range(timelen): - lat[t] = float(flight[t][2]) - lon[t] = float(flight[t][3]) + 360 - height[t] = float(flight[t][4]) - timestr = flight[t][1].split(' ') - time[t] = hhmmss2sec(timestr[1]) - - elif campaign in ['CSET', 'SOCRATES']: - date = fname[-4] - print('input data for ' + date) - year = date[0:4] - month = date[4:6] - (time, height, timeunit, hunit, hlongname, cellsize, cellunit) = read_RF_NCAR(filename, 'ALT') - (time, lat, 
timeunit, latunit, latlongname, cellsize, cellunit) = read_RF_NCAR(filename, 'LAT') - (time, lon, timeunit, lonunit, lonlongname, cellsize, cellunit) = read_RF_NCAR(filename, 'LON') - lon[lon<0] = lon[lon<0] + 360 - - #%% set variables and model region - for mm in range(len(Model_List)): - model = Model_List[mm] - variable_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', 'CLDLIQ', 'NUMLIQ', - 'CLDICE', 'NUMICE', 'RAINQM', 'NUMRAI', 'AWNI', 'AWNC', 'AQRAIN', 'AQSNOW', - 'AREI', 'AREL', 'FICE', 'IWC', 'LWC', 'ICLDIWP', 'ICLDTWP', 'ICWNC', 'ICINC', - 'WP2_CLUBB', 'CCN1', 'CCN3', 'CCN5', 'bc_a1', 'bc_a3', 'bc_a4', 'dst_a1', 'dst_a3', - 'mom_a1', 'mom_a2', 'mom_a3', 'mom_a4', 'ncl_a1', 'ncl_a2', 'ncl_a3', - 'pom_a1', 'pom_a3', 'pom_a4', 'so4_a1', 'so4_a2', 'so4_a3', - 'soa_a1', 'soa_a2', 'soa_a3', 'num_a1', 'num_a2', 'num_a3', 'num_a4', - 'num_c1', 'num_c2', 'num_c3', 'num_c4', "dgnd_a01", "dgnd_a02", "dgnd_a03", "dgnd_a04", - "dgnw_a01", "dgnw_a02", "dgnw_a03", "dgnw_a04", 'EXTINCT', 'ABSORB'] - - if model == 'NucSoaCond': # with so4 and soa in nucleation mode - variable_names = variable_names + ['so4_a5', 'soa_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] - elif model == 'Nuc': # only with so4 in nucleation mode - variable_names = variable_names + ['so4_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] - varlen = len(variable_names) - - if campaign == 'HISCALE': - E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output - elif campaign == 'ACEENA': - E3SMdomain_range = '330e_to_335e_37n_to_42n' - elif campaign == 'CSET': - E3SMdomain_range = '202e_to_240e_19n_to_40n' - elif campaign == 'SOCRATES': - E3SMdomain_range = '133e_to_164e_42s_to_63s' - else: - raise ValueError('this aircraft campaign is not recognized: ' + campaign) - - #%% read in E3SM data - variables_out = list() - pblh_out = list() - for varname in variable_names: - variables_out.append([]) - - date2 = date[0:4] + '-' + date[4:6] + '-' + date[6:8] - 
filename_input = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.' + date2 + '-00000.nc' - - (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) - (timem, z3, timeunitm, zunit, zname) = read_E3SM(filename_input, 'Z3_' + E3SMdomain_range) - (timem, pblh, timeunitm, pblhunit, pblhname) = read_E3SM(filename_input, 'PBLH_' + E3SMdomain_range) - # read in all variables - (timem, variables, timeunitm, var_units, var_longnames) = \ - read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable_names]) - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - timem = 86400* (timem.data - int(timem[0])) - - for tt in range(len(time)): - t_idx = np.abs(timem-time[tt]).argmin() - x_idx = find_nearest(lonm, latm, lon[tt], lat[tt]) - z_idx = np.abs(z3[t_idx, :, x_idx]-height[tt]).argmin() - for vv in range(varlen): - variables_out[vv].append(variables[vv][t_idx, z_idx, x_idx]) - pblh_out.append(pblh[t_idx, x_idx]) - - # %% output extacted file - outputname = 'Aircraft_vars_' + campaign + '_' + model + '_' + date + '.nc' - print('output to this file: ' + E3SM_aircraft_path + outputname) - - # define filename - f = Dataset(E3SM_aircraft_path + outputname, 'w', format = 'NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - - # create variable list - time_o = f.createVariable("time", "f8", ("time",)) - height_o = f.createVariable("height", 'f8', ("time",)) - pblh_o = f.createVariable('PBLH', 'f8', ("time",)) - var_o = list() - for vv in range(varlen): - var_o.append (f.createVariable(variable_names[vv], 'f8', ("time", ))) - - # write data - time_o[:] = time - height_o[:] = height - pblh_o[:] = np.array(pblh_out) - for vv in range(varlen): - var_o[vv][:] = np.array(variables_out[vv]) - - # attributes - time_o.units = "Seconds since " + date2 + ' 00:00 UTC' - 
height_o.units = 'm MSL' - pblh_o.units = pblhunit - pblh_o.long_name = pblhname - for vv in range(varlen): - var_o[vv].units = var_units[vv] - var_o[vv].long_name = var_longnames[vv] - - # global attributes - import time as ttt - f.description = model + " extact for aircraft track for " + campaign - f.aircraftfile = filename.split('\\')[-1] - f.create_time = ttt.ctime(ttt.time()) - - f.close() - diff --git a/python/preprocessing/prep_E3SM_flighttrack_bins.py b/python/preprocessing/prep_E3SM_flighttrack_bins.py deleted file mode 100644 index 90167f5..0000000 --- a/python/preprocessing/prep_E3SM_flighttrack_bins.py +++ /dev/null @@ -1,234 +0,0 @@ -""" -# prepare E3SM aerosol size distribution for flight tracks -# input data is IWG measurements from aircraft and E3SM regional output -# output is aerosol size distribution for each flight -""" -import sys -sys.path.insert(1,'../subroutines/') - -import glob -import os -import numpy as np -from time_format_change import hhmmss2sec, timeunit2cday -from read_aircraft import read_iwg1, read_RF_NCAR -from read_netcdf import read_E3SM -from CN_mode_to_size import calc_CNsize_cutoff_0_3000nm -from netCDF4 import Dataset - - -def find_nearest(xall,yall,x,y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - - -#%% settings - -from settings import campaign, E3SM_h3_path, E3SM_h3_filehead, E3SM_aircraft_path, Model_List - -if campaign in ['HISCALE', 'ACEENA']: - from settings import IOP, iwgpath -elif campaign in ['CSET', 'SOCRATES']: - from settings import RFpath -else: - raise ValueError('this aircraft campaign is not recognized: '+campaign) - -if not os.path.exists(E3SM_aircraft_path): - os.makedirs(E3SM_aircraft_path) - - -if campaign=='HISCALE': - E3SMdomain_range='260e_to_265e_34n_to_39n' # domain range in E3SM regional output -elif campaign=='ACEENA': - E3SMdomain_range='330e_to_335e_37n_to_42n' -elif campaign=='CSET': - E3SMdomain_range='202e_to_240e_19n_to_40n' -elif 
campaign=='SOCRATES': - E3SMdomain_range='133e_to_164e_42s_to_63s' -else: - raise ValueError('this aircraft campaign is not recognized: '+campaign) - -#%% find all flight data -if campaign=='HISCALE': - lst = glob.glob(iwgpath+'*a2.txt') - lst.sort() - if IOP=='IOP1': - lst=lst[0:17] - elif IOP=='IOP2': - lst=lst[17:] - elif IOP[0:4]=='2016': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() -elif campaign=='ACEENA': - lst = glob.glob(iwgpath+'*a2.txt') - lst.sort() - if IOP=='IOP1': - lst=lst[0:20] - elif IOP=='IOP2': - lst=lst[20:] - elif IOP[0:4]=='2017' or IOP[0:4]=='2018': - a=lst[0].split('_'+campaign+'_') - lst = glob.glob(a[0]+'*'+IOP+'*') - lst.sort() -elif campaign in ['CSET', 'SOCRATES']: - lst = glob.glob(RFpath+'RF*.PNI.nc') - lst.sort() -else: - raise ValueError('this aircraft campaign is not recognized: '+campaign) - -print('total number of files:'+str(len(lst))) - -for filename in lst: - - fname=filename.split('.') - #%% read in flight data - if campaign in ['HISCALE', 'ACEENA']: - date=fname[-3] - print('input data for '+date) - year=date[0:4] - month=date[4:6] - - (flight,flightvars)=read_iwg1(filename) - timelen = len(flight) - # get lat, lon, height, time - lon=np.empty(timelen) - lat=np.empty(timelen) - height=np.empty(timelen) - time=np.empty(timelen) - if np.logical_and(campaign=='ACEENA', date=='20180216a'): - flight.insert(1403,list(flight[1403])) - tstr=flight[1403][1] - tstr=tstr[0:-1]+str(int(tstr[-1])-1) - flight[1403][1]=tstr - del flight[-1] - for t in range(timelen): - lat[t]=float(flight[t][2]) - lon[t]=float(flight[t][3])+360 - height[t]=float(flight[t][4]) - timestr=flight[t][1].split(' ') - time[t]=hhmmss2sec(timestr[1]) - - elif campaign in ['CSET', 'SOCRATES']: - date=fname[-4] - print('input data for '+date) - year=date[0:4] - month=date[4:6] - (time,height,timeunit,hunit,hlongname,cellsize,cellunit)=read_RF_NCAR(filename,'ALT') - 
(time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') - (time,lon,timeunit,lonunit,lonlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LON') - lon[lon<0]=lon[lon<0]+360 - timelen = len(time) - - #%% read in E3SM data - for mm in range(len(Model_List)): - model=Model_List[mm] - date2 = date[0:4]+'-'+date[4:6]+'-'+date[6:8] - filename_input = E3SM_h3_path[mm]+E3SM_h3_filehead[mm]+'.cam.h3.'+date2+'-00000.nc' - - (timem,lonm,timeunitm,lonmunit,lonmname)=read_E3SM(filename_input,'lon_'+E3SMdomain_range) - (timem,latm,timeunitm,latmunit,latmname)=read_E3SM(filename_input,'lat_'+E3SMdomain_range) - (timem,z3,timeunitm,latmunit,latmname)=read_E3SM(filename_input,'Z3_'+E3SMdomain_range) - # do not use read_E3SM because hyam and hybm don't have units - f = Dataset(filename_input,'r') - P0 = f.variables['P0'][:] - hyam = f.variables['hyam'][:] - hybm = f.variables['hybm'][:] - T = f.variables['T_'+E3SMdomain_range][:] - PS = f.variables['PS_'+E3SMdomain_range][:] - num_a1 = f.variables['num_a1_'+E3SMdomain_range][:] - num_a2 = f.variables['num_a2_'+E3SMdomain_range][:] - num_a3 = f.variables['num_a3_'+E3SMdomain_range][:] - num_a4 = f.variables['num_a4_'+E3SMdomain_range][:] - dn1 = f.variables['dgnd_a01_'+E3SMdomain_range][:] - dn2 = f.variables['dgnd_a02_'+E3SMdomain_range][:] - dn3 = f.variables['dgnd_a03_'+E3SMdomain_range][:] - dn4 = f.variables['dgnd_a04_'+E3SMdomain_range][:] - if model[0:3]=='Nuc': # with nucleation mode - num_a5 = f.variables['num_a5_'+E3SMdomain_range][:] - dn5 = f.variables['dgnd_a05_'+E3SMdomain_range][:] - f.close() - - Pres = np.nan*T - zlen=T.shape[1] - for kk in range(zlen): - Pres[:,kk,:] = hyam[kk]*P0 + hybm[kk]*PS - - #% find the nearest time and height of the aircraft measurements - cdaym = timeunit2cday(timeunitm,'noleap') - timem = 86400* (timem.data - int(timem[0])) - NCNall=np.full((3000,timelen),np.nan) - tzx0 = [0,0,0] - t0 = 0 - for tt in range(timelen): - t_idx = np.abs(timem-time[tt]).argmin() - 
x_idx = find_nearest(lonm,latm,lon[tt],lat[tt]) - z_idx = np.abs(z3[t_idx,:,x_idx]-height[tt]).argmin() - - # copy the same grid to save time - if [t_idx,x_idx,z_idx]==tzx0: - NCNall[:,tt] = NCNall[:,t0] - else: - numall = [num_a1[t_idx,z_idx,x_idx],num_a2[t_idx,z_idx,x_idx],num_a3[t_idx,z_idx,x_idx],num_a4[t_idx,z_idx,x_idx]] - dnall = [dn1[t_idx,z_idx,x_idx],dn2[t_idx,z_idx,x_idx],dn3[t_idx,z_idx,x_idx],dn4[t_idx,z_idx,x_idx]] - if model[0:3]=='Nuc': # with nucleation mode - numall.append(num_a5[t_idx,z_idx,x_idx]) - dnall.append(dn5[t_idx,z_idx,x_idx]) - NCNall[:,tt] = calc_CNsize_cutoff_0_3000nm(dnall,numall,T[t_idx,z_idx,x_idx],Pres[t_idx,z_idx,x_idx]) - # update the time of this unique grid - tzx0=[t_idx,x_idx,z_idx] - t0=tt - - # calculate total CN concentration for CPC (>10nm) and CPCU (>3nm) - NUCN = np.nansum(NCNall[3:,:],0) # >3nm - NCN = np.nansum(NCNall[10:,:],0) # >10nm - - - #%% output extacted file - outputname = 'Aircraft_CNsize_'+campaign+'_'+model+'_'+date+'.nc' - print('output to this file: '+E3SM_aircraft_path+outputname) - - # define filename - f = Dataset(E3SM_aircraft_path+outputname, 'w', format='NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - size=f.createDimension('size',3000) - - # create variable list - time_o = f.createVariable("time","f8",("time",)) - height_o = f.createVariable("height",'f8',("time",)) - size_o = f.createVariable("size",'i8',("size")) - - data_o = f.createVariable('NCNall','f8',("size","time")) - ncn_o = f.createVariable("NCN","f8",("time",)) - nucn_o = f.createVariable("NUCN","f8",("time",)) - - # write data - time_o[:] = time - height_o[:] = height - size_o[:] = np.arange(1,3001) - data_o[:,:]=NCNall - ncn_o[:]=NCN - nucn_o[:]=NUCN - - # attributes - time_o.units = "Seconds since "+date2+' 00:00 UTC' - height_o.units = 'm MSL' - size_o.units = 'nm' - size_o.long_name="0 to 3000nm with 1nm increment" - data_o.units = '#/m3' - data_o.long_name = 'aerosol size distribution' - 
ncn_o.units = '#/m3' - ncn_o.long_name = 'aerosol number concentration for size >10nm' - nucn_o.units = '#/m3' - nucn_o.long_name = 'aerosol number concentration for size >3nm' - - # global attributes - import time as ttt - f.description = model+" extact for aircraft track for "+campaign - f.aircraftfile = filename.split('\\')[-1] - f.create_time = ttt.ctime(ttt.time()) - - f.close() - diff --git a/python/preprocessing/prep_E3SM_profile_allvars.py b/python/preprocessing/prep_E3SM_profile_allvars.py deleted file mode 100644 index 989d835..0000000 --- a/python/preprocessing/prep_E3SM_profile_allvars.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -# prepare E3SM vertical profiles at ARM sites -# input data is E3SM regional output -# output is variables at the nearest column -""" -import sys -sys.path.insert(1, '../subroutines/') - -import os -import numpy as np -from time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - -#%% settings - -from settings import campaign, lat0, lon0,site, start_date, end_date, \ - E3SM_h3_path, E3SM_h3_filehead, E3SM_profile_path, Model_List - -# output height above ground. data will be interpolated into z_f -z_f = np.hstack((np.arange(0, 500, 50), np.arange(500, 2000, 100), np.arange(2000, 5000, 300), - np.arange(5000, 10000, 500), np.arange(10000, 20001, 1000))) -zlen = len(z_f) - -if not os.path.exists(E3SM_profile_path): - os.makedirs(E3SM_profile_path) - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date, 'noleap') -cday2 = yyyymmdd2cday(end_date, 'noleap') -if start_date[0:4]!=end_date[0:4]: - raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') -year0 = start_date[0:4] - -#%% set variables for profiles -variable_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', 'LWC', 'IWC', - 'CLDLIQ', 'CLDICE', 'NUMLIQ', "AREI", "AREL", "ICLDIWP", "ICLDTWP"] -varlen = len(variable_names) - -if site=='SGP': - E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output -elif site=='ENA': - E3SMdomain_range = '330e_to_335e_37n_to_42n' -else: - raise ValueError('data for this site is not specified: ' + site) - -for mm in range(len(Model_List)): - model = Model_List[mm] - - #%% process data - for cday in range(cday1, cday2 + 1): - mmdd = cday2mmdd(cday) - date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] - - # read in E3SM data - variables = list() - var_units = list() - var_longnames = list() - - filename_input = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.' + date + '-00000.nc' - - (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) - (timem, z3, timeunitm, zunit, zname) = read_E3SM(filename_input, 'Z3_' + E3SMdomain_range) - - x_idx = find_nearest(lonm, latm, lon0, lat0) - zm = z3[:, :, x_idx] - - # read in all variables - (timem, var2d, timeunitm, var2dunit, var2dlongname) = \ - read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable_names]) - - tlen = len(timem) - for vv in range(varlen): - var = var2d[vv][:, :, x_idx] - var2 = np.full((tlen, zlen), np.nan) - for tt in range(tlen): - # interpolate height above sea level to height above ground - var2[tt, :] = np.interp(z_f, np.flip(zm[tt, :]-zm[tt, -1]), np.flip(var[tt, :])) - variables.append(var2) - var_units.append(var2dunit[vv]) - var_longnames.append(var2dlongname[vv]) - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - time = timem.data - 365*(int(year0)-int(yearm)) + 
cdaym - - - # %% output extacted file - outputname = 'Profile_vars_' + campaign + '_' + model + '.' + date + '.nc' - print('output to this file: ' + E3SM_profile_path + outputname) - - # define filename - f = Dataset(E3SM_profile_path + outputname, 'w', format = 'NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - z = f.createDimension('height', zlen) - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - height_o = f.createVariable("height", "f8", ("height", )) - lat_o = f.createVariable("lat", "f8", ()) - lon_o = f.createVariable("lon", "f8", ()) - var_o = list() - for vv in range(varlen): - var_o.append (f.createVariable(variable_names[vv], 'f8', ("time", "height"))) - - # write data - time_o[:] = time - height_o[:] = z_f - lat_o[:] = latm[x_idx] - lon_o[:] = lonm[x_idx] - for vv in range(varlen): - var_o[vv][:] = np.array(variables[vv]) - - # attributes - time_o.units = "days since " + str(int(year0)-1) + "-12-31 00:00:00 UTC" - lat_o.units = "latitude" - lon_o.units = "longitude" - height_o.units = "gpm above ground" - for vv in range(varlen): - var_o[vv].units = var_units[vv] - var_o[vv].long_name = var_longnames[vv] - - # global attributes - import time as ttt - f.description = model + " extact vertical variables for " + campaign - f.modeldata = filename_input - f.create_time = ttt.ctime(ttt.time()) - - f.close() - diff --git a/python/preprocessing/prep_E3SM_sfc_allvars.py b/python/preprocessing/prep_E3SM_sfc_allvars.py deleted file mode 100644 index 5501169..0000000 --- a/python/preprocessing/prep_E3SM_sfc_allvars.py +++ /dev/null @@ -1,138 +0,0 @@ -""" -# prepare E3SM surface aerosol properties at ARM sites -# input data is E3SM regional output -# output is surface variables at the nearest grid -""" -import sys -sys.path.insert(1, '../subroutines/') - -import os -import numpy as np -from time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from 
netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall - x) + np.square(yall - y) - idx = distance.argmin() - return(idx) - -#%% settings - -from settings import campaign, lat0, lon0,site, start_date, end_date, \ - E3SM_h3_path, E3SM_h3_filehead, E3SM_sfc_path, Model_List - -if not os.path.exists(E3SM_sfc_path): - os.makedirs(E3SM_sfc_path) - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date, 'noleap') -cday2 = yyyymmdd2cday(end_date, 'noleap') -if start_date[0:4] != end_date[0:4]: - raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') -year0 = start_date[0:4] - -if site == 'SGP': - E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output -elif site == 'ENA': - E3SMdomain_range = '330e_to_335e_37n_to_42n' -else: - raise ValueError('data for this site is not specified: ' + site) - - -#%% set variables -for mm in range(len(Model_List)): - model = Model_List[mm] - variable1d_names = ['PS', 'PBLH', 'FLNT', 'FSNT', 'FLNS', 'FSNS', "LHFLX", "SHFLX", - 'TREFHT','PRECT','PRECL', "TGCLDLWP", "TGCLDIWP"] - variable2d_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', - 'CLDLIQ', 'CLDICE', 'NUMLIQ', 'NUMICE', 'NUMRAI', 'NUMSNO', 'RAINQM', 'SNOWQM', - 'CCN1', 'CCN3', 'CCN5', "AREI", "AREL", "ICLDIWP", "ICLDTWP", - 'bc_a1', 'bc_a3', 'bc_a4', 'dst_a1', 'dst_a3', 'mom_a1', 'mom_a2', 'mom_a3', 'mom_a4', - 'ncl_a1', 'ncl_a2', 'ncl_a3', 'pom_a1', 'pom_a3', 'pom_a4', 'so4_a1', 'so4_a2', 'so4_a3', - 'soa_a1', 'soa_a2', 'soa_a3', 'num_a1', 'num_a2', 'num_a3', 'num_a4', - 'num_c1', 'num_c2', 'num_c3', 'num_c4', "dgnd_a01", "dgnd_a02", "dgnd_a03", "dgnd_a04", - "dgnw_a01", "dgnw_a02", "dgnw_a03", "dgnw_a04", 'EXTINCT', 'ABSORB'] - if model == 'NucSoaCond': # with so4 and soa in nucleation mode - variable2d_names = variable2d_names + ['so4_a5', 'soa_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] - elif model == 'Nuc': # only with so4 in 
nucleation mode - variable2d_names = variable2d_names + ['so4_a5','num_a5','num_c5', "dgnd_a05", "dgnw_a05"] - var1dlen = len(variable1d_names) - var2dlen = len(variable2d_names) - varlen = var1dlen + var2dlen - variable_names = variable1d_names + variable2d_names - - #%% process data for each day - for cday in range(cday1, cday2 + 1): - mmdd = cday2mmdd(cday) - date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] - - filename_input = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.' + date + '-00000.nc' - - # read in E3SM data - variables = list() - var_units = list() - var_longnames = list() - - (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) - x_idx = find_nearest(lonm, latm, lon0, lat0) - - - (timem, var1d, timeunitm, var1dunit, var1dlongname) = \ - read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable1d_names]) - (timem, var2d, timeunitm, var2dunit, var2dlongname) = \ - read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable2d_names]) - for vv in range(var1dlen): - variables.append(var1d[vv][:, x_idx]) - for vv in range(var2dlen): - variables.append(var2d[vv][:, -1, x_idx]) # choose the lowest level - var_units = var1dunit + var2dunit - var_longnames = var1dlongname + var2dlongname - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - time = timem.data - 365*(int(year0) - int(yearm)) + cdaym - - - # %% output extacted file - outputname = 'SFC_vars_' + campaign + '_' + model + '_' + date + '.nc' - print('output to this file: ' + E3SM_sfc_path + outputname) - - # define filename - f = Dataset(E3SM_sfc_path + outputname, 'w', format = 'NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - lat_o = f.createVariable("lat", "f8", ()) - 
lon_o = f.createVariable("lon", "f8", ()) - var_o = list() - for vv in range(varlen): - var_o.append(f.createVariable(variable_names[vv], 'f8', ("time", ))) - - # write data - time_o[:] = time - lat_o[:] = latm[x_idx] - lon_o[:] = lonm[x_idx] - for vv in range(varlen): - var_o[vv][:] = np.array(variables[vv]) - - # attributes - time_o.units = "days since " + str(int(year0) - 1) + "-12-31 00:00:00 UTC" - lat_o.units = "latitude" - lon_o.units = "longitude" - for vv in range(varlen): - var_o[vv].units = var_units[vv] - var_o[vv].long_name = var_longnames[vv] - - # global attributes - import time as ttt - f.description = model + " extact surface variables for " + campaign - f.modeldata = filename_input - f.create_time = ttt.ctime(ttt.time()) - - f.close() - diff --git a/python/preprocessing/prep_E3SM_sfc_bins.py b/python/preprocessing/prep_E3SM_sfc_bins.py deleted file mode 100644 index 2d5fc30..0000000 --- a/python/preprocessing/prep_E3SM_sfc_bins.py +++ /dev/null @@ -1,151 +0,0 @@ -""" -# prepare E3SM surface aerosol size distribution at ARM sites -# input data is E3SM regional output -# output is surface aerosol distribution at the nearest grid -""" -import sys -sys.path.insert(1, '../subroutines/') - -import os -import numpy as np -from time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from CN_mode_to_size import calc_CNsize_cutoff_0_3000nm -from netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - -#%% settings - -from settings import campaign, lat0, lon0,site, start_date, end_date, \ - E3SM_h3_path, E3SM_h3_filehead, E3SM_sfc_path, Model_List - -if not os.path.exists(E3SM_sfc_path): - os.makedirs(E3SM_sfc_path) - -# change start date into calendar day -cday1 = yyyymmdd2cday(start_date, 'noleap') -cday2 = yyyymmdd2cday(end_date, 'noleap') -if start_date[0:4] != end_date[0:4]: - raise ValueError('currently not 
support multiple years. please set start_date and end_date in the same year') - -year0 = start_date[0:4] - -if site == 'SGP': - E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output -elif site == 'ENA': - E3SMdomain_range = '330e_to_335e_37n_to_42n' -else: - raise ValueError('data for this site is not specified: ' + site) - -#%% process data for each day -for mm in range(len(Model_List)): - model = Model_List[mm] - - for cday in range(cday1, cday2 + 1): - mmdd = cday2mmdd(cday) - date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] - - filename_input = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.' + date + '-00000.nc' - - (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) - x_idx = find_nearest(lonm, latm, lon0, lat0) - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - time = timem.data - 365*(int(year0)-int(yearm)) + cdaym - - # do not use read_E3SM because hyam and hybm don't have units - f = Dataset(filename_input, 'r') - P0 = f.variables['P0'][:] - hyam = f.variables['hyam'][:] - hybm = f.variables['hybm'][:] - T = f.variables['T_' + E3SMdomain_range][:] - PS = f.variables['PS_' + E3SMdomain_range][:] - num_a1 = f.variables['num_a1_' + E3SMdomain_range][:] - num_a2 = f.variables['num_a2_' + E3SMdomain_range][:] - num_a3 = f.variables['num_a3_' + E3SMdomain_range][:] - num_a4 = f.variables['num_a4_' + E3SMdomain_range][:] - dn1 = f.variables['dgnd_a01_' + E3SMdomain_range][:] - dn2 = f.variables['dgnd_a02_' + E3SMdomain_range][:] - dn3 = f.variables['dgnd_a03_' + E3SMdomain_range][:] - dn4 = f.variables['dgnd_a04_' + E3SMdomain_range][:] - if model[0:3] == 'Nuc': # with nucleation mode - num_a5 = f.variables['num_a5_' + E3SMdomain_range][:] - dn5 = f.variables['dgnd_a05_' + E3SMdomain_range][:] - f.close() - - Pres = np.nan*T - zlen = T.shape[1] 
- for kk in range(zlen): - Pres[:, kk, :] = hyam[kk]*P0 + hybm[kk]*PS - - numall = [num_a1[:, -1, x_idx], num_a2[:, -1, x_idx], num_a3[:, -1, x_idx], num_a4[:, -1, x_idx]] - dnall = [dn1[:, -1, x_idx], dn2[:, -1, x_idx], dn3[:, -1, x_idx], dn4[:, -1, x_idx]] - if model[0:3] == 'Nuc': # with nucleation mode - numall.append(num_a5[:, -1, x_idx]) - dnall.append(dn5[:, -1, x_idx]) - - - NCNall = calc_CNsize_cutoff_0_3000nm(dnall, numall, T[:, -1, x_idx], Pres[:, -1, x_idx]) - - # calculate total CN concentration for CPC (>10nm) and CPCU (>3nm) - NUCN = np.nansum(NCNall[3:, :], 0) # >3nm - NCN = np.nansum(NCNall[10:, :], 0) # >10nm - - - - #%% output extacted file - outputname = 'SFC_CNsize_' + campaign + '_' + model + '_' + date + '.nc' - print('output to this file: ' + E3SM_sfc_path + outputname) - - # define filename - f = Dataset(E3SM_sfc_path + outputname, 'w', format='NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - s = f.createDimension('size', 3000) # unlimited - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - size_o = f.createVariable("size", "f8", ("size", )) - lat_o = f.createVariable("lat", "f8", ()) - lon_o = f.createVariable("lon", "f8", ()) - - data_o = f.createVariable('NCNall', 'f8', ("size", "time")) - ncn_o = f.createVariable("NCN", "f8", ("time", )) - nucn_o = f.createVariable("NUCN", "f8", ("time", )) - - # write data - time_o[:] = time - lat_o[:] = latm[x_idx] - lon_o[:] = lonm[x_idx] - size_o[:] = np.arange(1, 3001) - data_o[:, :] = NCNall - ncn_o[:] = NCN - nucn_o[:] = NUCN - - # attributes - time_o.units = "days since " + str(int(year0)-1) + "-12-31 00:00:00 UTC" - lat_o.units = "latitude" - lon_o.units = "longitude" - size_o.units = 'nm' - size_o.long_name = "0 to 3000nm with 1nm increment" - data_o.units = '#/m3' - data_o.long_name = 'aerosol size distribution' - ncn_o.units = '#/m3' - ncn_o.long_name = 'aerosol number concentration for size >10nm' - nucn_o.units = '#/m3' - 
nucn_o.long_name = 'aerosol number concentration for size >3nm' - - # global attributes - import time as ttt - f.description = model + " extact surface aerosol size distribution for " + campaign - f.modeldata = filename_input - f.create_time = ttt.ctime(ttt.time()) - - f.close() diff --git a/python/preprocessing/prep_E3SM_shiptrack_allvars.py b/python/preprocessing/prep_E3SM_shiptrack_allvars.py deleted file mode 100644 index 6a4a3cf..0000000 --- a/python/preprocessing/prep_E3SM_shiptrack_allvars.py +++ /dev/null @@ -1,237 +0,0 @@ -""" -# prepare E3SM surface variables at ARM ship-based field campaigns -# input data is E3SM regional output -# output is surface variables at the nearest grid of the ship track -""" -import sys -sys.path.insert(1, '../subroutines/') - -import os -import glob -import numpy as np -from time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from read_ship import read_marmet -from read_ARMdata import read_met -from netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - -#%% settings - -from settings import campaign, shipmetpath, E3SM_h3_path, E3SM_h3_filehead, E3SM_ship_path, Model_List - -if not os.path.exists(E3SM_ship_path): - os.makedirs(E3SM_ship_path) - -#%% get all ship data -if campaign=='MAGIC': - lst = glob.glob(shipmetpath+'marmet*.txt') - E3SMdomain_range='202e_to_243e_20n_to_35n' # domain range in E3SM regional output -elif campaign=='MARCUS': - lst = [1, 2, 3, 4] # there are 4 ship trips (legs) for MARCUS - E3SMdomain_range='60e_to_160e_42s_to_70s' -else: - raise ValueError('data for this field campaign is not specified: ' + campaign) - -lst.sort() -print('total number of ship leg files:'+str(len(lst))) - - -for filename in lst: - - #%% read in ship data - - if campaign=='MAGIC': - # for each ship leg - legnum=filename[-6:-4] - - (shipdata, shipvarlist) = read_marmet(filename) - 
year=[a[1] for a in shipdata] - month=[a[2] for a in shipdata] - day=[a[3] for a in shipdata] - hh=[int(a[4]) for a in shipdata] - mm=[int(a[5]) for a in shipdata] - ss=[int(a[6]) for a in shipdata] - lat=np.array([float(a[7]) for a in shipdata]) - lon=np.array([float(a[8]) for a in shipdata]) - - # ymd = [year[i]+'-'+month[i]+'-'+day[i] for i in range(len(year))] # yyyy-mm-dd - yyyymmdd = [year[i]+month[i]+day[i] for i in range(len(year))] # yyyymmdd - ymd=list(set(yyyymmdd)) # unique date - ymd.sort() - - - time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. - for i in range(len(time)): - cday0 = yyyymmdd2cday(yyyymmdd[i], 'noleap') - if year[i]==year[0]: - time[i]=time[i]+cday0 - else: - time[i]=time[i]+cday0+365 # next year - - elif campaign=='MARCUS': - legnum=str(filename) - if legnum=='1': - startdate='2017-10-30' - enddate='2017-12-02' - elif legnum=='2': - startdate='2017-12-13' - enddate='2018-01-11' - elif legnum=='3': - startdate='2018-01-16' - enddate='2018-03-04' - elif legnum=='4': - startdate='2018-03-09' - enddate='2018-03-22' - - cday1=yyyymmdd2cday(startdate, 'noleap') - cday2=yyyymmdd2cday(enddate, 'noleap') - if startdate[0:4]!=enddate[0:4]: - cday2=cday2+365 # cover two years - - time=np.empty(0) - lon=np.empty(0) - lat=np.empty(0) - ymd=[] - for cc in range(cday1, cday2+1): - if cc<=365: - yyyymmdd=startdate[0:4]+cday2mmdd(cc) - else: - yyyymmdd=enddate[0:4]+cday2mmdd(cc-365) - - lst0 = glob.glob(shipmetpath+'maraadmetX1.b1.'+yyyymmdd+'*') - (time0, lon0, timeunit, lonunit, lon_long_name)=read_met(lst0[0], 'lon') - (time0, lat0, timeunit, lonunit, lon_long_name)=read_met(lst0[0], 'lat') - ymd0 = timeunit.split()[2] - ymd.append(ymd0[0:4]+ymd0[5:7]+ymd0[8:10]) - - time = np.hstack((time, time0/86400. 
+ cc)) - lat = np.hstack((lat, lat0)) - lon = np.hstack((lon, lon0)) - - print('date for shipleg '+legnum+': '+ymd[0]+'-'+ymd[-1]) - - #%% set variables to be read - for mm in range(len(Model_List)): - model=Model_List[mm] - variable1d_names = ['PS', 'PBLH', 'FLNT', 'FSNT', 'FLNS', 'FSNS', "LHFLX", "SHFLX", - 'TREFHT', 'PRECT', 'PRECL', "TGCLDLWP", "TGCLDIWP"] - variable2d_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', - 'CLDLIQ', 'CLDICE', 'NUMLIQ', 'NUMICE', 'NUMRAI', 'NUMSNO', 'RAINQM', 'SNOWQM', - 'CCN1', 'CCN3', 'CCN5', "AREI", "AREL", "ICLDIWP", "ICLDTWP", - 'bc_a1', 'bc_a3', 'bc_a4', 'dst_a1', 'dst_a3', 'mom_a1', 'mom_a2', 'mom_a3', 'mom_a4', - 'ncl_a1', 'ncl_a2', 'ncl_a3', 'pom_a1', 'pom_a3', 'pom_a4', 'so4_a1', 'so4_a2', 'so4_a3', - 'soa_a1', 'soa_a2', 'soa_a3', 'num_a1', 'num_a2', 'num_a3', 'num_a4', - 'num_c1', 'num_c2', 'num_c3', 'num_c4', "dgnd_a01", "dgnd_a02", "dgnd_a03", "dgnd_a04", - "dgnw_a01", "dgnw_a02", "dgnw_a03", "dgnw_a04", 'EXTINCT', 'ABSORB'] - if model=='NucSoaCond': # with so4 and soa in nucleation mode - variable2d_names=variable2d_names+['so4_a5', 'soa_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] - elif model=='Nuc': # only with so4 in nucleation mode - variable2d_names=variable2d_names+['so4_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] - var1dlen = len(variable1d_names) - var2dlen = len(variable2d_names) - variable_names = variable1d_names+variable2d_names - varlen = var1dlen+var2dlen - - #%% read in E3SM data - variables = list() - var_units = list() - var_longnames = list() - - # read all days in the ship leg - for dd in range(len(ymd)): - ymd2 = ymd[dd][0:4]+'-'+ymd[dd][4:6]+'-'+ymd[dd][6:8] - print('read this date: '+ymd2) - filename_input = E3SM_h3_path[mm]+E3SM_h3_filehead[mm]+'.cam.h3.'+ymd2+'-00000.nc' - - (timem, lonm, timeunitm, lonmunit, lonmname)=read_E3SM(filename_input, 'lon_'+E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname)=read_E3SM(filename_input, 'lat_'+E3SMdomain_range) - # 
(timem, psm, timeunitm, psmunit, psmname)=read_E3SM(filename_input, 'PS_'+E3SMdomain_range) - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - timem2 = timem.data-365*(int(ymd[0][0:4])-int(yearm)) + cdaym - - # ship measurement times during the model day - timeo = time[np.logical_and(time>=timem2[0], time=timem2[0], time=timem2[0], time= timem2[0], time < timem2[0] + 1)] - lono = lon[np.logical_and(time >= timem2[0], time < timem2[0] + 1)] - lato = lat[np.logical_and(time >= timem2[0], time < timem2[0] + 1)] - - - # allocation variables and attributes - if dd == 0: - for vv in range(varlen): - variables.append([]) - - # extract the data at the time and location of ship - for tt in range(len(timeo)): - t_idx = np.abs(timem2-timeo[tt]).argmin() - if lono[tt]<-900. or lato[tt]<-900: - for vv in range(varlen): - variables[vv].append(np.nan) - else: - x_idx = find_nearest(lonm, latm, lono[tt], lato[tt]) - for vv in range(varlen): - variables[vv].append(varall[vv][t_idx, -1, x_idx]) # choose the lowest level - - numall = [np.array(a) for a in variables[2::2]] - dnall = [np.array(a) for a in variables[3::2]] - - NCNall = calc_CNsize_cutoff_0_3000nm(dnall, numall, np.array(variables[0]), np.array(variables[1])) - - # calculate total CN concentration for CPC (>10nm) and CPCU (>3nm) - NUCN = np.nansum(NCNall[3:, :], 0) # >3nm - NCN = np.nansum(NCNall[10:, :], 0) # >10nm - - - # %% output extacted file - outputname = 'Ship_CNsize_' + campaign + '_' + model + '_shipleg' + legnum + '.nc' - print('output to this file: ' + E3SM_ship_path + outputname) - - # define filename - f = Dataset(E3SM_ship_path + outputname, 'w', format='NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - size = f.createDimension('size', 3000) - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - size_o = f.createVariable("size", 'i8', ("size")) - lat_o = f.createVariable("lat", "f8", ("time", )) - lon_o = 
f.createVariable("lon", "f8", ("time", )) - - data_o = f.createVariable('NCNall', 'f8', ("size", "time")) - ncn_o = f.createVariable("NCN", "f8", ("time", )) - nucn_o = f.createVariable("NUCN", "f8", ("time", )) - - # write data - time_o[:] = time - size_o[:] = np.arange(1, 3001) - lat[lat<-900] = -9999. - lon[lon<-900] = -9999. - lat_o[:] = lat - lon_o[:] = lon - NCNall[np.isnan(NCNall)] = -9999. - NCN[np.isnan(NCN)] = -9999. - NUCN[np.isnan(NUCN)] = -9999. - data_o[:, :] = NCNall - ncn_o[:] = NCN - nucn_o[:] = NUCN - - # attributes - time_o.units = "days since " + str(int(ymd[0][0:4])-1) + "-12-31 00:00:00 UTC" - lat_o.units = "degree north" - lon_o.units = "degree east" - time_o.long_name = "Calendar Day" - lat_o.long_name = "latitude" - lon_o.long_name = "longitude" - size_o.units = 'nm' - size_o.long_name = "0 to 3000nm with 1nm increment" - data_o.units = '#/m3' - data_o.long_name = 'aerosol size distribution' - ncn_o.units = '#/m3' - ncn_o.long_name = 'aerosol number concentration for size >10nm' - nucn_o.units = '#/m3' - nucn_o.long_name = 'aerosol number concentration for size >3nm' - - # global attributes - import time as ttt - f.description = model + " calculated aerosol size distribution along ship tracks for " + campaign - f.shiptrackdata = filename - f.modeldata = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.*.nc' - f.datanotes = 'variables are set as missing if GPS location is missing' - f.create_time = ttt.ctime(ttt.time()) - - f.close() \ No newline at end of file diff --git a/python/preprocessing/prep_E3SM_shiptrack_profiles.py b/python/preprocessing/prep_E3SM_shiptrack_profiles.py deleted file mode 100644 index ed2cb74..0000000 --- a/python/preprocessing/prep_E3SM_shiptrack_profiles.py +++ /dev/null @@ -1,232 +0,0 @@ -""" -# prepare E3SM vertical profiles at ARM ship-based field campaigns -# input data is E3SM regional output -# output is vertical profiles at the nearest grid of the ship track -""" -import sys -sys.path.insert(1, 
'../subroutines/') - -import os -import glob -import numpy as np -from time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd -from read_netcdf import read_E3SM -from read_ship import read_marmet -from read_ARMdata import read_met -from netCDF4 import Dataset - -def find_nearest(xall, yall, x, y): - distance = np.square(xall-x) + np.square(yall-y) - idx = distance.argmin() - return(idx) - -#%% settings - -from settings import campaign, shipmetpath, E3SM_h3_path, E3SM_h3_filehead, E3SM_ship_path, Model_List - -if not os.path.exists(E3SM_ship_path): - os.makedirs(E3SM_ship_path) - -# output height above ground. data will be interpolated into z_f -z_f = np.hstack((np.arange(0,500,50), np.arange(500,2000,100), np.arange(2000,5000,300), - np.arange(5000,10000,500), np.arange(10000,20001,1000))) -zlen = len(z_f) - -#%% set variables to be read -variable_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', 'LWC', 'IWC', - 'CLDLIQ', 'CLDICE', 'NUMLIQ', "AREI", "AREL", "ICLDIWP", "ICLDTWP"] -varlen = len(variable_names) - -#%% get all ship data -if campaign == 'MAGIC': - lst = glob.glob(shipmetpath + 'marmet*.txt') - E3SMdomain_range = '202e_to_243e_20n_to_35n' # domain range in E3SM regional output -elif campaign == 'MARCUS': - lst = [1, 2, 3, 4] # there are 4 ship trips (legs) for MARCUS - E3SMdomain_range = '60e_to_160e_42s_to_70s' -else: - raise ValueError('data for this field campaign is not specified: ' + campaign) -lst.sort() -print('total number of ship leg files:' + str(len(lst))) - - -for filename in lst: - - - #%% read in ship data - - if campaign == 'MAGIC': - # for each ship leg - legnum = filename[-6:-4] - - (shipdata, shipvarlist) = read_marmet(filename) - year = [a[1] for a in shipdata] - month = [a[2] for a in shipdata] - day = [a[3] for a in shipdata] - hh = [int(a[4]) for a in shipdata] - mm = [int(a[5]) for a in shipdata] - ss = [int(a[6]) for a in shipdata] - lat = np.array([float(a[7]) for a in shipdata]) - lon = 
np.array([float(a[8]) for a in shipdata]) - - # ymd = [year[i] + '-' + month[i] + '-' + day[i] for i in range(len(year))] # yyyy-mm-dd - yyyymmdd = [year[i] + month[i] + day[i] for i in range(len(year))] # yyyymmdd - ymd = list(set(yyyymmdd)) # unique date - ymd.sort() - - - time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. - for i in range(len(time)): - cday0 = yyyymmdd2cday(yyyymmdd[i], 'noleap') - if year[i] == year[0]: - time[i] = time[i] + cday0 - else: - time[i] = time[i] + cday0 + 365 # next year - - elif campaign == 'MARCUS': - legnum = str(filename) - if legnum == '1': - startdate = '2017-10-30' - enddate = '2017-12-02' - elif legnum == '2': - startdate = '2017-12-13' - enddate = '2018-01-11' - elif legnum == '3': - startdate = '2018-01-16' - enddate = '2018-03-04' - elif legnum == '4': - startdate = '2018-03-09' - enddate = '2018-03-22' - - cday1 = yyyymmdd2cday(startdate, 'noleap') - cday2 = yyyymmdd2cday(enddate, 'noleap') - if startdate[0:4] != enddate[0:4]: - cday2 = cday2 + 365 # cover two years - - time = np.empty(0) - lon = np.empty(0) - lat = np.empty(0) - ymd = [] - for cc in range(cday1, cday2 + 1): - if cc <= 365: - yyyymmdd = startdate[0:4] + cday2mmdd(cc) - else: - yyyymmdd = enddate[0:4] + cday2mmdd(cc-365) - - lst0 = glob.glob(shipmetpath + 'maraadmetX1.b1.' + yyyymmdd + '*') - (time0, lon0, timeunit, lonunit, lon_long_name) = read_met(lst0[0], 'lon') - (time0, lat0, timeunit, lonunit, lon_long_name) = read_met(lst0[0], 'lat') - ymd0 = timeunit.split()[2] - ymd.append(ymd0[0:4] + ymd0[5:7] + ymd0[8:10]) - - time = np.hstack((time, time0/86400. 
+ cc)) - lat = np.hstack((lat, lat0)) - lon = np.hstack((lon, lon0)) - - print('date for shipleg ' + legnum + ': ' + ymd[0] + '-' + ymd[-1]) - - #%% read in E3SM data - for mm in range(len(Model_List)): - model = Model_List[mm] - variables = list() - var_units = list() - var_longnames = list() - - # read all days in the ship leg - for dd in range(len(ymd)): - ymd2 = ymd[dd][0:4] + '-' + ymd[dd][4:6] + '-' + ymd[dd][6:8] - print('read this date: ' + ymd2) - filename_input = E3SM_h3_path[mm] + E3SM_h3_filehead[mm] + '.cam.h3.' + ymd2 + '-00000.nc' - - (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) - (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) - (timem, z3, timeunitm, zunit, zname) = read_E3SM(filename_input, 'Z3_' + E3SMdomain_range) - # (timem, psm, timeunitm, psmunit, psmname) = read_E3SM(filename_input, 'PS_' + E3SMdomain_range) - - cdaym = timeunit2cday(timeunitm, 'noleap') - yearm = timeunitm.split(' ')[2][0:4] - timem2 = timem.data-365*(int(ymd[0][0:4])-int(yearm)) + cdaym - - # ship measurement times during the model day - timeo = time[np.logical_and(time>=timem2[0], time=timem2[0], time=timem2[0], time1e4] = 1e8 - # data2[np.logical_or(data2<0, data2>1e4)] = np.nan - time_fims = data0[0, :] - # change data from #/dlnDp to number - data2 = data0[1:-3, :]*dlnDp_f - - # TD mode or AMB mode. remove TD mode - TD_AMB = data0[-1, :] - data2[:, TD_AMB != 0] = -9999. - - fims = np.empty([30, len(time)]) - for ii in range(30): - fims[ii, :] = np.interp(time, time_fims, data2[ii, :]) - idx = np.logical_or(time>time_fims[-1], time1e6)] = np.nan - # pcasp[:, flag != 0] = np.nan - if not all(time_pcasp == time): - raise ValueError('PCASP time is inconsistent with FIMS') - elif len(filename_p) == 0: - time_pcasp = time - d_pcasp = [(dmin_p[x] + dmax_p[x])/2 for x in range(len(dmin_p))] - pcasp = np.full([len(d_pcasp), len(time)], -9999.) 
- pcasp_total = np.full(len(time), -9999.) - else: - raise ValueError('find more than one file: ' + filename_p) - - # !! PCASP data is for standard T and p (Conc = Conc_orig*[(1013.25/Pamb)*(Tamb/293.15)]), change to ambient T/p - pcasp2 = np.array(pcasp) - for tt in range(len(time)): - pcasp[:, tt] = pcasp[:, tt]/((1013.25/p_amb[tt])*((T_amb[tt] + 273.15)/293.15)) - - # CVI - filename_c = glob.glob(cvipath + 'enaaafinletcviF1.c1.' + date[0:8] + '*.nc') - filename_c.sort() - # read in data - if len(filename_c) == 1: - (time_c, lon_c, lat_c, alt_c, timeunit_c, cvimode, cvi_inlet, enhance_factor, dilution_factor) = read_cvi(filename_c[0]) - if date == '20180216a': - time_c = np.insert(time_c, 1403, (time_c[1402] + time_c[1403])/2) - cvi_inlet=np.insert(cvi_inlet, 1403, cvi_inlet[1403]) - cvimode=np.insert(cvimode, 1403, cvimode[1403]) - enhance_factor = np.insert(enhance_factor, 1403, enhance_factor[1403]) - dilution_factor = np.insert(dilution_factor, 1403, dilution_factor[1403]) - enhance_factor[enhance_factor<-9000] = np.nan - dilution_factor[dilution_factor<-9000] = np.nan - if not all(time_c == time): - raise ValueError('CVI time is inconsistent with FIMS') - elif len(filename_c) == 0: - time_c = time - cvi_inlet = np.nan*np.empty([len(time)]) - cvimode = np.nan*np.empty([len(time)]) - enhance_factor = np.nan*np.empty([len(time)]) - dilution_factor = np.nan*np.empty([len(time)]) - else: - raise ValueError('find more than one file: ' + filename_c) - - cvi_inlet[cvi_inlet == -9] = 1 # if cvi_inlet is unfunctional, assume it is isokinetic and use fims as good data - - - # read OPC - filename_o = glob.glob(opcpath + 'OPCISO_G1_' + date[0:8] + '*.ict') - if len(filename_o) == 1: - (opc, dmin_o, dmax_o, d_opc, opclist) = read_opc(filename_o[0]) - time_o = opc[0, :] - opc = opc[1:, :] - opc[opc<0] = np.nan - else: - raise ValueError('can not find OPC data or find multiple files: ' + filename_o) - if date == '20180216a': - time_o = np.hstack((time_o[0:1403], 47873., 
time_o[1403:])) - opc = np.hstack((opc[:, 0:1403], (opc[:, 1402:1403] + opc[:, 1403:1404])/2, opc[:, 1403:])) - if any(time_o != time): - raise ValueError('OPC time is inconsistent with FIMS') - if sum(np.isnan(opc[0, :]))<0.1*len(time_o): - for ii in range(len(d_opc)): # fill missing timesteps - opc2 = opc[ii, :] - opc[ii, np.isnan(opc2)] = np.interp(time[np.isnan(opc2)], time[~np.isnan(opc2)], opc[ii, ~np.isnan(opc2)]) - else: - print('this date does not fill NaN OPC values') - - #%% now merge fims and pcasp - timelen = len(time) - nbin_merge = 67 - nbin_fims = len(d_fims) - nbin_pcasp = len(d_pcasp) - nbin_opc = len(d_opc) - # low and high range of each bin - dia_merge_l = np.full(nbin_merge, np.nan) - dia_merge_h = np.full(nbin_merge, np.nan) - # from bins 1-30, use FIMS bin - for n in range(nbin_fims): - dia_merge_l[n] = dmin_f[n] - dia_merge_h[n] = dmax_f[n] - # for the next bin, use upper range (0.64) of FIMS as low bound and 0.8 of PCASP as high bound - idx = dmax_p.index(0.8) - dia_merge_l[nbin_fims] = dmax_f[-1] - dia_merge_h[nbin_fims] = dmax_p[idx] - # next bin uses 0.8 as low bound and high bound of 2nd bin (0.9) of OPC - dia_merge_l[31] = 0.8 - dia_merge_h[31] = 0.9 - # next few bins are merged two OPC bins - for n in range(1, 6): - dia_merge_l[31 + n] = dmin_o[n*2] - dia_merge_h[31 + n] = dmax_o[n*2 + 1] - # other bins follows OPC bins - for n in range(12, nbin_opc): - dia_merge_l[25 + n] = dmin_o[n] - dia_merge_h[25 + n] = dmax_o[n] - - d_merge = (dia_merge_h + dia_merge_l)/2 - - # merged concentration - conc_merge = np.full([timelen, nbin_merge], -9999.) - fims[np.isnan(fims)] = -9999. # do not treat missing as NaN. treat -9999 - for k in range(timelen): - # mask all data with cloud flag on - if cldflag[k] != 0: - continue - # use fims data up to d_fims[24] - for n in range(24 + 1): - if cvi_inlet[k] == 0: # in Jerome's code it is 0. 
looks like it should be 1 (CVI in cloud) - fims[n, k] = -9999 - conc_merge[k, n] = fims[n, k] - # overlapping bins - idx = dmin_p.index(0.3) # start merging size. choose the index of pcasp for merging - if fims[25, k] >=0: - if cvi_inlet[k] == 1: - ffac = 0.8 - pfac = 0.2 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 25] = (fims[25, k]*ffac + pcasp[idx, k]*0.3*pfac) - if fims[26, k] >=0: - if cvi_inlet[k] == 1: - ffac = 0.7 - pfac = 0.3 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 26] = (fims[26, k]*ffac + (pcasp[idx, k]*0.3 + pcasp[idx + 1, k]*0.2)*pfac) - if fims[27, k] >=0: - if cvi_inlet[k] == 1: - ffac = 0.5 - pfac = 0.5 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 27] = (fims[27, k]*ffac + (pcasp[idx + 1, k]*0.65)*pfac) - if fims[28, k] >=0: - if cvi_inlet[k] == 1: - ffac = 0.3 - pfac = 0.7 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 28] = (fims[28, k]*ffac + (pcasp[idx + 1, k]*0.15 + pcasp[idx + 2, k]*0.5)*pfac) - if fims[29, k] >=0: - if cvi_inlet[k] == 1: - ffac = 0.2 - pfac = 0.8 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 29] = (fims[29, k]*ffac + (pcasp[idx + 2, k]*0.4 + pcasp[idx + 3, k]*0.2)*pfac) - conc_merge[k, 30] = pcasp[idx + 3, k]*0.8 - if not all(pcasp[idx:idx + 4, k] >=0): - conc_merge[k, 25:30] = fims[25:30, k] - conc_merge[k, 30] = (conc_merge[k, 29] + opc[1, k]*1.4)/2.0 - # next merge OPC and PCASP, remove PCASP if the values is 10x larger 
than OPC - pcasp2 = pcasp[18, k]*0.5 - opc2 = opc[1, k]*1.4 # the first bin of OPC contains all small-size particles. not using opc[0, k] - if np.isnan(opc2): - conc_merge[k, 31] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 31] = opc2 - else: - conc_merge[k, 31] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[18, k]*0.5 + pcasp[19, k]*0.2 - opc2 = opc[2, k] + opc[3, k] - if np.isnan(opc2): - conc_merge[k, 32] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 32] = opc2 - else: - conc_merge[k, 32] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[19, k]*0.8 - opc2 = opc[4, k] + opc[5, k] - if np.isnan(opc2): - conc_merge[k, 33] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 33] = opc2 - else: - conc_merge[k, 33] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[20, k]*0.9 - opc2 = opc[6, k] + opc[7, k] - if np.isnan(opc2): - conc_merge[k, 34] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 34] = opc2 - else: - conc_merge[k, 34] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[20, k]*0.1 + pcasp[21, k] - opc2 = opc[8, k] + opc[9, k] - if np.isnan(opc2): - conc_merge[k, 35] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 35] = opc2 - else: - conc_merge[k, 35] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[22, k] + pcasp[23, k]*0.2 - opc2 = opc[10, k] + opc[11, k] - if np.isnan(opc2): - conc_merge[k, 36] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 36] = opc2 - else: - conc_merge[k, 36] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[23, k]*0.7 - opc2 = opc[12, k] - if np.isnan(opc2): - conc_merge[k, 37] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 37] = opc2 - else: - conc_merge[k, 37] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[23, k]*0.1 + pcasp[24, k]*0.7 - opc2 = opc[13, k] - if np.isnan(opc2): - conc_merge[k, 38] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 38] = opc2 
- else: - conc_merge[k, 38] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[24, k]*0.3 + pcasp[25, k]*0.4 - opc2 = opc[14, k] - if np.isnan(opc2): - conc_merge[k, 39] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 39] = opc2 - else: - conc_merge[k, 39] = (opc2 + pcasp2)/2.0 # assume equal weight - pcasp2 = pcasp[25, k]*0.6 + pcasp[26, k]*0.3 - opc2 = opc[15, k] - if np.isnan(opc2): - conc_merge[k, 40] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 40] = opc2 - else: - conc_merge[k, 40] = opc2*0.6 + pcasp2*0.4 # gradually reduce the weight of PCASP - pcasp2 = pcasp[26, k]*0.7 + pcasp[27, k]*0.2 - opc2 = opc[16, k] - if np.isnan(opc2): - conc_merge[k, 41] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 41] = opc2 - else: - conc_merge[k, 41] = opc2*0.7 + pcasp2*0.3 # gradually reduce the weight of PCASP - pcasp2 = pcasp[27, k]*0.8 + pcasp[28, k]*0.2 - opc2 = opc[17, k] - if np.isnan(opc2): - conc_merge[k, 42] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 42] = opc2 - else: - conc_merge[k, 42] = opc2*0.8 + pcasp2*0.2 # gradually reduce the weight of PCASP - pcasp2 = pcasp[28, k]*0.8 + pcasp[29, k]*0.3 - opc2 = opc[18, k] - if np.isnan(opc2): - conc_merge[k, 43] = pcasp2 - elif pcasp2>10*opc2 or pcasp2<0: - conc_merge[k, 43] = opc2 - else: - conc_merge[k, 43] = opc2*0.9 + pcasp2*0.1 # gradually reduce the weight of PCASP - # using OPC for other bins - for n in range(44, nbin_merge): - conc_merge[k, n] = opc[n-25, k] - - - #%% output data - if not os.path.exists(merged_size_path): - os.mkdir(merged_size_path) - outfile = merged_size_path + 'merged_bin_fims_pcasp_opc_ACEENA_' + date + '.nc' - # define filename - f = Dataset(outfile, 'w', format = 'NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - s = f.createDimension('size', nbin_merge) # unlimited - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - size_o = f.createVariable("size", "f8", 
("size", )) - sizeh_o = f.createVariable("size_high", "f8", ("size", )) - sizel_o = f.createVariable("size_low", "f8", ("size", )) - lon_o = f.createVariable("lon", 'f8', ("time", )) - lat_o = f.createVariable("lat", 'f8', ("time", )) - height_o = f.createVariable("height", 'f8', ("time", )) - cflag_o = f.createVariable('cld_flag', 'i8', ("time", )) - legnum_o = f.createVariable('leg_number', 'i8', ("time", )) - cvi_o = f.createVariable('CVI_inlet', 'i8', ("time", )) - cvim_o = f.createVariable('CVI_mode', 'i8', ("time", )) - df_o = f.createVariable('CVI_Dilution_Factor', 'f8', ("time", )) - ef_o = f.createVariable('CVI_Enhancement_Factor', 'f8', ("time", )) - merge_o = f.createVariable('size_distribution_merged', 'f8', ("time", "size")) - fims_o = f.createVariable('totalnum_fims', 'f8', ("time", )) - pcasp_o = f.createVariable('totalnum_pcasp', 'f8', ("time", )) - - # write data - time_o[:] = time - size_o[:] = d_merge - sizeh_o[:] = dia_merge_h - sizel_o[:] = dia_merge_l - lon_o[:] = lon - lat_o[:] = lat - height_o[:] = height - cflag_o[:] = cldflag - legnum_o[:] = legnum - cvi_o[:] = cvi_inlet - cvim_o[:] = np.array(cvimode) - dilution_factor[np.isnan(dilution_factor)] = -9999. - df_o[:] = dilution_factor - enhance_factor[np.isnan(enhance_factor)] = -9999. - ef_o[:] = enhance_factor - conc_merge[np.isnan(conc_merge)] = -9999. - conc_merge[conc_merge<0] = -9999. - merge_o[:, :] = conc_merge - fims_total[np.isnan(fims_total)] = -9999. - fims_total[fims_total<0] = -9999. - fims_o[:] = fims_total - pcasp_total[np.isnan(pcasp_total)] = -9999. - pcasp_total[pcasp_total<0] = -9999. 
- pcasp_o[:] = pcasp_total - - # attributes - time_o.units = "seconds since " + date[0:4] + '-' + date[4:6] + '-' + date[6:8] + " 00:00:00" - size_o.units = 'um' - size_o.long_name = 'center of size bin' - sizeh_o.units = 'um' - sizeh_o.long_name = 'upper bound of size bin' - sizel_o.units = 'um' - sizel_o.long_name = 'lower bound of size bin' - lon_o.units = 'degree east' - lon_o.long_name = 'Longitude' - lat_o.units = 'degree north' - lat_o.long_name = 'Latitude' - height_o.units = 'm MSL' - height_o.long_name = 'height' - cflag_o.units = 'N/A' - cflag_o.long_name = 'cloud flag' - cflag_o.description = '1-cloud; 0-no cloud' - legnum_o.units = 'N/A' - legnum_o.long_name = 'leg number' - cvi_o.units = 'N/A' - cvi_o.long_name = 'CVI inlet status' - cvi_o.description = '0-CVI inlet on; 1-Isokinetic inlet on' - cvim_o.units = 'N/A' - cvim_o.long_name = 'CVI mode flag' - cvim_o.description = '0: CVI mode; 1: under-kinetic; -1: transition' - df_o.units = 'N/A' - df_o.long_name = 'CVI Dilution Factor' - df_o.description = 'Dilution Factor after under-kinetic mode. Some measurements such as AMS, need to divide by this number' - ef_o.units = 'N/A' - ef_o.long_name = 'CVI Enhancement Factor' - ef_o.description = 'Enhancement Factor after CVI mode. 
Some measurements such as AMS, need to divide by this number' - merge_o.units = '#/cm3' - merge_o.long_name = 'merged size distribution' - fims_o.units = '#/cm3' - fims_o.long_name = 'total aerosol concentration from FIMS' - pcasp_o.units = '#/cm3' - pcasp_o.long_name = 'total aerosol concentration from PCASP' - - # global attributes - import time as ttt - f.description = "Merged size distribution from FIMS, PCASP and OPC" - f.create_time = ttt.ctime(ttt.time()) - - f.close() - - diff --git a/python/preprocessing/prep_obs_mergesize_HISCALE.py b/python/preprocessing/prep_obs_mergesize_HISCALE.py deleted file mode 100644 index 302986c..0000000 --- a/python/preprocessing/prep_obs_mergesize_HISCALE.py +++ /dev/null @@ -1,392 +0,0 @@ -""" -# merge size distribution from FIMS and PCASP for Hi-Scale -# revised from size_bin_merge.pro by Jerome Fast -# Shuaiqi Tang -# 2020.10.1 -""" -import sys -sys.path.insert(1, '../subroutines/') - -import os -import glob -import re -import numpy as np -from read_aircraft import read_fims, read_fims_bin, read_iwg1, read_pcasp, read_cvi_hiscale as read_cvi -from time_format_change import hhmmss2sec -from netCDF4 import Dataset - -#%% set data paths - -from settings import iwgpath, fimspath, pcasppath, cvipath, merged_size_path - -if not os.path.exists(merged_size_path): - os.makedirs(merged_size_path) - - -# %% find all data -# lst = glob.glob(iwgpath + 'aaf.iwg1001s.g1.hiscale.20160830*.a2.txt') -lst = glob.glob(iwgpath + '*.a2.txt') -lst.sort() - -# read in fims bin -(d_fims, dmin_f, dmax_f) = read_fims_bin(fimspath + 'HISCALE_FIMS_bins_R1.dat') -# change unit to um -d_fims = [x/1000 for x in d_fims] -dmin_f = [x/1000 for x in dmin_f] -dmax_f = [x/1000 for x in dmax_f] -dlnDp_f = np.empty(len(d_fims)) -for bb in range(len(d_fims)): - dlnDp_f[bb] = np.log(dmax_f[bb]/dmin_f[bb]) -dlnDp_f = np.mean(dlnDp_f) - -for filename in lst[:]: - - # get date - fname = re.split('hiscale.|.a2', filename) - date = fname[-2] - print(date) - if date[-1] 
== 'a': - flightidx = 1 - else: - flightidx = 2 - - #%% read in data - # IWG - (iwg, iwgvars) = read_iwg1(filename) - timelen = len(iwg) - # get lat, lon, height, time - lon = np.empty(timelen) - lat = np.empty(timelen) - height = np.empty(timelen) - time = np.empty(timelen) - cldflag = np.empty(timelen) - legnum = np.empty(timelen) - T_amb = np.empty(timelen) - p_amb = np.empty(timelen) - for t in range(timelen): - lat[t] = float(iwg[t][2]) - lon[t] = float(iwg[t][3]) - height[t] = float(iwg[t][4]) - T_amb[t] = float(iwg[t][20]) - p_amb[t] = float(iwg[t][23]) - cldflag[t] = int(iwg[t][35]) - legnum[t] = int(iwg[t][-1]) - timestr = iwg[t][1].split(' ') - time[t] = hhmmss2sec(timestr[1]) - datestr = timestr[0] - - # FIMS - filename_f = glob.glob(fimspath + 'FIMS_G1_' + date[0:8] + '*' + str(flightidx) + '_HISCALE_001s.ict') - # read in data - if len(filename_f) == 1: - (data0, fimslist) = read_fims(filename_f[0]) - # remove some unrealistic data - fims2 = data0[1:-2, :] - data2 = data0[1:-2, :] - data2[np.isnan(data2)] = 1e8 - data2[:, data2[0, :] > 1e4] = 1e8 - data2[np.logical_or(data2 < 0, data2 > 1e4)] = np.nan - data0[1:-2, :] = data2 - time_fims = data0[0, :] - # change data from #/dlnDp to number - data2 = data0[1:-2, :]*dlnDp_f - fims = np.empty([30, len(time)]) - for ii in range(30): - fims[ii, :] = np.interp(time, time_fims, data2[ii, :]) - idx = np.logical_or(time > time_fims[-1], time < time_fims[0]) - fims[:, idx] = np.nan - elif len(filename_f) == 0: - time_fims = time - fims = np.nan*np.empty([len(d_fims), len(time)]) - else: - raise ValueError('find more than one file: ' + filename_f) - fims_total = np.nansum(fims, 0) - fims_total[fims_total <= 0] = np.nan - - # PCASP - filename_p = glob.glob(pcasppath + 'pcasp_g1_' + date[0:8] + '*' + str(flightidx) + '_hiscale001s.ict.txt') - if date[4:6] == '04' or date[4:6] == '05': - binlen = 27 - dmax_p = [130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, 400, 500, \ - 600, 800, 1000, 1200, 1400, 
1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000] - dmin_p = [120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, 400, 500, \ - 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800] - elif date[4:6] == '08' or date[4:6] == '09': - binlen = 30 - dmax_p = [100, 110, 120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, \ - 400, 500, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000] - dmin_p = [90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, \ - 400, 500, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800] - dmin_p = [x/1000 for x in dmin_p] - dmax_p = [x/1000 for x in dmax_p] - # read in data - if len(filename_p) == 1: - (data0, pcasplist) = read_pcasp(filename_p[0]) - pcasp2 = data0[1:-5, :] - time_pcasp = data0[0, :] - d_pcasp = [float(i) for i in pcasplist[1:-5]] - pcasp = data0[1:-5, :] - flag = data0[-2, :] - pcasp_total = data0[-5, :] - # remove some questionable data - # pcasp[np.isnan(pcasp)] = -9999 - # pcasp[np.logical_or(pcasp <= 0, pcasp > 1e6)] = np.nan - pcasp[:, flag != 0] = np.nan - pcasp[:, cldflag == 1] = np.nan - if not all(time_pcasp == time): - raise ValueError('PCASP time is inconsistent with FIMS') - elif len(filename_p) == 0: - time_pcasp = time - d_pcasp = [(dmin_p[x] + dmax_p[x])/2 for x in range(len(dmin_p))] - pcasp = np.nan*np.empty([len(d_pcasp), len(time)]) - else: - raise ValueError('find more than one file: ' + filename_p) - # !! 
PCASP data is for standard T and p (Conc = Conc_orig*[(1013.25/Pamb)*(Tamb/293.15)]), change to ambient T/p - pcasp2 = np.array(pcasp) - for tt in range(len(time)): - pcasp[:, tt] = pcasp[:, tt]/((1013.25/p_amb[tt])*((T_amb[tt] + 273.15)/293.15)) - - - # CVI - filename_c = glob.glob(cvipath + 'CVI_G1_' + date[0:8] + '*R4_HISCALE_001s.ict.txt') - filename_c.sort() - # read in data - if len(filename_c) == 1 or len(filename_c) == 2: - (cvi, cvilist) = read_cvi(filename_c[flightidx-1]) - time_cvi = cvi[0, :] - cvi_inlet = cvi[-1, :] - enhance_factor = cvi[2, :] - enhance_factor[enhance_factor < -9000] = np.nan - dilution_factor = cvi[3, :] - dilution_factor[dilution_factor < -9000] = np.nan - cvi_mode = cvi[4, :] - cvi_qc = cvi[5, :] - if not all(time_cvi == time): - raise ValueError('CVI time is inconsistent with FIMS') - elif len(filename_c) == 0: - time_cvi = time - cvi_inlet = np.nan*np.empty([len(time)]) - cvi_mode = np.nan*np.empty([len(time)]) - dilution_factor = np.nan*np.empty([len(time)]) - enhance_factor = np.nan*np.empty([len(time)]) - cvi_qc = np.nan*np.empty([len(time)]) - else: - raise ValueError('find more than one file: ' + filename_c) - cvi_mode[cvi_qc != 0] = -9999 - - #%% now merge fims and pcasp - timelen = len(time) - nbin_merge = 44 - nbin_fims = len(d_fims) - nbin_pcasp = len(d_pcasp) - # low and high range of each bin - dia_merge_l = np.empty(nbin_merge) - dia_merge_h = np.empty(nbin_merge) - for n in range(nbin_fims): - dia_merge_l[n] = dmin_f[n] - dia_merge_h[n] = dmax_f[n] - idx = dmax_p.index(0.5) - # use upper range (0.425) of FIMS as low bound and 0.5 of PCASP as high bound - dia_merge_l[nbin_fims] = dmax_f[-1] - dia_merge_h[nbin_fims] = dmax_p[idx] - for n in range(idx + 1, nbin_pcasp): - dia_merge_l[nbin_fims + n-idx] = dmin_p[n] - dia_merge_h[nbin_fims + n-idx] = dmax_p[n] - d_merge = (dia_merge_h + dia_merge_l)/2 - - # merged concentration - conc_merge = np.empty([timelen, nbin_merge]) - fims[np.isnan(fims)] = -9999. 
# do not treat missing as NaN. treat -9999 - for k in range(timelen): - # use fims data up to d_fims[23] (~0.19 um) - for n in range(23 + 1): - if cvi_inlet[k] == 0: # in Jerome's code it is 0. looks like it should be 1 (CVI in cloud) - fims[n, k] = -9999 - conc_merge[k, n] = fims[n, k] - # overlapping bins - idx = dmin_p.index(0.2) # start merging size. corresponding to 10 in IOP2 - if fims[24, k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.95 - pfac = 0.05 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 24] = (fims[24, k]*ffac + (pcasp[idx, k]*1.0 + pcasp[idx + 1, k]*0.25)*pfac) - if fims[25, k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.8 - pfac = 0.2 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 25] = (fims[25, k]*ffac + (pcasp[idx + 1, k]*0.75 + pcasp[idx + 2, k]*0.8)*pfac) - if fims[26, k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.65 - pfac = 0.35 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 26] = (fims[26, k]*ffac + (pcasp[idx + 2, k]*0.2 + pcasp[idx + 3, k]*1.0 + pcasp[idx + 4, k]*0.5)*pfac) - if fims[27, k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.35 - pfac = 0.65 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 27] = (fims[27, k]*ffac + (pcasp[idx + 4, k]*0.5 + pcasp[idx + 5, k]*0.25)*pfac) - if fims[28, k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.2 - pfac = 0.8 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 28] = (fims[28, k]*ffac + (pcasp[idx + 5, k]*0.5)*pfac) - if fims[29, 
k] > 0: - if cvi_inlet[k] == 1: - ffac = 0.05 - pfac = 0.95 - elif cvi_inlet[k] == 0: - ffac = 0.0 - pfac = 1.0 - else: - raise ValueError('cvi_inlet value is neither 0 nor 1') - else: - ffac = 0.0 - pfac = 1.0 - conc_merge[k, 29] = (fims[29, k]*ffac + (pcasp[idx + 5, k]*0.25 + pcasp[idx + 6, k]*0.25)*pfac) - conc_merge[k, 30] = pcasp[idx + 6, k]*0.75 - # using PCASP for upper bins - nn = 31 - for n in range(idx + 7, nbin_pcasp): - conc_merge[k, nn] = pcasp[n, k] - nn = nn + 1 - - #%% output data - if not os.path.exists(merged_size_path): - os.mkdir(merged_size_path) - outfile = merged_size_path + 'merged_bin_fims_pcasp_HISCALE_' + date + '.nc' - # define filename - f = Dataset(outfile, 'w', format = 'NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - s = f.createDimension('size', nbin_merge) # unlimited - - # create variable list - time_o = f.createVariable("time", "f8", ("time", )) - size_o = f.createVariable("size", "f8", ("size", )) - sizeh_o = f.createVariable("size_high", "f8", ("size", )) - sizel_o = f.createVariable("size_low", "f8", ("size", )) - lon_o = f.createVariable("lon", 'f8', ("time", )) - lat_o = f.createVariable("lat", 'f8', ("time", )) - height_o = f.createVariable("height", 'f8', ("time", )) - cflag_o = f.createVariable('cld_flag', 'i8', ("time", )) - legnum_o = f.createVariable('leg_number', 'i8', ("time", )) - cvi_o = f.createVariable('CVI_inlet', 'i8', ("time", )) - cvim_o = f.createVariable('CVI_mode', 'i8', ("time", )) - df_o = f.createVariable('CVI_Dilution_Factor', 'f8', ("time", )) - ef_o = f.createVariable('CVI_Enhancement_Factor', 'f8', ("time", )) - merge_o = f.createVariable('size_distribution_merged', 'f8', ("time", "size")) - fims_o = f.createVariable('totalnum_fims', 'f8', ("time", )) - pcasp_o = f.createVariable('totalnum_pcasp', 'f8', ("time", )) - - # write data - time_o[:] = time - size_o[:] = d_merge - sizeh_o[:] = dia_merge_h - sizel_o[:] = dia_merge_l - lon_o[:] = lon - lat_o[:] = lat - 
height_o[:] = height - cflag_o[:] = cldflag - legnum_o[:] = legnum - cvi_o[:] = cvi_inlet - cvim_o[:] = np.array(cvi_mode) - dilution_factor[np.isnan(dilution_factor)] = -9999. - df_o[:] = dilution_factor - enhance_factor[np.isnan(enhance_factor)] = -9999. - ef_o[:] = enhance_factor - conc_merge[np.isnan(conc_merge)] = -9999. - conc_merge[conc_merge < 0] = -9999. - merge_o[:, :] = conc_merge - fims_total[np.isnan(fims_total)] = -9999. - fims_total[fims_total < 0] = -9999. - fims_o[:] = fims_total - pcasp_total[np.isnan(pcasp_total)] = -9999. - pcasp_total[pcasp_total < 0] = -9999. - pcasp_o[:] = pcasp_total - - # attributes - time_o.units = "seconds since " + date[0:4] + '-' + date[4:6] + '-' + date[6:8] + " 00:00:00" - size_o.units = 'um' - size_o.long_name = 'center of size bin' - sizeh_o.units = 'um' - sizeh_o.long_name = 'upper bound of size bin' - sizel_o.units = 'um' - sizel_o.long_name = 'lower bound of size bin' - lon_o.units = 'degree east' - lon_o.long_name = 'Longitude' - lat_o.units = 'degree north' - lat_o.long_name = 'Latitude' - height_o.units = 'm MSL' - height_o.long_name = 'height' - cflag_o.units = 'N/A' - cflag_o.long_name = 'cloud flag' - cflag_o.description = '1-cloud; 0-no cloud' - legnum_o.units = 'N/A' - legnum_o.long_name = 'leg number' - cvi_o.units = 'N/A' - cvi_o.long_name = 'CVI inlet status' - cvi_o.description = '0-CVI inlet on; 1-Isokinetic inlet on' - cvim_o.units = 'N/A' - cvim_o.long_name = 'CVI mode flag' - cvim_o.description = '0: CVI mode; 1: under-kinetic; -1: transition' - df_o.units = 'N/A' - df_o.long_name = 'CVI Dilution Factor' - df_o.description = 'Dilution Factor after under-kinetic mode. Some measurements such as AMS, need to divide by this number' - ef_o.units = 'N/A' - ef_o.long_name = 'CVI Enhancement Factor' - ef_o.description = 'Enhancement Factor after CVI mode. 
Some measurements such as AMS, need to divide by this number' - merge_o.units = '#/cm3' - merge_o.long_name = 'merged size distribution' - fims_o.units = '#/cm3' - fims_o.long_name = 'total aerosol concentration from FIMS' - pcasp_o.units = '#/cm3' - pcasp_o.long_name = 'total aerosol concentration from PCASP' - - # global attributes - import time as ttt - f.description = "Merged size distribution from FIMS and PCASP" - f.create_time = ttt.ctime(ttt.time()) - - f.close() - diff --git a/python/preprocessing/prep_obs_mergesize_HiScale.py b/python/preprocessing/prep_obs_mergesize_HiScale.py deleted file mode 100644 index d522900..0000000 --- a/python/preprocessing/prep_obs_mergesize_HiScale.py +++ /dev/null @@ -1,400 +0,0 @@ -# merge size distribution from FIMS and PCASP for Hi-Scale -# revised from size_bin_merge.pro by Jerome Fast -# Shuaiqi Tang -# 2020.10.1 - -import sys -sys.path.insert(1,'../subroutines/') - -import numpy as np -import glob -import re -from read_aircraft import read_fims,read_fims_bin,read_iwg1,read_pcasp, read_cvi_hiscale as read_cvi -from time_format_change import hhmmss2sec -from netCDF4 import Dataset - -#%% set data paths - -from settings import iwgpath, fimspath, pcasppath, cvipath, merged_size_path - -import os -if not os.path.exists(merged_size_path): - os.makedirs(merged_size_path) - - -# %% find all data -# lst = glob.glob(iwgpath+'aaf.iwg1001s.g1.hiscale.20160830*.a2.txt') -lst = glob.glob(iwgpath+'*.a2.txt') -lst.sort() - -# read in fims bin -(d_fims,dmin_f,dmax_f)=read_fims_bin(fimspath+'HISCALE_FIMS_bins_R1.dat') -# change unit to um -d_fims = [x/1000 for x in d_fims] -dmin_f = [x/1000 for x in dmin_f] -dmax_f = [x/1000 for x in dmax_f] -dlnDp_f=np.empty(len(d_fims)) -for bb in range(len(d_fims)): - dlnDp_f[bb]=np.log(dmax_f[bb]/dmin_f[bb]) -dlnDp_f=np.mean(dlnDp_f) - -for filename in lst[:]: - - # get date - fname=re.split('hiscale.|.a2',filename) - date=fname[-2] - print(date) - if date[-1]=='a': - flightidx=1 - else: - 
flightidx=2 - - #%% read in data - # IWG - (iwg,iwgvars)=read_iwg1(filename) - timelen = len(iwg) - # get lat, lon, height, time - lon=np.empty(timelen) - lat=np.empty(timelen) - height=np.empty(timelen) - time=np.empty(timelen) - cldflag=np.empty(timelen) - legnum=np.empty(timelen) - T_amb=np.empty(timelen) - p_amb=np.empty(timelen) - for t in range(timelen): - lat[t]=float(iwg[t][2]) - lon[t]=float(iwg[t][3]) - height[t]=float(iwg[t][4]) - T_amb[t]=float(iwg[t][20]) - p_amb[t]=float(iwg[t][23]) - cldflag[t]=int(iwg[t][35]) - legnum[t]=int(iwg[t][-1]) - timestr=iwg[t][1].split(' ') - time[t]=hhmmss2sec(timestr[1]) - datestr=timestr[0] - - # FIMS - filename_f=glob.glob(fimspath+'FIMS_G1_'+date[0:8]+'*'+str(flightidx)+'_HISCALE_001s.ict') - # read in data - if len(filename_f)==1: - (data0,fimslist)=read_fims(filename_f[0]) - # remove some unrealistic data - fims2=data0[1:-2,:] - data2=data0[1:-2,:] - data2[np.isnan(data2)]=1e8 - data2[:,data2[0,:]>1e4]=1e8 - data2[np.logical_or(data2<0,data2>1e4)]=np.nan - data0[1:-2,:]=data2 - time_fims = data0[0,:] - # change data from #/dlnDp to number - data2=data0[1:-2,:]*dlnDp_f - fims = np.empty([30,len(time)]) - for ii in range(30): - fims[ii,:] = np.interp(time,time_fims,data2[ii,:]) - idx=np.logical_or(time>time_fims[-1],time1e6)]=np.nan - pcasp[:,flag!=0]=np.nan - pcasp[:,cldflag==1]=np.nan - if all(time_pcasp==time)==False: - print('time is not consistent for PCASP') - error - elif len(filename_p)==0: - time_pcasp=time - d_pcasp=[(dmin_p[x]+dmax_p[x])/2 for x in range(len(dmin_p))] - pcasp=np.nan*np.empty([len(d_pcasp),len(time)]) - else: - print('find too many files, check: ') - print(filename_p) - error - # !! 
PCASP data is for standard T and p (Conc=Conc_orig*[(1013.25/Pamb)*(Tamb/293.15)]), change to ambient T/p - pcasp2=np.array(pcasp) - for tt in range(len(time)): - pcasp[:,tt] = pcasp[:,tt]/((1013.25/p_amb[tt])*((T_amb[tt]+273.15)/293.15)) - - - # CVI - filename_c=glob.glob(cvipath+'CVI_G1_'+date[0:8]+'*R4_HISCALE_001s.ict.txt') - filename_c.sort() - # read in data - if len(filename_c)==1 or len(filename_c)==2: - (cvi,cvilist)=read_cvi(filename_c[flightidx-1]) - time_cvi = cvi[0,:] - cvi_inlet=cvi[-1,:] - enhance_factor=cvi[2,:] - enhance_factor[enhance_factor<-9000]=np.nan - dilution_factor=cvi[3,:] - dilution_factor[dilution_factor<-9000]=np.nan - cvi_mode=cvi[4,:] - cvi_qc=cvi[5,:] - if all(time_cvi==time)==False: - print('time is not consistent for CVI') - error - elif len(filename_c)==0: - time_cvi=time - cvi_inlet=np.nan*np.empty([len(time)]) - cvi_mode=np.nan*np.empty([len(time)]) - dilution_factor=np.nan*np.empty([len(time)]) - enhance_factor=np.nan*np.empty([len(time)]) - cvi_qc=np.nan*np.empty([len(time)]) - else: - print('find too many files, check: ') - print(filename_c) - error - cvi_mode[cvi_qc!=0]=-9999 - - #%% now merge fims and pcasp - timelen = len(time) - nbin_merge=44 - nbin_fims=len(d_fims) - nbin_pcasp=len(d_pcasp) - # low and high range of each bin - dia_merge_l=np.empty(nbin_merge) - dia_merge_h=np.empty(nbin_merge) - for n in range(nbin_fims): - dia_merge_l[n] = dmin_f[n] - dia_merge_h[n] = dmax_f[n] - idx=dmax_p.index(0.5) - # use upper range (0.425) of FIMS as low bound and 0.5 of PCASP as high bound - dia_merge_l[nbin_fims] = dmax_f[-1] - dia_merge_h[nbin_fims] = dmax_p[idx] - for n in range(idx+1,nbin_pcasp): - dia_merge_l[nbin_fims+n-idx] = dmin_p[n] - dia_merge_h[nbin_fims+n-idx] = dmax_p[n] - d_merge = (dia_merge_h + dia_merge_l)/2 - - # merged concentration - conc_merge = np.empty([timelen,nbin_merge]) - fims[np.isnan(fims)]=-9999. # do not treat missing as NaN. 
treat -9999 - for k in range(timelen): - # use fims data up to d_fims[23] (~0.19 um) - for n in range(23+1): - if cvi_inlet[k]==0: # in Jerome's code it is 0. looks like it should be 1 (CVI in cloud) - fims[n,k]=-9999 - conc_merge[k,n]=fims[n,k] - # overlapping bins - idx=dmin_p.index(0.2) # start merging size. corresponding to 10 in IOP2 - if fims[24,k]>0: - if cvi_inlet[k]==1: - ffac=0.95 - pfac=0.05 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,24] = (fims[24,k]*ffac + (pcasp[idx,k]*1.0 + pcasp[idx+1,k]*0.25)*pfac) - if fims[25,k]>0: - if cvi_inlet[k]==1: - ffac=0.8 - pfac=0.2 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,25] = (fims[25,k]*ffac + (pcasp[idx+1,k]*0.75 + pcasp[idx+2,k]*0.8)*pfac) - if fims[26,k]>0: - if cvi_inlet[k]==1: - ffac=0.65 - pfac=0.35 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,26] = (fims[26,k]*ffac + (pcasp[idx+2,k]*0.2 + pcasp[idx+3,k]*1.0 + pcasp[idx+4,k]*0.5)*pfac) - if fims[27,k]>0: - if cvi_inlet[k]==1: - ffac=0.35 - pfac=0.65 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,27] = (fims[27,k]*ffac + (pcasp[idx+4,k]*0.5 + pcasp[idx+5,k]*0.25)*pfac) - if fims[28,k]>0: - if cvi_inlet[k]==1: - ffac=0.2 - pfac=0.8 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,28] = (fims[28,k]*ffac + (pcasp[idx+5,k]*0.5)*pfac) - if fims[29,k]>0: - if cvi_inlet[k]==1: - ffac=0.05 - pfac=0.95 - elif cvi_inlet[k]==0: - ffac=0.0 - pfac=1.0 - else: - error - else: - ffac=0.0 - pfac=1.0 - conc_merge[k,29] = (fims[29,k]*ffac + (pcasp[idx+5,k]*0.25 + pcasp[idx+6,k]*0.25)*pfac) - conc_merge[k,30] = pcasp[idx+6,k]*0.75 - # using PCASP for upper bins - nn=31 - for n in range(idx+7,nbin_pcasp): - conc_merge[k,nn] = pcasp[n,k] - nn=nn+1 - - #%% output data - import os 
- if not os.path.exists(merged_size_path): - os.mkdir(merged_size_path) - outfile=merged_size_path+'merged_bin_fims_pcasp_HiScale_'+date+'.nc' - # define filename - f = Dataset(outfile, 'w', format='NETCDF4') - - # define dimensions - t = f.createDimension('time', None) # unlimited - s = f.createDimension('size', nbin_merge) # unlimited - - # create variable list - time_o = f.createVariable("time","f8",("time",)) - size_o = f.createVariable("size","f8",("size",)) - sizeh_o = f.createVariable("size_high","f8",("size",)) - sizel_o = f.createVariable("size_low","f8",("size",)) - lon_o = f.createVariable("lon",'f8',("time",)) - lat_o = f.createVariable("lat",'f8',("time",)) - height_o = f.createVariable("height",'f8',("time",)) - cflag_o = f.createVariable('cld_flag','i8',("time",)) - legnum_o = f.createVariable('leg_number','i8',("time",)) - cvi_o = f.createVariable('CVI_inlet','i8',("time",)) - cvim_o = f.createVariable('CVI_mode','i8',("time",)) - df_o = f.createVariable('CVI_Dilution_Factor','f8',("time",)) - ef_o = f.createVariable('CVI_Enhancement_Factor','f8',("time",)) - merge_o = f.createVariable('size_distribution_merged','f8',("time","size")) - fims_o = f.createVariable('totalnum_fims','f8',("time",)) - pcasp_o = f.createVariable('totalnum_pcasp','f8',("time",)) - - # write data - time_o[:] = time - size_o[:] = d_merge - sizeh_o[:] = dia_merge_h - sizel_o[:] = dia_merge_l - lon_o[:] = lon - lat_o[:] = lat - height_o[:] = height - cflag_o[:] = cldflag - legnum_o[:] = legnum - cvi_o[:] = cvi_inlet - cvim_o[:] = np.array(cvi_mode) - dilution_factor[np.isnan(dilution_factor)]=-9999. - df_o[:] = dilution_factor - enhance_factor[np.isnan(enhance_factor)]=-9999. - ef_o[:] = enhance_factor - conc_merge[np.isnan(conc_merge)]=-9999. - conc_merge[conc_merge<0]=-9999. - merge_o[:,:] = conc_merge - fims_total[np.isnan(fims_total)]=-9999. - fims_total[fims_total<0]=-9999. - fims_o[:] = fims_total - pcasp_total[np.isnan(pcasp_total)]=-9999. 
- pcasp_total[pcasp_total<0]=-9999. - pcasp_o[:] = pcasp_total - - # attributes - time_o.units = "seconds since "+date[0:4]+'-'+date[4:6]+'-'+date[6:8]+" 00:00:00" - size_o.units = 'um' - size_o.long_name = 'center of size bin' - sizeh_o.units = 'um' - sizeh_o.long_name = 'upper bound of size bin' - sizel_o.units = 'um' - sizel_o.long_name = 'lower bound of size bin' - lon_o.units = 'degree east' - lon_o.long_name = 'Longitude' - lat_o.units = 'degree north' - lat_o.long_name = 'Latitude' - height_o.units = 'm MSL' - height_o.long_name = 'height' - cflag_o.units = 'N/A' - cflag_o.long_name = 'cloud flag' - cflag_o.description = '1-cloud; 0-no cloud' - legnum_o.units = 'N/A' - legnum_o.long_name = 'leg number' - cvi_o.units = 'N/A' - cvi_o.long_name = 'CVI inlet status' - cvi_o.description = '0-CVI inlet on; 1-Isokinetic inlet on' - cvim_o.units = 'N/A' - cvim_o.long_name = 'CVI mode flag' - cvim_o.description = '0: CVI mode; 1: under-kinetic; -1: transition' - df_o.units = 'N/A' - df_o.long_name = 'CVI Dilution Factor' - df_o.description = 'Dilution Factor after under-kinetic mode. Some measurements such as AMS, need to divide by this number' - ef_o.units = 'N/A' - ef_o.long_name = 'CVI Enhancement Factor' - ef_o.description = 'Enhancement Factor after CVI mode. 
Some measurements such as AMS, need to divide by this number' - merge_o.units = '#/cm3' - merge_o.long_name = 'merged size distribution' - fims_o.units = '#/cm3' - fims_o.long_name = 'total aerosol concentration from FIMS' - pcasp_o.units = '#/cm3' - pcasp_o.long_name = 'total aerosol concentration from PCASP' - - # global attributes - import time as ttt - f.description = "Merged size distribution from FIMS and PCASP" - f.history = "Created by Shuaiqi at " + ttt.ctime(ttt.time()) - - f.close() - \ No newline at end of file diff --git a/python/preprocessing/settings.py b/python/preprocessing/settings.py deleted file mode 100644 index e4f7929..0000000 --- a/python/preprocessing/settings.py +++ /dev/null @@ -1,236 +0,0 @@ -# settings of the aerosol diagnostic package - -import numpy as np - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# global settings - -############ these settings will be replaced by the settings in scripts_*.csh ############# -# set field campaign name. More settings on specific field campaigns are in next section -campaign = 'HISCALE' -# set model names. up to three -Model_List = ['EAMv1_CONUS_RRM'] -# set line colors for each model. corresponding to the Model_List -color_model = ['b','g'] -# set IOP that the statistics, pdf and percentiles are averaged for. Only available for HISCALE and ACEENA -# IOP1/IOP2 -IOP = 'IOP2' -############ these settings will be replaced by the settings in scripts_*.csh ############# - - -# path of the diagnostic package -package_path = '../../' - -# path of E3SM model data (h3) for preprocessing. 
list with the same length of Model_List -E3SM_h3_path=[] -E3SM_h3_filehead=[] # filename before .cam.h3.yyyy-mm-dd.00000.nc -for mm in Model_List: - E3SM_h3_path.append('/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_h3/') - if campaign=='MAGIC': - E3SM_h3_filehead.append(mm+'_2012-2013') - else: -# E3SM_h3_filehead.append(mm+'_2014-2018') - E3SM_h3_filehead.append(mm) - #E3SM_h3_path.append('/qfs/projects/eagles/zhan524/simulations/compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm+'/h3/') - #E3SM_h3_filehead.append('compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm) - -# path of output figures -figpath_aircraft_timeseries = package_path+'figures/'+campaign+'/aircraft/timeseries/' -figpath_aircraft_statistics = package_path+'figures/'+campaign+'/aircraft/statistics/' -figpath_ship_timeseries = package_path+'figures/'+campaign+'/ship/timeseries/' -figpath_ship_statistics = package_path+'figures/'+campaign+'/ship/statistics/' -figpath_sfc_timeseries = package_path+'figures/'+campaign+'/surface/timeseries/' -figpath_sfc_statistics = package_path+'figures/'+campaign+'/surface/statistics/' -figpath_profile_timeseries = package_path+'figures/'+campaign+'/profile/timeseries/' - - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# settings for different field campaigns - -# set location and time information -if campaign=='HISCALE': - site='SGP' - # lat/lon at SGP - lat0 = 36.6059 - lon0 = 360-97.48792 # 0-360 - # bin of flight heights to calculate percentiles - height_bin = np.arange(300,4300,300) - # height_bin = np.arange(400,4300,200) - - # time periods for IOPs. needed in preprocessing of surface data - if IOP=='IOP1': - start_date='2016-04-25' - end_date='2016-05-29' - elif IOP=='IOP2': - start_date='2016-08-27' - end_date='2016-09-22' - - # observational data path. 
- # aircraf measurements merged_bin data are used for all plot_flight_*.py to provide flight/cloud/CVI info - merged_size_path=package_path+'data/'+campaign+'/obs/aircraft/merged_bin/' - iwgpath = package_path+'data/'+campaign+'/obs/aircraft/mei-iwg1/' - fimspath = package_path+'data/'+campaign+'/obs/aircraft/wang-fims/' - pcasppath = package_path+'data/'+campaign+'/obs/aircraft/tomlinson-pcasp/' - cvipath = package_path+'data/'+campaign+'/obs/aircraft/pekour-cvi/' - cpcpath = package_path+'data/'+campaign+'/obs/aircraft/mei-cpc/' - ccnpath = package_path+'data/'+campaign+'/obs/aircraft/mei-ccn/' - amspath = package_path+'data/'+campaign+'/obs/aircraft/shilling-ams/' - wcmpath = package_path+'data/'+campaign+'/obs/aircraft/matthews-wcm/' - # surface measurements - smps_pnnl_path = package_path+'data/'+campaign+'/obs/surface/pnnl-smps/' - smps_bnl_path = package_path+'data/'+campaign+'/obs/surface/bnl-smps/' - nanosmps_bnl_path = package_path+'data/'+campaign+'/obs/surface/bnl-nanosmps/' - uhsassfcpath = package_path+'data/'+campaign+'/obs/surface/arm-uhsas/' - cpcsfcpath = package_path+'data/'+campaign+'/obs/surface/arm-cpc/' - cpcusfcpath = package_path+'data/'+campaign+'/obs/surface/arm-cpcu/' - ccnsfcpath = package_path+'data/'+campaign+'/obs/surface/arm-ccn/' - metpath = package_path+'data/'+campaign+'/obs/surface/arm-met/' - acsmpath = package_path+'data/'+campaign+'/obs/surface/arm_acsm/' - # vertical profile measurements - armbepath = package_path+'data/'+campaign+'/obs/profile/sgparmbecldrad/' - - # PBLH data needed for plot_flight_pdf_percentile_SeparatePBLH_hiscale.py only - pblhpath = package_path+'data/'+campaign+'/obs/profile/arm-pblh/' - dlpath = package_path+'data/'+campaign+'/obs/profile/dl-pblh/' - - # model path - # pre-processed model path - E3SM_sfc_path = package_path+'data/'+campaign+'/model/surface/' - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' 
- - -elif campaign=='ACEENA': - site='ENA' - # lat/lon for ENA - lat0 = 39.09527 - lon0 = 360-28.0339 - # bin of flight heights to calculate percentiles - height_bin = np.arange(100,4300,300) - - # time periods for IOPs. needed in preprocessing of surface data - if IOP=='IOP1': - start_date='2017-06-20' - end_date='2017-07-20' - elif IOP=='IOP2': - start_date='2018-01-21' - end_date='2018-02-19' - - # observational data path. - # aircraf measurements merged_bin data are used for all plot_flight_*.py to provide flight/cloud/CVI info - merged_size_path=package_path+'data/'+campaign+'/obs/aircraft/merged_bin/' - iwgpath = package_path+'data/'+campaign+'/obs/aircraft/IWG/' - fimspath = package_path+'data/'+campaign+'/obs/aircraft/FIMS/' - pcasppath = package_path+'data/'+campaign+'/obs/aircraft/pcasp_g1/' - cvipath = package_path+'data/'+campaign+'/obs/aircraft/inletcvi/' - opcpath = package_path+'data/'+campaign+'/obs/aircraft/opciso/' - cpcpath = package_path+'data/'+campaign+'/obs/aircraft/cpc_aaf/' - ccnpath = package_path+'data/'+campaign+'/obs/aircraft/ccn_aaf/' - amspath = package_path+'data/'+campaign+'/obs/aircraft/shilling-hrfams/' - wcmpath = package_path+'data/'+campaign+'/obs/aircraft/wcm_ACEENA/' - # surface measurements - uhsassfcpath = package_path+'data/'+campaign+'/obs/surface/arm_uhsas/' - cpcsfcpath = package_path+'data/'+campaign+'/obs/surface/arm_cpcf/' - cpcusfcpath = 'N/A' - ccnsfcpath = package_path+'data/'+campaign+'/obs/surface/arm_aosccn1/' - metpath = package_path+'data/'+campaign+'/obs/surface/arm_met/' - acsmpath = package_path+'data/'+campaign+'/obs/surface/arm_acsm/' - # vertical profile measurements - armbepath = package_path+'data/'+campaign+'/obs/profile/enaarmbecldrad/' - - # model path - # pre-processed model path - E3SM_sfc_path = package_path+'data/'+campaign+'/model/surface/' - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif 
campaign=='MAGIC': - site='MAG' - - # bin of latitude to calculate ship track composite - latbin = np.arange(21.5,34,1) - - # reference lat/lon - lat0=30. - lon0=230. - - # observational data path. - # ship measurements - shipmetpath=package_path+'data/'+campaign+'/obs/ship/raynolds-marmet/' - shipccnpath=package_path+'data/'+campaign+'/obs/ship/magaosccn100M1.a1/' - shipcpcpath=package_path+'data/'+campaign+'/obs/ship/magaoscpcfM1.a1/' - shipmwrpath=package_path+'data/'+campaign+'/obs/ship/magmwrret1liljclouM1.s2/' - shipuhsaspath=package_path+'data/'+campaign+'/obs/ship/magaosuhsasM1.a1/' - - # model path - # pre-processed model path - E3SM_ship_path = package_path+'data/'+campaign+'/model/shiptrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='MARCUS': - site='MAR' - - # bin of latitude to calculate ship track composite - latbin = np.arange(-68.5,-42,1) - - # reference lat/lon - lat0=-40. - lon0=120. - - - # observational data path. - # ship measurements - shipmetpath=package_path+'data/'+campaign+'/obs/ship/maraadmetX1.b1/' - shipccnpath=package_path+'data/'+campaign+'/obs/ship/maraosccn1colavgM1.b1/' - shipcpcpath=package_path+'data/'+campaign+'/obs/ship/maraoscpcf1mM1.b1/' - shipmwrpath=package_path+'data/'+campaign+'/obs/ship/marmwrret1liljclouM1.s2/' - shipuhsaspath=package_path+'data/'+campaign+'/obs/ship/maraosuhsasM1.a1/' - - # model path - # pre-processed model path - E3SM_ship_path = package_path+'data/'+campaign+'/model/shiptrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='CSET': - # bin of flight heights to calculate percentiles - height_bin = np.arange(200,8000,400) - # bin of latitude to calculate composite percentiles, same as MAGIC - latbin = np.arange(22.5,39,1) - - # lat/lon at the airport - lat0 = 38.5564 - lon0 = 360-121.3120 - - # observational data path. 
- # aircraft measurements - RFpath=package_path+'data/'+campaign+'/obs/aircraft/aircraft_lowrate/' - ccnpath='N/A' - - # model path - # pre-processed model path - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='SOCRATES': - # bin of flight heights to calculate percentiles - height_bin = np.arange(200,8000,400) - # bin of latitude to calculate composite percentiles - latbin = np.arange(-63.5,-42,1) - # height_bin = np.arange(200,7000,400) - # lat/lon at the airport - lat0 = -42.8371 - lon0 = 147.5054 - - # observational data path. - # aircraft measurements - RFpath=package_path+'data/'+campaign+'/obs/aircraft/aircraft_lowrate/' - ccnpath=package_path+'data/'+campaign+'/obs/aircraft/CCN/' - - # model path - # pre-processed model path - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -else: - raise ValueError("does not recognize this campaign: "+campaign) - diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3483feb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +pip +matplotlib +numpy +scipy +sphinx +lxml +conda-build +pytest +pytest-shutil +ipython +black diff --git a/scripts/run_plot.py b/scripts/run_plot.py new file mode 100644 index 0000000..e33c75e --- /dev/null +++ b/scripts/run_plot.py @@ -0,0 +1,220 @@ +""" +script to generate all plots + +Instruction: + edit the first section "user-specified settings" for +""" +import numpy as np +from esmac_diags.plotting import * + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# user-specified settings +settings = {} + +# set field campaign name. More settings on specific field campaigns are in next section +# HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS +settings['campaign'] = 'HISCALE' + +# set model names. 
+settings['Model_List'] = ['E3SMv1'] +#settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] + +# set line colors for each model. corresponding to the Model_List +settings['color_model'] = ['r','b','g'] + +# set field campaign IOPs. Only used for HISCALE and ACEENA. +# IOP1/IOP2 +settings['IOP'] = 'IOP2' + +# Please specify the path of your data and for figure output +settings['figpath'] = '/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/figures/' +settings['datapath'] = '/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/data/' + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def add_other_setting(settings): + """ + add other settings for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables + + Returns + ------- + settings + + """ + figpath = settings['figpath']+settings['campaign'] + datapath = settings['datapath']+settings['campaign'] + + # path of output figures + settings['figpath_aircraft_timeseries'] = figpath+'/aircraft/timeseries/' + settings['figpath_aircraft_statistics'] = figpath+'/aircraft/statistics/' + settings['figpath_ship_timeseries'] = figpath+'/ship/timeseries/' + settings['figpath_ship_statistics'] = figpath+'/ship/statistics/' + settings['figpath_sfc_timeseries'] = figpath+'/surface/timeseries/' + settings['figpath_sfc_statistics'] = figpath+'/surface/statistics/' + settings['figpath_profile_timeseries'] = figpath+'/profile/timeseries/' + + # other settings for different field campaigns + if settings['campaign']=='HISCALE': + settings['site']='SGP' + # lat/lon at SGP + settings['lat0'] = 36.6059 + settings['lon0'] = 360-97.48792 # 0-360 + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(300,4300,300) + # time periods for IOPs. 
needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2016-04-25' + settings['end_date']='2016-05-29' + elif settings['IOP']=='IOP2': + settings['start_date']='2016-08-27' + settings['end_date']='2016-09-22' + #### observational data path. ###### + settings['merged_size_path'] = datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/mei-iwg1/' + settings['fimspath'] = datapath+'/obs/aircraft/wang-fims/' + settings['pcasppath'] = datapath+'/obs/aircraft/tomlinson-pcasp/' + settings['cvipath'] = datapath+'/obs/aircraft/pekour-cvi/' + settings['cpcpath'] = datapath+'/obs/aircraft/mei-cpc/' + settings['ccnpath'] = datapath+'/obs/aircraft/mei-ccn/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-ams/' + settings['wcmpath'] = datapath+'/obs/aircraft/matthews-wcm/' + # surface measurements + settings['smps_pnnl_path'] = datapath+'/obs/surface/pnnl-smps/' + settings['smps_bnl_path'] = datapath+'/obs/surface/bnl-smps/' + settings['nanosmps_bnl_path'] = datapath+'/obs/surface/bnl-nanosmps/' + settings['uhsassfcpath'] = datapath+'/obs/surface/arm-uhsas/' + settings['cpcsfcpath'] = datapath+'/obs/surface/arm-cpc/' + settings['cpcusfcpath'] = datapath+'/obs/surface/arm-cpcu/' + settings['ccnsfcpath'] = datapath+'/obs/surface/arm-ccn/' + settings['metpath'] = datapath+'/obs/surface/arm-met/' + settings['acsmpath'] = datapath+'/obs/surface/arm_acsm/' + # vertical profile measurements + settings['armbepath'] = datapath+'/obs/profile/sgparmbecldrad/' + # PBLH data needed for plot_flight_pdf_percentile_SeparatePBLH_hiscale.py only + settings['pblhpath'] = datapath+'/obs/profile/arm-pblh/' + settings['dlpath'] = datapath+'/obs/profile/dl-pblh/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######################## + + elif 
settings['campaign']=='ACEENA': + settings['site']='ENA' + # lat/lon for ENA + settings['lat0'] = 39.09527 + settings['lon0'] = 360-28.0339 + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(100,4300,300) + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2017-06-20' + settings['end_date']='2017-07-20' + elif settings['IOP']=='IOP2': + settings['start_date']='2018-01-21' + settings['end_date']='2018-02-19' + #### observational data path. ###### + settings['merged_size_path']=datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/IWG/' + settings['cvipath'] = datapath+'/obs/aircraft/inletcvi/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-hrfams/' + settings['fimspath'] = datapath+'/obs/aircraft/FIMS/' + settings['pcasppath'] = datapath+'/obs/aircraft/pcasp_g1/' + settings['opcpath'] = datapath+'/obs/aircraft/opciso/' + settings['cpcpath'] = datapath+'/obs/aircraft/cpc_aaf/' + settings['ccnpath'] = datapath+'/obs/aircraft/ccn_aaf/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-hrfams/' + settings['wcmpath'] = datapath+'/obs/aircraft/wcm_ACEENA/' + # surface measurements + settings['uhsassfcpath'] = datapath+'/obs/surface/arm_uhsas/' + settings['cpcsfcpath'] = datapath+'/obs/surface/arm_cpcf/' + settings['cpcusfcpath'] = 'N/A' + settings['ccnsfcpath'] = datapath+'/obs/surface/arm_aosccn1/' + settings['metpath'] = datapath+'/obs/surface/arm_met/' + settings['acsmpath'] = datapath+'/obs/surface/arm_acsm/' + # vertical profile measurements + settings['armbepath'] = datapath+'/obs/profile/enaarmbecldrad/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######### + elif settings['campaign']=='MAGIC': + settings['site']='MAG' + # bin of 
latitude to calculate ship track composite + settings['latbin'] = np.arange(21.5,34,1) + # reference lat/lon + settings['lat0']=30. + settings['lon0']=230. + #### observational data path. ###### + settings['shipmetpath'] = datapath+'/obs/ship/raynolds-marmet/' + settings['shipccnpath'] = datapath+'/obs/ship/magaosccn100M1.a1/' + settings['shipcpcpath'] = datapath+'/obs/ship/magaoscpcfM1.a1/' + settings['shipmwrpath'] = datapath+'/obs/ship/magmwrret1liljclouM1.s2/' + settings['shipuhsaspath'] = datapath+'/obs/ship/magaosuhsasM1.a1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + elif settings['campaign']=='MARCUS': + settings['site'] = 'MAR' + # bin of latitude to calculate ship track composite + settings['latbin'] = np.arange(-68.5,-42,1) + # reference lat/lon + settings['lat0'] = -40. + settings['lon0'] = 120. + #### observational data path. ###### + settings['shipmetpath'] = datapath+'/obs/ship/maraadmetX1.b1/' + settings['shipccnpath'] = datapath+'/obs/ship/maraosccn1colavgM1.b1/' + settings['shipcpcpath'] = datapath+'/obs/ship/maraoscpcf1mM1.b1/' + settings['shipmwrpath'] = datapath+'/obs/ship/marmwrret1liljclouM1.s2/' + settings['shipuhsaspath'] = datapath+'/obs/ship/maraosuhsasM1.a1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############ + elif settings['campaign']=='CSET': + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(200,8000,400) + # bin of latitude to calculate composite percentiles, same as MAGIC + settings['latbin'] = np.arange(22.5,39,1) + # lat/lon at the airport + settings['lat0'] = 38.5564 + settings['lon0'] = 360-121.3120 + #### observational data path. 
###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + settings['ccnpath'] = 'N/A' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############### + elif settings['campaign']=='SOCRATES': + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(200,8000,400) + # bin of latitude to calculate composite percentiles + settings['latbin'] = np.arange(-63.5,-42,1) + # lat/lon at the airport + settings['lat0'] = -42.8371 + settings['lon0'] = 147.5054 + #### observational data path. ###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + settings['ccnpath'] = datapath+'/obs/aircraft/CCN/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + else: + raise ValueError("does not recognize this campaign: "+settings['campaign']) + return(settings) + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# running command +all_settings = add_other_setting(settings) +plot_flight_track_height.run_plot(all_settings) +plot_sfc_pdf_AerosolSize.run_plot(all_settings) diff --git a/scripts/run_plot_all.py b/scripts/run_plot_all.py new file mode 100644 index 0000000..f6e4cc3 --- /dev/null +++ b/scripts/run_plot_all.py @@ -0,0 +1,329 @@ +""" +script to generate all plots + +Instruction: + edit the first section "user-specified settings" for +""" +import numpy as np +from esmac_diags.plotting import * + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# user-specified settings +settings = {} + +# set field campaign name. More settings on specific field campaigns are in next section +# HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS +settings['campaign'] = 'MAGIC' + +# set model names. 
+settings['Model_List'] = ['E3SMv1'] +# settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] + +# set line colors for each model. corresponding to the Model_List +settings['color_model'] = ['r','b','g'] + +# set field campaign IOPs. Only used for HISCALE and ACEENA. +# IOP1/IOP2 +settings['IOP'] = 'IOP1' + +########## set filepath for preprocessing. If you don't run preprocessing, ignore this part +# path of E3SM model data (h3) for preprocessing. same length of Model_List +# settings['E3SM_hourly_path'] = ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +#settings['E3SM_hourly_path'] = \ +# ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/', \ +# '/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +#settings['E3SM_hourly_filehead'] = ['E3SMv1','EAMv1_CONUS_RRM'] +############# + +# Please specify the path of your data and for figure output +settings['figpath'] = '/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/figures/' +settings['datapath'] = '/global/cscratch1/sd/sqtang/EAGLES/Aerosol_diag_pkg/data/' + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def add_other_setting(settings): + """ + add other settings for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables + + Returns + ------- + settings + + """ + figpath = settings['figpath']+settings['campaign'] + datapath = settings['datapath']+settings['campaign'] + + # path of output figures + settings['figpath_aircraft_timeseries'] = figpath+'/aircraft/timeseries/' + settings['figpath_aircraft_statistics'] = figpath+'/aircraft/statistics/' + settings['figpath_ship_timeseries'] = figpath+'/ship/timeseries/' + settings['figpath_ship_statistics'] = figpath+'/ship/statistics/' + settings['figpath_sfc_timeseries'] = figpath+'/surface/timeseries/' + settings['figpath_sfc_statistics'] = figpath+'/surface/statistics/' + settings['figpath_profile_timeseries'] = figpath+'/profile/timeseries/' + + # 
other settings for different field campaigns + if settings['campaign']=='HISCALE': + settings['site']='SGP' + # lat/lon at SGP + settings['lat0'] = 36.6059 + settings['lon0'] = 360-97.48792 # 0-360 + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(300,4300,300) + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2016-04-25' + settings['end_date']='2016-05-29' + elif settings['IOP']=='IOP2': + settings['start_date']='2016-08-27' + settings['end_date']='2016-09-22' + #### observational data path. ###### + settings['merged_size_path'] = datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/mei-iwg1/' + settings['fimspath'] = datapath+'/obs/aircraft/wang-fims/' + settings['pcasppath'] = datapath+'/obs/aircraft/tomlinson-pcasp/' + settings['cvipath'] = datapath+'/obs/aircraft/pekour-cvi/' + settings['cpcpath'] = datapath+'/obs/aircraft/mei-cpc/' + settings['ccnpath'] = datapath+'/obs/aircraft/mei-ccn/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-ams/' + settings['wcmpath'] = datapath+'/obs/aircraft/matthews-wcm/' + # surface measurements + settings['smps_pnnl_path'] = datapath+'/obs/surface/pnnl-smps/' + settings['smps_bnl_path'] = datapath+'/obs/surface/bnl-smps/' + settings['nanosmps_bnl_path'] = datapath+'/obs/surface/bnl-nanosmps/' + settings['uhsassfcpath'] = datapath+'/obs/surface/arm-uhsas/' + settings['cpcsfcpath'] = datapath+'/obs/surface/arm-cpc/' + settings['cpcusfcpath'] = datapath+'/obs/surface/arm-cpcu/' + settings['ccnsfcpath'] = datapath+'/obs/surface/arm-ccn/' + settings['metpath'] = datapath+'/obs/surface/arm-met/' + settings['acsmpath'] = datapath+'/obs/surface/arm_acsm/' + # vertical profile measurements + settings['armbepath'] = datapath+'/obs/profile/sgparmbecldrad/' + # PBLH data needed for plot_flight_pdf_percentile_SeparatePBLH_hiscale.py only + settings['pblhpath'] = 
datapath+'/obs/profile/arm-pblh/' + settings['dlpath'] = datapath+'/obs/profile/dl-pblh/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######################## + + elif settings['campaign']=='ACEENA': + settings['site']='ENA' + # lat/lon for ENA + settings['lat0'] = 39.09527 + settings['lon0'] = 360-28.0339 + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(100,4300,300) + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2017-06-20' + settings['end_date']='2017-07-20' + elif settings['IOP']=='IOP2': + settings['start_date']='2018-01-21' + settings['end_date']='2018-02-19' + #### observational data path. ###### + settings['merged_size_path']=datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/IWG/' + settings['cvipath'] = datapath+'/obs/aircraft/inletcvi/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-hrfams/' + settings['fimspath'] = datapath+'/obs/aircraft/FIMS/' + settings['pcasppath'] = datapath+'/obs/aircraft/pcasp_g1/' + settings['opcpath'] = datapath+'/obs/aircraft/opciso/' + settings['cpcpath'] = datapath+'/obs/aircraft/cpc_aaf/' + settings['ccnpath'] = datapath+'/obs/aircraft/ccn_aaf/' + settings['amspath'] = datapath+'/obs/aircraft/shilling-hrfams/' + settings['wcmpath'] = datapath+'/obs/aircraft/wcm_ACEENA/' + # surface measurements + settings['uhsassfcpath'] = datapath+'/obs/surface/arm_uhsas/' + settings['cpcsfcpath'] = datapath+'/obs/surface/arm_cpcf/' + settings['cpcusfcpath'] = 'N/A' + settings['ccnsfcpath'] = datapath+'/obs/surface/arm_aosccn1/' + settings['metpath'] = datapath+'/obs/surface/arm_met/' + settings['acsmpath'] = datapath+'/obs/surface/arm_acsm/' + # vertical profile measurements + settings['armbepath'] = 
datapath+'/obs/profile/enaarmbecldrad/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######### + elif settings['campaign']=='MAGIC': + settings['site']='MAG' + # bin of latitude to calculate ship track composite + settings['latbin'] = np.arange(21.5,34,1) + # reference lat/lon + settings['lat0']=30. + settings['lon0']=230. + #### observational data path. ###### + settings['shipmetpath'] = datapath+'/obs/ship/raynolds-marmet/' + settings['shipccnpath'] = datapath+'/obs/ship/magaosccn100M1.a1/' + settings['shipcpcpath'] = datapath+'/obs/ship/magaoscpcfM1.a1/' + settings['shipmwrpath'] = datapath+'/obs/ship/magmwrret1liljclouM1.s2/' + settings['shipuhsaspath'] = datapath+'/obs/ship/magaosuhsasM1.a1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + elif settings['campaign']=='MARCUS': + settings['site'] = 'MAR' + # bin of latitude to calculate ship track composite + settings['latbin'] = np.arange(-68.5,-42,1) + # reference lat/lon + settings['lat0'] = -40. + settings['lon0'] = 120. + #### observational data path. 
###### + settings['shipmetpath'] = datapath+'/obs/ship/maraadmetX1.b1/' + settings['shipccnpath'] = datapath+'/obs/ship/maraosccn1colavgM1.b1/' + settings['shipcpcpath'] = datapath+'/obs/ship/maraoscpcf1mM1.b1/' + settings['shipmwrpath'] = datapath+'/obs/ship/marmwrret1liljclouM1.s2/' + settings['shipuhsaspath'] = datapath+'/obs/ship/maraosuhsasM1.a1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############ + elif settings['campaign']=='CSET': + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(200,8000,400) + # bin of latitude to calculate composite percentiles, same as MAGIC + settings['latbin'] = np.arange(22.5,39,1) + # lat/lon at the airport + settings['lat0'] = 38.5564 + settings['lon0'] = 360-121.3120 + #### observational data path. ###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + settings['ccnpath'] = 'N/A' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############### + elif settings['campaign']=='SOCRATES': + # bin of flight heights to calculate percentiles + settings['height_bin'] = np.arange(200,8000,400) + # bin of latitude to calculate composite percentiles + settings['latbin'] = np.arange(-63.5,-42,1) + # lat/lon at the airport + settings['lat0'] = -42.8371 + settings['lon0'] = 147.5054 + #### observational data path. 
###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + settings['ccnpath'] = datapath+'/obs/aircraft/CCN/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + else: + raise ValueError("does not recognize this campaign: "+settings['campaign']) + return(settings) + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def plot_all(settings): + """ + include all plotting code. select for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables for plotting. + + Returns + ------- + None. + + """ + print('***** start plotting ********') + if settings['campaign'] in ['HISCALE','ACEENA','CSET','SOCRATES']: + print('********** plot flight data *****************') + # flight information and timeseries + plot_flight_track_height.run_plot(settings) + plot_flight_timeseries_CN.run_plot(settings) + contour_flight_timeseries_AerosolSize.run_plot(settings) + plot_flight_timeseries_CCN.run_plot(settings) + # vertical distribution + plot_flight_percentile_z_CN.run_plot(settings) + plot_flight_percentile_z_CCN.run_plot(settings) + plot_flight_profile_z_CldFreq.run_plot(settings) + plot_flight_profile_z_LWC.run_plot(settings) + # mean statistics + plot_flight_pdf_AerosolSize.run_plot(settings) + calc_statistic_flight_CN.run_plot(settings) + if settings['campaign'] in ['HISCALE','ACEENA']: + plot_profile_cloud.run_plot(settings) + plot_flight_timeseries_AerosolComposition.run_plot(settings) + plot_flight_percentile_z_AerosolComposition.run_plot(settings) + elif settings['campaign'] in ['CSET','SOCRATES']: + plot_flight_percentile_lat_cldfreq.run_plot(settings) + plot_flight_percentile_lat_CN.run_plot(settings) + plot_flight_percentile_lat_CCN.run_plot(settings) + if settings['campaign'] == 'HISCALE': + 
plot_flight_pdf_percentile_SeparatePBLH_hiscale.run_plot(settings) + if settings['campaign'] == 'ACEENA': + plot_flight_pdf_percentile_SeparateCloud_aceena.run_plot(settings) + + if settings['campaign'] in ['HISCALE','ACEENA']: + print('*********** plot surface data ***************') + # time series + plot_sfc_timeseries_CN.run_plot(settings) + plot_sfc_timeseries_CCN.run_plot(settings) + plot_sfc_timeseries_AerosolComposition.run_plot(settings) + contour_sfc_timeseries_AerosolSize.run_plot(settings) + # diurnal cycle + plot_sfc_diurnalcycle_CN.run_plot(settings) + plot_sfc_diurnalcycle_CCN.run_plot(settings) + plot_sfc_diurnalcycle_AerosolComposition.run_plot(settings) + contour_sfc_diurnalcycle_AerosolSize.run_plot(settings) + # mean statistics + plot_sfc_pdf_AerosolSize.run_plot(settings) + plot_sfc_pie_AerosolComposition.run_plot(settings) + calc_statistic_sfc_CN.run_plot(settings) + + if settings['campaign'] in ['MAGIC','MARCUS']: + print('*********** plot ship data ***************') + # time series + plot_ship_timeseries_met.run_plot(settings) + plot_ship_timeseries_CN.run_plot(settings) + plot_ship_timeseries_CCN.run_plot(settings) + # statistics + plot_ship_percentile_lat_met.run_plot(settings) + plot_ship_percentile_lat_CN.run_plot(settings) + plot_ship_percentile_lat_CCN.run_plot(settings) + plot_ship_percentile_lat_LWP.run_plot(settings) + plot_ship_pdf_AerosolSize.run_plot(settings) + contour_ship_timeseries_AerosolSize.run_plot(settings) + calc_statistic_ship_CN.run_plot(settings) + + print('*********** end plotting **************') + +#%% main +settings['campaign'] = 'MAGIC' +print('---------- plot for ' + settings['campaign'] + ' -----------') +plot_all(add_other_setting(settings)) +settings['campaign'] = 'MARCUS' +print('---------- plot for ' + settings['campaign'] + ' -----------') +plot_all(add_other_setting(settings)) +settings['campaign'] = 'CSET' +print('---------- plot for ' + settings['campaign'] + ' -----------') 
+plot_all(add_other_setting(settings)) +settings['campaign'] = 'SOCRATES' +print('---------- plot for ' + settings['campaign'] + ' -----------') +plot_all(add_other_setting(settings)) +settings['campaign'] = 'ACEENA' +settings['IOP'] = 'IOP1' +print('---------- plot for ' + settings['campaign'] + ' IOP1 -----------') +plot_all(add_other_setting(settings)) +settings['IOP'] = 'IOP2' +print('---------- plot for ' + settings['campaign'] + ' IOP2 -----------') +plot_all(add_other_setting(settings)) +settings['campaign'] = 'HISCALE' +settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] +print('---------- plot for ' + settings['campaign'] + ' IOP2 -----------') +plot_all(add_other_setting(settings)) +settings['IOP'] = 'IOP1' +print('---------- plot for ' + settings['campaign'] + ' IOP1 -----------') +plot_all(add_other_setting(settings)) diff --git a/scripts/run_preprocess.py b/scripts/run_preprocess.py new file mode 100644 index 0000000..102953b --- /dev/null +++ b/scripts/run_preprocess.py @@ -0,0 +1,161 @@ +""" +script to generate all plots + +Instruction: + edit the first section "user-specified settings" for +""" +import numpy as np +from esmac_diags.preprocessing import * + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# user-specified settings +settings = {} + +# set field campaign name. More settings on specific field campaigns are in next section +# HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS +settings['campaign'] = 'MAGIC' + +# set model names. +settings['Model_List'] = ['E3SMv1'] +# settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] + +# set line colors for each model. corresponding to the Model_List +settings['color_model'] = ['r','b','g'] + +# set field campaign IOPs. Only used for HISCALE and ACEENA. +# IOP1/IOP2 +settings['IOP'] = 'IOP1' + +########## set filepath for preprocessing. If you don't run preprocessing, ignore this part +# path of E3SM model data (h3) for preprocessing. 
same length of Model_List +# settings['E3SM_hourly_path'] = ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +settings['E3SM_hourly_path'] = \ + ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/', \ + '/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +settings['E3SM_hourly_filehead'] = ['E3SMv1','EAMv1_CONUS_RRM'] +############# + +# Please specify the path of your data and for figure output +settings['datapath'] = '/global/homes/s/sqtang/EAGLES/ESMAC_diags/data/' + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def add_other_setting(settings): + """ + add other settings for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables + + Returns + ------- + settings + + """ + datapath = settings['datapath']+settings['campaign'] + + + # other settings for different field campaigns + if settings['campaign']=='HISCALE': + settings['site']='SGP' + # lat/lon at SGP + settings['lat0'] = 36.6059 + settings['lon0'] = 360-97.48792 # 0-360 + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2016-04-25' + settings['end_date']='2016-05-29' + elif settings['IOP']=='IOP2': + settings['start_date']='2016-08-27' + settings['end_date']='2016-09-22' + #### observational data path. 
###### + settings['merged_size_path'] = datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/mei-iwg1/' + settings['fimspath'] = datapath+'/obs/aircraft/wang-fims/' + settings['pcasppath'] = datapath+'/obs/aircraft/tomlinson-pcasp/' + settings['cvipath'] = datapath+'/obs/aircraft/pekour-cvi/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######################## + + elif settings['campaign']=='ACEENA': + settings['site']='ENA' + # lat/lon for ENA + settings['lat0'] = 39.09527 + settings['lon0'] = 360-28.0339 + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2017-06-20' + settings['end_date']='2017-07-20' + elif settings['IOP']=='IOP2': + settings['start_date']='2018-01-21' + settings['end_date']='2018-02-19' + #### observational data path. ###### + settings['merged_size_path']=datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/IWG/' + settings['cvipath'] = datapath+'/obs/aircraft/inletcvi/' + settings['fimspath'] = datapath+'/obs/aircraft/FIMS/' + settings['pcasppath'] = datapath+'/obs/aircraft/pcasp_g1/' + settings['opcpath'] = datapath+'/obs/aircraft/opciso/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######### + elif settings['campaign']=='MAGIC': + settings['site']='MAG' + # reference lat/lon + settings['lat0']=30. + settings['lon0']=230. + #### observational data path. 
###### + settings['shipmetpath'] = datapath+'/obs/ship/raynolds-marmet/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + elif settings['campaign']=='MARCUS': + settings['site'] = 'MAR' + # reference lat/lon + settings['lat0'] = -40. + settings['lon0'] = 120. + #### observational data path. ###### + settings['shipmetpath'] = datapath+'/obs/ship/maraadmetX1.b1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############ + elif settings['campaign']=='CSET': + # lat/lon at the airport + settings['lat0'] = 38.5564 + settings['lon0'] = 360-121.3120 + #### observational data path. ###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############### + elif settings['campaign']=='SOCRATES': + # lat/lon at the airport + settings['lat0'] = -42.8371 + settings['lon0'] = 147.5054 + #### observational data path. 
###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + else: + raise ValueError("does not recognize this campaign: "+settings['campaign']) + return(settings) + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# running command +all_settings = add_other_setting(settings) +prep_obs_mergesize_HISCALE.run_prep(all_settings) +prep_E3SM_flighttrack_bins.run_prep(all_settings) + \ No newline at end of file diff --git a/scripts/run_preprocess_all.py b/scripts/run_preprocess_all.py new file mode 100644 index 0000000..79c81b6 --- /dev/null +++ b/scripts/run_preprocess_all.py @@ -0,0 +1,224 @@ +""" +script to generate all plots + +Instruction: + edit the first section "user-specified settings" for +""" +import numpy as np +from esmac_diags.preprocessing import * + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# user-specified settings +settings = {} + +# set field campaign name. More settings on specific field campaigns are in next section +# HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS +settings['campaign'] = 'MAGIC' + +# set model names. +settings['Model_List'] = ['E3SMv1'] +# settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] + +# set line colors for each model. corresponding to the Model_List +settings['color_model'] = ['r','b','g'] + +# set field campaign IOPs. Only used for HISCALE and ACEENA. +# IOP1/IOP2 +settings['IOP'] = 'IOP1' + +########## set filepath for preprocessing. If you don't run preprocessing, ignore this part +# path of E3SM model data (h3) for preprocessing. 
same length of Model_List +# settings['E3SM_hourly_path'] = ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +settings['E3SM_hourly_path'] = \ + ['/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/', \ + '/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_hourly/'] +settings['E3SM_hourly_filehead'] = ['E3SMv1','EAMv1_CONUS_RRM'] +############# + +# Please specify the path of your data and for figure output +settings['datapath'] = '/global/homes/s/sqtang/EAGLES/ESMAC_diags/data/' + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def add_other_setting(settings): + """ + add other settings for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables + + Returns + ------- + settings + + """ + datapath = settings['datapath']+settings['campaign'] + + + # other settings for different field campaigns + if settings['campaign']=='HISCALE': + settings['site']='SGP' + # lat/lon at SGP + settings['lat0'] = 36.6059 + settings['lon0'] = 360-97.48792 # 0-360 + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2016-04-25' + settings['end_date']='2016-05-29' + elif settings['IOP']=='IOP2': + settings['start_date']='2016-08-27' + settings['end_date']='2016-09-22' + #### observational data path. 
###### + settings['merged_size_path'] = datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/mei-iwg1/' + settings['fimspath'] = datapath+'/obs/aircraft/wang-fims/' + settings['pcasppath'] = datapath+'/obs/aircraft/tomlinson-pcasp/' + settings['cvipath'] = datapath+'/obs/aircraft/pekour-cvi/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######################## + + elif settings['campaign']=='ACEENA': + settings['site']='ENA' + # lat/lon for ENA + settings['lat0'] = 39.09527 + settings['lon0'] = 360-28.0339 + # time periods for IOPs. needed in preprocessing of surface data + if settings['IOP']=='IOP1': + settings['start_date']='2017-06-20' + settings['end_date']='2017-07-20' + elif settings['IOP']=='IOP2': + settings['start_date']='2018-01-21' + settings['end_date']='2018-02-19' + #### observational data path. ###### + settings['merged_size_path']=datapath+'/obs/aircraft/merged_bin/' + settings['iwgpath'] = datapath+'/obs/aircraft/IWG/' + settings['cvipath'] = datapath+'/obs/aircraft/inletcvi/' + settings['fimspath'] = datapath+'/obs/aircraft/FIMS/' + settings['pcasppath'] = datapath+'/obs/aircraft/pcasp_g1/' + settings['opcpath'] = datapath+'/obs/aircraft/opciso/' + #### pre-processed model data path ###### + settings['E3SM_sfc_path'] = datapath+'/model/surface/' + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ######### + elif settings['campaign']=='MAGIC': + settings['site']='MAG' + # reference lat/lon + settings['lat0']=30. + settings['lon0']=230. + #### observational data path. 
###### + settings['shipmetpath'] = datapath+'/obs/ship/raynolds-marmet/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + elif settings['campaign']=='MARCUS': + settings['site'] = 'MAR' + # reference lat/lon + settings['lat0'] = -40. + settings['lon0'] = 120. + #### observational data path. ###### + settings['shipmetpath'] = datapath+'/obs/ship/maraadmetX1.b1/' + #### pre-processed model data path ###### + settings['E3SM_ship_path'] = datapath+'/model/shiptrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############ + elif settings['campaign']=='CSET': + # lat/lon at the airport + settings['lat0'] = 38.5564 + settings['lon0'] = 360-121.3120 + #### observational data path. ###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ############### + elif settings['campaign']=='SOCRATES': + # lat/lon at the airport + settings['lat0'] = -42.8371 + settings['lon0'] = 147.5054 + #### observational data path. ###### + settings['RFpath'] = datapath+'/obs/aircraft/aircraft_lowrate/' + #### pre-processed model data path ###### + settings['E3SM_aircraft_path'] = datapath+'/model/flighttrack/' + settings['E3SM_profile_path'] = datapath+'/model/profile/' + ################ + else: + raise ValueError("does not recognize this campaign: "+settings['campaign']) + return(settings) + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +def prep_all(settings): + """ + include all plotting code. select for different field campaigns + + Parameters + ---------- + settings : dictionary + all setting variables for plotting. + + Returns + ------- + None. 
+ + """ + print('***** start preprocessing ********') + # prepare merged observed aerosol size distribution + if settings['campaign'] == 'HISCALE': + prep_obs_mergesize_HISCALE.run_prep(settings) + if settings['campaign'] == 'ACEENA': + prep_obs_mergesize_ACEENA.run_prep(settings) + + # prepare model data + if settings['campaign'] in ['HISCALE','ACEENA','CSET','SOCRATES']: + print('********** preprocess flight data *****************') + # flight information and timeseries + prep_E3SM_flighttrack_bins.run_prep(settings) + prep_E3SM_flighttrack_allvars.run_prep(settings) + if settings['campaign'] in ['HISCALE','ACEENA']: + print('*********** preprocess surface and profile data ***************') + # time series + prep_E3SM_sfc_bins.run_prep(settings) + prep_E3SM_sfc_allvars.run_prep(settings) + prep_E3SM_profile_allvars.run_prep(settings) + if settings['campaign'] in ['MAGIC','MARCUS']: + print('*********** preprocess ship data ***************') + # time series + prep_E3SM_shiptrack_allvars.run_prep(settings) + prep_E3SM_shiptrack_bins.run_prep(settings) + prep_E3SM_shiptrack_profiles.run_prep(settings) + print('*********** end preprocessing **************') + +#%% main +settings['campaign'] = 'MAGIC' +print('---------- prepare for ' + settings['campaign'] + ' -----------') +prep_all(add_other_setting(settings)) +settings['campaign'] = 'MARCUS' +print('---------- plot for ' + settings['campaign'] + ' -----------') +prep_all(add_other_setting(settings)) +settings['campaign'] = 'CSET' +print('---------- plot for ' + settings['campaign'] + ' -----------') +prep_all(add_other_setting(settings)) +settings['campaign'] = 'SOCRATES' +print('---------- plot for ' + settings['campaign'] + ' -----------') +prep_all(add_other_setting(settings)) +settings['campaign'] = 'ACEENA' +settings['IOP'] = 'IOP1' +print('---------- plot for ' + settings['campaign'] + ' IOP1 -----------') +prep_all(add_other_setting(settings)) +settings['IOP'] = 'IOP2' +print('---------- plot for ' + 
settings['campaign'] + ' IOP2 -----------') +prep_all(add_other_setting(settings)) +settings['campaign'] = 'HISCALE' +settings['Model_List'] = ['E3SMv1','EAMv1_CONUS_RRM'] +print('---------- plot for ' + settings['campaign'] + ' IOP2 -----------') +prep_all(add_other_setting(settings)) +settings['IOP'] = 'IOP1' +print('---------- plot for ' + settings['campaign'] + ' IOP1 -----------') +prep_all(add_other_setting(settings)) diff --git a/scripts/run_testcase.py b/scripts/run_testcase.py new file mode 100644 index 0000000..03b25df --- /dev/null +++ b/scripts/run_testcase.py @@ -0,0 +1,62 @@ +""" +script to run a test case +compare the figures generated at testcase/figures/ with testcase/figures_verify +to makesure testcase works as expected +""" + +from esmac_diags.plotting import plot_flight_timeseries_AerosolComposition, plot_flight_track_height + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# user-specified settings +settings = {} + +# Please change the path of the diagnostic package to your own +package_path = '/global/homes/s/sqtang/EAGLES/ESMAC_diags/' + +# set field campaign name. use ACEENA for the test case +settings['campaign'] = 'ACEENA' +# set model names. +settings['Model_List'] = ['E3SMv1'] +# set line colors for each model. corresponding to the Model_List +settings['color_model'] = ['r','b','g'] +# set field campaign IOPs. Only used for HISCALE and ACEENA. +# IOP1/IOP2 +settings['IOP'] = 'IOP1' + + +# path of output figures +settings['figpath_aircraft_timeseries'] = package_path+'testcase/figures/' +settings['figpath_aircraft_statistics'] = package_path+'testcase/figures/' + + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# other settings for different field campaigns + +# set location and time information +if settings['campaign']=='ACEENA': + settings['site']='ENA' + # lat/lon for ENA + settings['lat0'] = 39.09527 + settings['lon0'] = 360-28.0339 + + # observational data path. 
+ settings['merged_size_path']=package_path+'testcase/data/obs/' + settings['iwgpath'] = package_path+'testcase/data/obs/' + settings['cvipath'] = package_path+'testcase/data/obs/' + settings['amspath'] = package_path+'testcase/data/obs/AMS/' + + # model path + # pre-processed model path + settings['E3SM_aircraft_path'] = package_path+'testcase/data/model/' + +else: + raise ValueError("Test case should only for ACEENA. Current campaign is: "+settings['campaign']) + +#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Long list of blah.run_plot() + +### Create our actual plots +plot_flight_track_height.run_plot(settings) +plot_flight_timeseries_AerosolComposition.run_plot(settings) + diff --git a/scripts/scripts_jobsubmit.csh b/scripts/scripts_jobsubmit.csh new file mode 100644 index 0000000..fa4caac --- /dev/null +++ b/scripts/scripts_jobsubmit.csh @@ -0,0 +1,16 @@ +#!/bin/csh + +#SBATCH --nodes=1 +#SBATCH --time=05:00:00 +#SBATCH --qos=regular +#SBATCH --constraint=knl +#SBATCH --account=m3525 +#SBATCH --output=a.out +#SBATCH --error=a.err + +#module load python +#conda activate esmac_diags + +python run_plotting.py + +exit diff --git a/scripts/scripts_plotting.csh b/scripts/scripts_plotting.csh deleted file mode 100755 index 71238d0..0000000 --- a/scripts/scripts_plotting.csh +++ /dev/null @@ -1,213 +0,0 @@ -#!/bin/csh - - -# This script makes user-specified plots comparing model simulations with ARM measurements. -# -# -# ############################################################ -# # Step 1: change settings in settings.py # -# # such as campaign name and model names # -# ############################################################ -# -# # load modules. Tested version is Python 3.6.7 (Constance) and Python 3.8.5 (NERSC) - module load python - -# # this should be consistent with settings.py -# set field campaign name. 
More settings on specific field campaigns are in next section -set campaign = 'HISCALE' # HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS -#foreach campaign ('MAGIC' 'MARCUS' 'CSET' 'SOCRATES') - -# set model names. up to three -# set Model_List = "['CTRL','Nuc','NucSoaCond']" -set Model_List = "['E3SMv1','EAMv1_CONUS_RRM']" - -# set plotting line colors for each model. corresponding to the Model_List -# set color_model = "['b','r','g']" -set color_model = "['r','b','g']" - -# set IOP (or flight date) that the statistics, pdf and percentiles are averaged for. -# options: IOP1, IOP2, ALL, 20160830b -#set IOP = 'IOP1' -foreach IOP ('IOP1' 'IOP2') - -# ############################################################ -# # Step 2: update settings.py with the above settings # -# ############################################################ - -sed -i "s/^campaign = .*/campaign = '$campaign'/" settings.py -sed -i "s/^Model_List = .*/Model_List = $Model_List/" settings.py -sed -i "s/^color_model = .*/color_model = $color_model/" settings.py -sed -i "s/^IOP = .*/IOP = '$IOP'/" settings.py - -# remove ^M in the file -sed -i "s/\r//g" settings.py -# - -# ############################################################ -# # Step 3: start plotting # -# ############################################################ - -cp settings.py ../python/plotting/settings.py -echo '***** start plotting ********' -echo 'enter the plotting directory: ../python/plotting/' -cd ../python/plotting/ - -############# evaluate with flight measurements ################ -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA') || ($campaign == 'CSET') ||($campaign == 'SOCRATES')) then -echo '**********************************************' -echo 'plotting flight infomation' -python plot_flight_track_height.py -# timeseries comparison for each flight -echo '**********************************************' -echo 'plotting CN number timeseries for flight' -python plot_flight_timeseries_CN.py -echo 
'**********************************************' -echo 'plotting timeseries of aerosol PDF' -python contour_flight_timeseries_AerosolSize.py -echo '**********************************************' -echo 'plotting CCN number timeseries for flight' -python plot_flight_timeseries_CCN.py -# mean statistics for the entire IOP -echo '**********************************************' -echo 'calculate statistics of CN number for flight' -python calc_statistic_flight_CN.py -echo '**********************************************' -echo 'plotting mean aerosol PDF' -python plot_flight_pdf_AerosolSize.py -# vertical profiles or percentiles -echo '**********************************************' -echo 'plotting percentiles of CN number with height' -python plot_flight_percentile_z_CN.py -echo '**********************************************' -echo 'plotting percentiles of CCN number with height' -python plot_flight_percentile_z_CCN.py -# vertical profiles or percentiles of cloud -echo '**********************************************' -echo 'plotting vertical profile of cloud frequency' -python plot_flight_profile_z_CldFreq.py -echo '**********************************************' -echo 'plotting vertical profile of cloud LWC' -python plot_flight_profile_z_LWC.py -if (($campaign == 'CSET') ||($campaign == 'SOCRATES')) then - echo '**********************************************' - echo 'plotting flight height percentile in latitude bins' - python plot_flight_percentile_lat_cldfreq.py - echo '**********************************************' - echo 'plotting flight CN percentile in latitude bins' - python plot_flight_percentile_lat_CN.py - echo '**********************************************' - echo 'plotting flight CCN percentile in latitude bins' - python plot_flight_percentile_lat_CCN.py -endif -if (($campaign == 'HISCALE') ||($campaign == 'ACEENA')) then - echo '**********************************************' - echo 'plotting vertical profile of cloud' - python plot_profile_cloud.py - 
echo '**********************************************' - echo 'plotting aerosol composition timeseries for flight' - python plot_flight_timeseries_AerosolComposition.py - echo '**********************************************' - echo 'plotting percentiles of Aerosol COmposition with height' - python plot_flight_percentile_z_AerosolComposition.py -endif -# specific plotting separated by PBLH or clouds -if ($campaign == 'ACEENA') then - echo '**********************************************' - echo 'plotting aerosol PDF and percentile separated by near surface, near cloud, above cloud' - python plot_flight_pdf_percentile_SeparateCloud_aceena.py -endif -if ($campaign == 'HISCALE') then - echo '**********************************************' - echo 'plotting aerosol PDF and percentile separated by below/above PBLH' - python plot_flight_pdf_percentile_SeparatePBLH_hiscale.py -endif -endif # end evaluate with flight measurements - - -############# evaluate with surface measurements ################ -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA')) then -# timeseries -echo '**********************************************' -echo 'plotting CN number timeseries at surface' -python plot_sfc_timeseries_CN.py -echo '**********************************************' -echo 'plotting CCN number timeseries at surface' -python plot_sfc_timeseries_CCN.py -echo '**********************************************' -echo 'plotting aerosol composition timeseries at surface' -python plot_sfc_timeseries_AerosolComposition.py -echo '**********************************************' -echo 'plotting timeseries of aerosol PDF' -python contour_sfc_timeseries_AerosolSize.py -# diurnal cycle -echo '**********************************************' -echo 'plotting diurnalcycle of CN number at surface' -python plot_sfc_diurnalcycle_CN.py -echo '**********************************************' -echo 'plotting diurnalcycle of CCN number at surface' -python plot_sfc_diurnalcycle_CCN.py -echo 
'**********************************************' -echo 'plotting diurnalcycle of Aerosol COmposition at surface' -python plot_sfc_diurnalcycle_AerosolComposition.py -echo '**********************************************' -echo 'plotting diurnal cycle of aerosol PDF' -python contour_sfc_diurnalcycle_AerosolSize.py -# mean statistics -echo '**********************************************' -echo 'calculate statistics of CN number at surface' -python calc_statistic_sfc_CN.py -echo '**********************************************' -echo 'plotting mean aerosol PDF' -python plot_sfc_pdf_AerosolSize.py -echo '**********************************************' -echo 'plotting fraction of surface aerosol composition' -python plot_sfc_pie_AerosolComposition.py -endif # end evaluate with surface measurements - - -############# evaluate with ship measurements ################ -if (($campaign == 'MAGIC') ||($campaign == 'MARCUS')) then -# timeseries -echo '**********************************************' -echo 'plotting meterological fields timeseries for ship measurements' -python plot_ship_timeseries_met.py -echo '**********************************************' -echo 'plotting CN number timeseries for ship measurements' -python plot_ship_timeseries_CN.py -echo '**********************************************' -echo 'plotting CCN number timeseries for ship measurements' -python plot_ship_timeseries_CCN.py -# statistics -echo '**********************************************' -echo 'plotting meterological fields percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_met.py -echo '**********************************************' -echo 'plotting CN number percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_CN.py -echo '**********************************************' -echo 'plotting CCN number percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_CCN.py -echo '**********************************************' -echo 
'plotting LWP composition in latitude for ship measurements' -python plot_ship_percentile_lat_LWP.py -echo '**********************************************' -echo 'plotting mean aerosol size distribution for ship measurements' -python plot_ship_pdf_AerosolSize.py -echo '**********************************************' -echo 'calculate mean statistics of CN for ship measurements' -python calc_statistic_ship_CN.py -echo '**********************************************' -echo 'plotting timeseries of aerosol size distribution for ship measurements' -python contour_ship_timeseries_AerosolSize.py -endif # end evaluate with ship measurements - -########################################################### -# end -########################################################### -echo '*********** end plotting **************' -cd ../../scripts/ - -end - -exit - diff --git a/scripts/scripts_plotting_jobsubmit.csh b/scripts/scripts_plotting_jobsubmit.csh deleted file mode 100755 index 7922ebf..0000000 --- a/scripts/scripts_plotting_jobsubmit.csh +++ /dev/null @@ -1,220 +0,0 @@ -#!/bin/csh - -#SBATCH --nodes=1 -#SBATCH --time=03:00:00 -#SBATCH --qos=regular -#SBATCH --constraint=knl -#SBATCH --account=m3525 -#SBATCH --output=a.out -#SBATCH --error=a.err - -# This script makes user-specified plots comparing model simulations with ARM measurements. -# -# -# ############################################################ -# # Step 1: change settings in settings.py # -# # such as campaign name and model names # -# ############################################################ -# -# # load modules. Tested version is Python 3.6.7 (Constance) and Python 3.8.5 (NERSC) - module load python - -# # this should be consistent with settings.py -# set field campaign name. More settings on specific field campaigns are in next section -#set campaign = 'ACEENA' # HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS -foreach campaign ('MAGIC' 'MARCUS' 'CSET' 'SOCRATES' 'HISCALE' 'ACEENA') - -# set model names. 
up to three -# set Model_List = "['CTRL','Nuc','NucSoaCond']" -set Model_List = "['E3SMv1']" - -# set plotting line colors for each model. corresponding to the Model_List -# set color_model = "['b','r','g']" -set color_model = "['r','b','g']" - -# set IOP (or flight date) that the statistics, pdf and percentiles are averaged for. -# options: IOP1, IOP2, ALL, 20160830b -set IOP = 'IOP1' -#foreach IOP ('IOP1' 'IOP2') - -# ############################################################ -# # Step 2: update settings.py with the above settings # -# ############################################################ - -sed -i "s/^campaign = .*/campaign = '$campaign'/" settings.py -sed -i "s/^Model_List = .*/Model_List = $Model_List/" settings.py -sed -i "s/^color_model = .*/color_model = $color_model/" settings.py -sed -i "s/^IOP = .*/IOP = '$IOP'/" settings.py - -# remove ^M in the file -sed -i "s/\r//g" settings.py -# - -# ############################################################ -# # Step 3: start plotting # -# ############################################################ - -cp settings.py ../python/plotting/settings.py -echo '***** start plotting ********' -echo 'enter the plotting directory: ../python/plotting/' -cd ../python/plotting/ - -############# evaluate with flight measurements ################ -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA') || ($campaign == 'CSET') ||($campaign == 'SOCRATES')) then -echo '**********************************************' -echo 'plotting flight infomation' -python plot_flight_track_height.py -# timeseries comparison for each flight -echo '**********************************************' -echo 'plotting CN number timeseries for flight' -python plot_flight_timeseries_CN.py -echo '**********************************************' -echo 'plotting timeseries of aerosol PDF' -python contour_flight_timeseries_AerosolSize.py -echo '**********************************************' -echo 'plotting CCN number timeseries for flight' -python 
plot_flight_timeseries_CCN.py -# mean statistics for the entire IOP -echo '**********************************************' -echo 'calculate statistics of CN number for flight' -python calc_statistic_flight_CN.py -echo '**********************************************' -echo 'plotting mean aerosol PDF' -python plot_flight_pdf_AerosolSize.py -# vertical profiles or percentiles -echo '**********************************************' -echo 'plotting percentiles of CN number with height' -python plot_flight_percentile_z_CN.py -echo '**********************************************' -echo 'plotting percentiles of CCN number with height' -python plot_flight_percentile_z_CCN.py -# vertical profiles or percentiles of cloud -echo '**********************************************' -echo 'plotting vertical profile of cloud frequency' -python plot_flight_profile_z_CldFreq.py -echo '**********************************************' -echo 'plotting vertical profile of cloud LWC' -python plot_flight_profile_z_LWC.py -if (($campaign == 'CSET') ||($campaign == 'SOCRATES')) then - echo '**********************************************' - echo 'plotting flight height percentile in latitude bins' - python plot_flight_percentile_lat_cldfreq.py - echo '**********************************************' - echo 'plotting flight CN percentile in latitude bins' - python plot_flight_percentile_lat_CN.py - echo '**********************************************' - echo 'plotting flight CCN percentile in latitude bins' - python plot_flight_percentile_lat_CCN.py -endif -if (($campaign == 'HISCALE') ||($campaign == 'ACEENA')) then - echo '**********************************************' - echo 'plotting vertical profile of cloud' - python plot_profile_cloud.py - echo '**********************************************' - echo 'plotting aerosol composition timeseries for flight' - python plot_flight_timeseries_AerosolComposition.py - echo '**********************************************' - echo 'plotting percentiles of 
Aerosol COmposition with height' - python plot_flight_percentile_z_AerosolComposition.py -endif -# specific plotting separated by PBLH or clouds -if ($campaign == 'ACEENA') then - echo '**********************************************' - echo 'plotting aerosol PDF and percentile separated by near surface, near cloud, above cloud' - python plot_flight_pdf_percentile_SeparateCloud_aceena.py -endif -if ($campaign == 'HISCALE') then - echo '**********************************************' - echo 'plotting aerosol PDF and percentile separated by below/above PBLH' - python plot_flight_pdf_percentile_SeparatePBLH_hiscale.py -endif -endif # end evaluate with flight measurements - - -############# evaluate with surface measurements ################ -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA')) then -# timeseries -echo '**********************************************' -echo 'plotting CN number timeseries at surface' -python plot_sfc_timeseries_CN.py -echo '**********************************************' -echo 'plotting CCN number timeseries at surface' -python plot_sfc_timeseries_CCN.py -echo '**********************************************' -echo 'plotting aerosol composition timeseries at surface' -python plot_sfc_timeseries_AerosolComposition.py -echo '**********************************************' -echo 'plotting timeseries of aerosol PDF' -python contour_sfc_timeseries_AerosolSize.py -# diurnal cycle -echo '**********************************************' -echo 'plotting diurnalcycle of CN number at surface' -python plot_sfc_diurnalcycle_CN.py -echo '**********************************************' -echo 'plotting diurnalcycle of CCN number at surface' -python plot_sfc_diurnalcycle_CCN.py -echo '**********************************************' -echo 'plotting diurnalcycle of Aerosol COmposition at surface' -python plot_sfc_diurnalcycle_AerosolComposition.py -echo '**********************************************' -echo 'plotting diurnal cycle of aerosol PDF' -python 
contour_sfc_diurnalcycle_AerosolSize.py -# mean statistics -echo '**********************************************' -echo 'calculate statistics of CN number at surface' -python calc_statistic_sfc_CN.py -echo '**********************************************' -echo 'plotting mean aerosol PDF' -python plot_sfc_pdf_AerosolSize.py -echo '**********************************************' -echo 'plotting fraction of surface aerosol composition' -python plot_sfc_pie_AerosolComposition.py -endif # end evaluate with surface measurements - - -############# evaluate with ship measurements ################ -if (($campaign == 'MAGIC') ||($campaign == 'MARCUS')) then -# timeseries -echo '**********************************************' -echo 'plotting meterological fields timeseries for ship measurements' -python plot_ship_timeseries_met.py -echo '**********************************************' -echo 'plotting CN number timeseries for ship measurements' -python plot_ship_timeseries_CN.py -echo '**********************************************' -echo 'plotting CCN number timeseries for ship measurements' -python plot_ship_timeseries_CCN.py -# statistics -echo '**********************************************' -echo 'plotting meterological fields percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_met.py -echo '**********************************************' -echo 'plotting CN number percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_CN.py -echo '**********************************************' -echo 'plotting CCN number percentiles in latitude for ship measurements' -python plot_ship_percentile_lat_CCN.py -echo '**********************************************' -echo 'plotting LWP composition in latitude for ship measurements' -python plot_ship_percentile_lat_LWP.py -echo '**********************************************' -echo 'plotting mean aerosol size distribution for ship measurements' -python plot_ship_pdf_AerosolSize.py -echo 
'**********************************************' -echo 'calculate mean statistics of CN for ship measurements' -python calc_statistic_ship_CN.py -echo '**********************************************' -echo 'plotting timeseries of aerosol size distribution for ship measurements' -python contour_ship_timeseries_AerosolSize.py -endif # end evaluate with ship measurements - -########################################################### -# end -########################################################### -echo '*********** end plotting **************' -cd ../../scripts/ - -end - -exit - diff --git a/scripts/scripts_preprocess.csh b/scripts/scripts_preprocess.csh deleted file mode 100755 index 3c19b1d..0000000 --- a/scripts/scripts_preprocess.csh +++ /dev/null @@ -1,111 +0,0 @@ -#!/bin/csh - - -# Kai Zhang provides aerosol data from E3SM simulations that are extracted for aircraft tracks and for selected ARM sites. However, those data are column variables and cover ~3 degrees around the ARM sites. This script extract the variables along the flight track, or the surface variables at the ARM site for direct comparison with ARM measurements. -# -# -# ############################################################ -# # Step 1: change settings in settings.py # -# # such as campaign name and model names # -# # campaign name also needs to be set here # -# ############################################################ -# -# # load modules. Tested version is Python 3.6.7 (Constance) and Python 3.8.5 (NERSC) - module load python - -# # this should be consistent with settings.py -# set field campaign name. More settings on specific field campaigns are in next section -set campaign = 'HISCALE' # HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS - -# set model names. up to three -# set Model_List = "['CTRL','Nuc','NucSoaCond']" -set Model_List = "['EAMv1_CONUS_RRM']" - -# set plotting line colors for each model. 
corresponding to the Model_List -# set color_model = "['b','r','g']" -set color_model = "['b','g']" - -# set IOP (or flight date) that the statistics, pdf and percentiles are averaged for. -# options: IOP1, IOP2, ALL, 20160830b -# set IOP = 'IOP1' -foreach IOP ('IOP1' 'IOP2') - - -# ############################################################ -# # Step 2: update settings.py with the above settings # -# ############################################################ - -sed -i "s/^campaign = .*/campaign = '$campaign'/" settings.py -sed -i "s/^Model_List = .*/Model_List = $Model_List/" settings.py -sed -i "s/^color_model = .*/color_model = $color_model/" settings.py -sed -i "s/^IOP = .*/IOP = '$IOP'/" settings.py - -# remove ^M in the file -sed -i "s/\r//g" settings.py - -# ############################################################ -# # Step 3: preprocessing obs and/or model data # -# ############################################################ - -cp settings.py ../python/preprocessing/settings.py -echo '***** start preprocessing ********' -echo 'enter the preprocess directory: ../python/preprocessing/' -cd ../python/preprocessing/ - -# # for observation -# merge observed aerosol sizes from several aircraft instruments -#echo '**** merge aerosol size distribution: ****' -#if ($campaign == 'HiScale') then -# python prep_obs_mergesize_HiScale.py -#else if ($campaign == 'ACEENA') -# python prep_obs_mergesize_ACEENA.py -#else -# echo 'ERROR: not recognize campaign name' -#endif - -# for models -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA')) then -echo '**********************************************' -echo '**** extract aerosol size distribution at Surface ****' -python prep_E3SM_sfc_bins.py -echo '**********************************************' -echo '**** extract all other variables at Surface ****' -python prep_E3SM_sfc_allvars.py -echo '**********************************************' -echo '**** extract vertical profiles at ARM site ****' -python 
prep_E3SM_profile_allvars.py -endif - -if (($campaign == 'HISCALE') || ($campaign == 'ACEENA') || ($campaign == 'CSET') ||($campaign == 'SOCRATES')) then -echo '**********************************************' -echo '**** extract aerosol size distribution for aircraft tracks ****' -python prep_E3SM_flighttrack_bins.py -echo '**********************************************' -echo '**** extract all other variables for aircraft tracks ****' -python prep_E3SM_flighttrack_allvars.py -endif - -if (($campaign == 'MAGIC') ||($campaign == 'MARCUS')) then -echo '**********************************************' -echo '**** extract all other variables along ship tracks ****' -python prep_E3SM_shiptrack_allvars.py -echo '**********************************************' -echo '**** extract aerosol size distribution along ship tracks ****' -python prep_E3SM_shiptrack_bins.py -echo '**********************************************' -echo '**** extract vertical profiles along ship tracks ****' -python prep_E3SM_shiptrack_profiles.py -endif - -# ############################################################ -# # Step 4: end of preprocessing # -# ############################################################ - -echo '***** finished ******' - -cd ../../scripts/ - -end - -exit - diff --git a/scripts/scripts_testcase.csh b/scripts/scripts_testcase.csh deleted file mode 100755 index a8ec211..0000000 --- a/scripts/scripts_testcase.csh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/csh - - -# This script makes user-specified plots comparing model simulations with ARM measurements. -# -# -# ############################################################ -# # Step 1: change settings in settings.py # -# # such as campaign name and model names # -# ############################################################ -# -# # load modules. Tested version is Python 3.6.7 (Constance) and Python 3.8.5 (NERSC) - module load python - -# # this should be consistent with settings.py -# set field campaign name. 
More settings on specific field campaigns are in next section -set campaign = 'ACEENA' # HISCALE, ACEENA, CSET, SOCRATES, MAGIC, MARCUS -#foreach campaign ('MAGIC' 'MARCUS' 'CSET' 'SOCRATES') - -# set model names. up to three -# set Model_List = "['CTRL','Nuc','NucSoaCond']" -set Model_List = "['E3SMv1']" - -# set plotting line colors for each model. corresponding to the Model_List -# set color_model = "['b','r','g']" -set color_model = "['r','b','g']" - -# set IOP (or flight date) that the statistics, pdf and percentiles are averaged for. -# options: IOP1, IOP2, ALL, 20160830b -set IOP = 'IOP1' -# foreach IOP ('IOP1' 'IOP2') - -# ############################################################ -# # Step 2: update settings.py with the above settings # -# ############################################################ - -sed -i "s/^campaign = .*/campaign = '$campaign'/" settings_testcase.py -sed -i "s/^Model_List = .*/Model_List = $Model_List/" settings_testcase.py -sed -i "s/^color_model = .*/color_model = $color_model/" settings_testcase.py -sed -i "s/^IOP = .*/IOP = '$IOP'/" settings_testcase.py - -# remove ^M in the file -sed -i "s/\r//g" settings_testcase.py -# - -# ############################################################ -# # Step 3: start plotting # -# ############################################################ - -cp settings_testcase.py ../python/plotting/settings.py - -echo '***** start testcase ********' -echo 'enter the plotting directory: ../python/plotting/' -cd ../python/plotting/ - -############# evaluate with flight measurements ################ -echo '**********************************************' -echo 'plotting flight infomation' -python plot_flight_track_height.py -# timeseries comparison for each flight -echo '**********************************************' -echo 'plotting aerosol composition timeseries for flight' -python plot_flight_timeseries_AerosolComposition.py - - -########################################################### -# end 
-########################################################### -echo '*********** finishing testcase **************' -cd ../../scripts/ - -# end - -exit - diff --git a/scripts/settings.py b/scripts/settings.py deleted file mode 100644 index 21d9d9e..0000000 --- a/scripts/settings.py +++ /dev/null @@ -1,235 +0,0 @@ -# settings of the aerosol diagnostic package - -import numpy as np - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# global settings - -############ these settings will be replaced by the settings in scripts_*.csh ############# -# set field campaign name. More settings on specific field campaigns are in next section -campaign = 'MAGIC' -# set model names. up to three -Model_List = ['E3SMv1'] -# set line colors for each model. corresponding to the Model_List -color_model = ['r','b','g'] -# set IOP that the statistics, pdf and percentiles are averaged for. Only available for HISCALE and ACEENA -# IOP1/IOP2 -IOP = 'IOP1' -############ these settings will be replaced by the settings in scripts_*.csh ############# - - -# path of the diagnostic package -package_path = '../../' - -# path of E3SM model data (h3) for preprocessing. 
list with the same length of Model_List -E3SM_h3_path=[] -E3SM_h3_filehead=[] # filename before .cam.h3.yyyy-mm-dd.00000.nc -for mm in Model_List: - E3SM_h3_path.append('/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_h3/') - if campaign=='MAGIC': - E3SM_h3_filehead.append(mm+'_2012-2013') - else: -# E3SM_h3_filehead.append(mm+'_2014-2018') - E3SM_h3_filehead.append(mm) - #E3SM_h3_path.append('/qfs/projects/eagles/zhan524/simulations/compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm+'/h3/') - #E3SM_h3_filehead.append('compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm) - -# path of output figures -figpath_aircraft_timeseries = package_path+'figures/'+campaign+'/aircraft/timeseries/' -figpath_aircraft_statistics = package_path+'figures/'+campaign+'/aircraft/statistics/' -figpath_ship_timeseries = package_path+'figures/'+campaign+'/ship/timeseries/' -figpath_ship_statistics = package_path+'figures/'+campaign+'/ship/statistics/' -figpath_sfc_timeseries = package_path+'figures/'+campaign+'/surface/timeseries/' -figpath_sfc_statistics = package_path+'figures/'+campaign+'/surface/statistics/' -figpath_profile_timeseries = package_path+'figures/'+campaign+'/profile/timeseries/' - - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# settings for different field campaigns - -# set location and time information -if campaign=='HISCALE': - site='SGP' - # lat/lon at SGP - lat0 = 36.6059 - lon0 = 360-97.48792 # 0-360 - # bin of flight heights to calculate percentiles - height_bin = np.arange(300,4300,300) - # height_bin = np.arange(400,4300,200) - - # time periods for IOPs. needed in preprocessing of surface data - if IOP=='IOP1': - start_date='2016-04-25' - end_date='2016-05-29' - elif IOP=='IOP2': - start_date='2016-08-27' - end_date='2016-09-22' - - # observational data path. 
- # aircraf measurements merged_bin data are used for all plot_flight_*.py to provide flight/cloud/CVI info - merged_size_path=package_path+'data/'+campaign+'/obs/aircraft/merged_bin/' - iwgpath = package_path+'data/'+campaign+'/obs/aircraft/mei-iwg1/' - fimspath = package_path+'data/'+campaign+'/obs/aircraft/wang-fims/' - pcasppath = package_path+'data/'+campaign+'/obs/aircraft/tomlinson-pcasp/' - cvipath = package_path+'data/'+campaign+'/obs/aircraft/pekour-cvi/' - cpcpath = package_path+'data/'+campaign+'/obs/aircraft/mei-cpc/' - ccnpath = package_path+'data/'+campaign+'/obs/aircraft/mei-ccn/' - amspath = package_path+'data/'+campaign+'/obs/aircraft/shilling-ams/' - wcmpath = package_path+'data/'+campaign+'/obs/aircraft/matthews-wcm/' - # surface measurements - smps_pnnl_path = package_path+'data/'+campaign+'/obs/surface/pnnl-smps/' - smps_bnl_path = package_path+'data/'+campaign+'/obs/surface/bnl-smps/' - nanosmps_bnl_path = package_path+'data/'+campaign+'/obs/surface/bnl-nanosmps/' - uhsassfcpath = package_path+'data/'+campaign+'/obs/surface/arm-uhsas/' - cpcsfcpath = package_path+'data/'+campaign+'/obs/surface/arm-cpc/' - cpcusfcpath = package_path+'data/'+campaign+'/obs/surface/arm-cpcu/' - ccnsfcpath = package_path+'data/'+campaign+'/obs/surface/arm-ccn/' - metpath = package_path+'data/'+campaign+'/obs/surface/arm-met/' - acsmpath = package_path+'data/'+campaign+'/obs/surface/arm_acsm/' - # vertical profile measurements - armbepath = package_path+'data/'+campaign+'/obs/profile/sgparmbecldrad/' - - # PBLH data needed for plot_flight_pdf_percentile_SeparatePBLH_hiscale.py only - pblhpath = package_path+'data/'+campaign+'/obs/profile/arm-pblh/' - dlpath = package_path+'data/'+campaign+'/obs/profile/dl-pblh/' - - # model path - # pre-processed model path - E3SM_sfc_path = package_path+'data/'+campaign+'/model/surface/' - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' 
- - -elif campaign=='ACEENA': - site='ENA' - # lat/lon for ENA - lat0 = 39.09527 - lon0 = 360-28.0339 - # bin of flight heights to calculate percentiles - height_bin = np.arange(100,4300,300) - - # time periods for IOPs. needed in preprocessing of surface data - if IOP=='IOP1': - start_date='2017-06-20' - end_date='2017-07-20' - elif IOP=='IOP2': - start_date='2018-01-21' - end_date='2018-02-19' - - # observational data path. - # aircraf measurements merged_bin data are used for all plot_flight_*.py to provide flight/cloud/CVI info - merged_size_path=package_path+'data/'+campaign+'/obs/aircraft/merged_bin/' - iwgpath = package_path+'data/'+campaign+'/obs/aircraft/IWG/' - fimspath = package_path+'data/'+campaign+'/obs/aircraft/FIMS/' - pcasppath = package_path+'data/'+campaign+'/obs/aircraft/pcasp_g1/' - cvipath = package_path+'data/'+campaign+'/obs/aircraft/inletcvi/' - opcpath = package_path+'data/'+campaign+'/obs/aircraft/opciso/' - cpcpath = package_path+'data/'+campaign+'/obs/aircraft/cpc_aaf/' - ccnpath = package_path+'data/'+campaign+'/obs/aircraft/ccn_aaf/' - amspath = package_path+'data/'+campaign+'/obs/aircraft/shilling-hrfams/' - wcmpath = package_path+'data/'+campaign+'/obs/aircraft/wcm_ACEENA/' - # surface measurements - uhsassfcpath = package_path+'data/'+campaign+'/obs/surface/arm_uhsas/' - cpcsfcpath = package_path+'data/'+campaign+'/obs/surface/arm_cpcf/' - cpcusfcpath = 'N/A' - ccnsfcpath = package_path+'data/'+campaign+'/obs/surface/arm_aosccn1/' - metpath = package_path+'data/'+campaign+'/obs/surface/arm_met/' - acsmpath = package_path+'data/'+campaign+'/obs/surface/arm_acsm/' - # vertical profile measurements - armbepath = package_path+'data/'+campaign+'/obs/profile/enaarmbecldrad/' - - # model path - # pre-processed model path - E3SM_sfc_path = package_path+'data/'+campaign+'/model/surface/' - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif 
campaign=='MAGIC': - site='MAG' - - # bin of latitude to calculate ship track composite - latbin = np.arange(21.5,34,1) - - # reference lat/lon - lat0=30. - lon0=230. - - # observational data path. - # ship measurements - shipmetpath=package_path+'data/'+campaign+'/obs/ship/raynolds-marmet/' - shipccnpath=package_path+'data/'+campaign+'/obs/ship/magaosccn100M1.a1/' - shipcpcpath=package_path+'data/'+campaign+'/obs/ship/magaoscpcfM1.a1/' - shipmwrpath=package_path+'data/'+campaign+'/obs/ship/magmwrret1liljclouM1.s2/' - shipuhsaspath=package_path+'data/'+campaign+'/obs/ship/magaosuhsasM1.a1/' - - # model path - # pre-processed model path - E3SM_ship_path = package_path+'data/'+campaign+'/model/shiptrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='MARCUS': - site='MAR' - - # bin of latitude to calculate ship track composite - latbin = np.arange(-68.5,-42,1) - - # reference lat/lon - lat0=-40. - lon0=120. - - - # observational data path. - # ship measurements - shipmetpath=package_path+'data/'+campaign+'/obs/ship/maraadmetX1.b1/' - shipccnpath=package_path+'data/'+campaign+'/obs/ship/maraosccn1colavgM1.b1/' - shipcpcpath=package_path+'data/'+campaign+'/obs/ship/maraoscpcf1mM1.b1/' - shipmwrpath=package_path+'data/'+campaign+'/obs/ship/marmwrret1liljclouM1.s2/' - shipuhsaspath=package_path+'data/'+campaign+'/obs/ship/maraosuhsasM1.a1/' - - # model path - # pre-processed model path - E3SM_ship_path = package_path+'data/'+campaign+'/model/shiptrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='CSET': - # bin of flight heights to calculate percentiles - height_bin = np.arange(200,8000,400) - # bin of latitude to calculate composite percentiles, same as MAGIC - latbin = np.arange(22.5,39,1) - - # lat/lon at the airport - lat0 = 38.5564 - lon0 = 360-121.3120 - - # observational data path. 
- # aircraft measurements - RFpath=package_path+'data/'+campaign+'/obs/aircraft/aircraft_lowrate/' - ccnpath='N/A' - - # model path - # pre-processed model path - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -elif campaign=='SOCRATES': - # bin of flight heights to calculate percentiles - height_bin = np.arange(200,8000,400) - # bin of latitude to calculate composite percentiles - latbin = np.arange(-63.5,-42,1) - # height_bin = np.arange(200,7000,400) - # lat/lon at the airport - lat0 = -42.8371 - lon0 = 147.5054 - - # observational data path. - # aircraft measurements - RFpath=package_path+'data/'+campaign+'/obs/aircraft/aircraft_lowrate/' - ccnpath=package_path+'data/'+campaign+'/obs/aircraft/CCN/' - - # model path - # pre-processed model path - E3SM_aircraft_path = package_path+'data/'+campaign+'/model/flighttrack/' - E3SM_profile_path = package_path+'data/'+campaign+'/model/profile/' - -else: - raise ValueError("does not recognize this campaign: "+campaign) diff --git a/scripts/settings_testcase.py b/scripts/settings_testcase.py deleted file mode 100644 index cecb7f6..0000000 --- a/scripts/settings_testcase.py +++ /dev/null @@ -1,80 +0,0 @@ -# settings of the aerosol diagnostic package - -import numpy as np - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# global settings - -############ these settings will be replaced by the settings in scripts_*.csh ############# -# set field campaign name. More settings on specific field campaigns are in next section -campaign = 'ACEENA' -# set model names. up to three -Model_List = ['E3SMv1'] -# set line colors for each model. corresponding to the Model_List -color_model = ['r','b','g'] -# set IOP that the statistics, pdf and percentiles are averaged for. 
Only available for HISCALE and ACEENA -# IOP1/IOP2 -IOP = 'IOP1' -############ these settings will be replaced by the settings in scripts_*.csh ############# - - -# path of the diagnostic package -package_path = '../../' - -# path of E3SM model data (h3) for preprocessing. list with the same length of Model_List -E3SM_h3_path=[] -E3SM_h3_filehead=[] # filename before .cam.h3.yyyy-mm-dd.00000.nc -for mm in Model_List: - E3SM_h3_path.append('/global/cscratch1/sd/sqtang/EAGLES/E3SM_output/E3SMv1_h3/') - if campaign=='MAGIC': - E3SM_h3_filehead.append(mm+'_2012-2013') - else: -# E3SM_h3_filehead.append(mm+'_2014-2018') - E3SM_h3_filehead.append(mm) - #E3SM_h3_path.append('/qfs/projects/eagles/zhan524/simulations/compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm+'/h3/') - #E3SM_h3_filehead.append('compy_F20TRC5-CMIP6_ne30_EG1_R2_'+mm) - -# path of output figures -figpath_aircraft_timeseries = package_path+'testcase/figures/' -figpath_aircraft_statistics = package_path+'testcase/figures/' -figpath_ship_timeseries = package_path+'testcase/figures/' -figpath_ship_statistics = package_path+'testcase/figures/' -figpath_sfc_timeseries = package_path+'testcase/figures/' -figpath_sfc_statistics = package_path+'testcase/figures/' -figpath_profile_timeseries = package_path+'testcase/figures/' - - -#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# settings for different field campaigns - -# set location and time information -if campaign=='ACEENA': - site='ENA' - # lat/lon for ENA - lat0 = 39.09527 - lon0 = 360-28.0339 - # bin of flight heights to calculate percentiles - height_bin = np.arange(100,4300,300) - - # time periods for IOPs. needed in preprocessing of surface data - if IOP=='IOP1': - start_date='2017-06-30' - end_date='2017-06-31' - elif IOP=='IOP2': - start_date='2018-01-21' - end_date='2018-02-19' - - # observational data path. 
- # aircraf measurements merged_bin data are used for all plot_flight_*.py to provide flight/cloud/CVI info - merged_size_path=package_path+'testcase/data/obs/' - iwgpath = package_path+'testcase/data/obs/' - cvipath = package_path+'testcase/data/obs/' - amspath = package_path+'testcase/data/obs/AMS/' - - # model path - # pre-processed model path - E3SM_aircraft_path = package_path+'testcase/data/model/' - - -else: - raise ValueError("Test case should only for ACEENA. Current campaign is: "+campaign) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..06c6e1c --- /dev/null +++ b/setup.cfg @@ -0,0 +1,26 @@ +[metadata] +name = esmac_diags +version = 0.1 +description = ESMAC_Diags diagnostics package +long_description = file: README.md +license = bsd +classifiers = + Programming Language :: Python :: 3 + +[options] +packages = find: +package_dir = + =src +install_requires = + numpy + pytest + matplotlib + netCDF4 + pip + + +include_package_data = True + +[options.packages.find] +where = src +include = esmac_diags \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..864b617 --- /dev/null +++ b/setup.py @@ -0,0 +1,2 @@ +import setuptools +setuptools.setup() \ No newline at end of file diff --git a/src/esmac_diags/__init__.py b/src/esmac_diags/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/esmac_diags/plotting/__init__.py b/src/esmac_diags/plotting/__init__.py new file mode 100644 index 0000000..2d659f2 --- /dev/null +++ b/src/esmac_diags/plotting/__init__.py @@ -0,0 +1,4 @@ +import glob +from os.path import basename, dirname, join +all_files = glob.glob(join(dirname(__file__), '*.py')) +__all__ = [basename(fname)[:-3] for fname in all_files if not fname.endswith('__.py')] diff --git a/src/esmac_diags/plotting/calc_statistic_flight_CN.py b/src/esmac_diags/plotting/calc_statistic_flight_CN.py new file mode 100644 index 0000000..f1d6d6f --- /dev/null +++ 
b/src/esmac_diags/plotting/calc_statistic_flight_CN.py @@ -0,0 +1,489 @@ +""" +# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration +# for aircraft measurements +# compare models and CPC measurements +""" + +import os +import glob +import numpy as np +import scipy.stats +from ..subroutines.read_aircraft import read_cpc, read_RF_NCAR +from ..subroutines.read_netcdf import read_merged_size,read_extractflight +from ..subroutines.quality_control import qc_cpc_air, qc_remove_neg, qc_mask_takeoff_landing + +def run_plot(settings): + #%% variables from settings + + campaign = settings['campaign'] + Model_List = settings['Model_List'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + IOP = settings.get('IOP', None) + cpcpath = settings.get('cpcpath', None) + merged_size_path = settings.get('merged_size_path', None) + RFpath = settings.get('RFpath', None) + + #%% other settings + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + missing_value = np.nan + + #%% find files for flight information + + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + #%% read all data + + uhsas100_o = np.empty(0) # large particles. 
UHSAS for CSET and SOCRATES, PCASP for ACEENA and HISCALE + cpc10_o = np.empty(0) + cpc3_o = np.empty(0) + ncn100_m = [] + ncn10_m = [] + ncn3_m = [] + nmodels=len(Model_List) + for mm in range(nmodels): + ncn100_m.append(np.empty(0)) + ncn10_m.append(np.empty(0)) + ncn3_m.append(np.empty(0)) + + print('reading '+format(len(alldates))+' files to calculate the statistics: ') + + for date in alldates: + print(date) + + #%% read in Models + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,cpc_m,timeunitm,ncn_unit,ncn_longname)=read_extractflight(filename_m,'NCN') + (timem,heightm,cpcu_m,timeunitm,ncnu_unit,ncnu_longname)=read_extractflight(filename_m,'NUCN') + (timem,heightm,ncnall,timeunitm,ncnall_unit,ncnall_longname)=read_extractflight(filename_m,'NCNall') + + if campaign=='HISCALE': + if IOP=='IOP1': # PCASP for HISCALE IOP1 size from 0.12 to 3 um + ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[120:,:],0)*1e-6)) + elif IOP=='IOP2': # PCASP for HISCALE IOP1 size from 0.09 to 3 um + ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[90:,:],0)*1e-6)) + else: + ncn100_m[mm] = np.hstack((ncn100_m[mm], np.sum(ncnall[100:,:],0)*1e-6)) + ncn10_m[mm] = np.hstack((ncn10_m[mm], cpc_m*1e-6)) # #/m3 to #/cm3 + ncn3_m[mm] = np.hstack((ncn3_m[mm], cpcu_m*1e-6)) # #/m3 to #/cm3 + + + #%% read in flight measurements (CPC and PCASP) for HISCALE and ACEENA + if campaign in ['HISCALE', 'ACEENA']: + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + if campaign=='HISCALE': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') + filename_merge = merged_size_path+'merged_bin_fims_pcasp_HISCALE_'+date+'.nc' + elif campaign=='ACEENA': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') + filename_merge = merged_size_path+'merged_bin_fims_pcasp_opc_ACEENA_'+date+'.nc' + filename_c.sort() + + # read in CPC + if len(filename_c)==1 or 
len(filename_c)==2: # some days have two flights + (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) + # fill missing timestep + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) + elif np.logical_and(campaign=='HISCALE', date=='20160425a'): + cpc=np.insert(cpc,0,cpc[:,0],axis=1) + cpc[0,0]=cpc[0,0]-1 + time_cpc = cpc[0,:] + cpc10 = cpc[1,:] + cpc3 = cpc[2,:] + elif len(filename_c)==0: + time_cpc=timem + cpc10=np.nan*np.empty([len(timem)]) + cpc3=np.nan*np.empty([len(timem)]) + else: + raise ValueError('find too many files: '+filename_c) + + # some quality checks + (cpc3,cpc10) = qc_cpc_air(cpc3, cpc10) + + # read in PCASP + (time_merge,size,pcasp,timeunit,pcaspunit,pcasplongname)=read_merged_size(filename_merge,'totalnum_pcasp') + pcasp=qc_remove_neg(pcasp) + if len(time_merge)!=len(time_cpc): + raise ValueError('time dimension is inconsistent ') + + # exclude 30min after takeoff and before landing + cpc3 = qc_mask_takeoff_landing(time_cpc,cpc3) + cpc10 = qc_mask_takeoff_landing(time_cpc,cpc10) + pcasp = qc_mask_takeoff_landing(time_cpc,pcasp) + + cpc10_o=np.hstack((cpc10_o, cpc10)) + cpc3_o=np.hstack((cpc3_o, cpc3)) + uhsas100_o=np.hstack((uhsas100_o, pcasp)) + + #%% read in flight data (for CSET and SOCRATES) + elif campaign in ['CSET', 'SOCRATES']: + filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') + if len(filename)==1 or len(filename)==2: # SOCRATES has two flights in 20180217, choose the later one + (time_cpc,cpc10,timeunit,cpc10unit,cpc10longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCN') + if campaign=='CSET': + (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCU100_RWOOU') + elif campaign=='SOCRATES': + # there are two variables: CONCU100_CVIU and CONCU100_LWII + (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'CONCU100_LWII') + else: + raise ValueError('find too 
many files: '+filename) + + # some quality checks + uhsas100=qc_remove_neg(uhsas100) + + # exclude 30min after takeoff and before landing + cpc10 = qc_mask_takeoff_landing(time_cpc,cpc10) + uhsas100 = qc_mask_takeoff_landing(time_cpc,uhsas100) + + cpc10_o=np.hstack((cpc10_o, cpc10)) + uhsas100_o=np.hstack((uhsas100_o, uhsas100)) + + + #%% calculate statistics + + # select only valid data in obs and the corresponding data in models + idx100 = ~np.isnan(uhsas100_o) + idx10 = ~np.isnan(cpc10_o) + idx3 = ~np.isnan(cpc3_o) + + mean100 = [None]*(nmodels+1) + mean10 = [None]*(nmodels+1) + mean3 = [None]*(nmodels+1) + std100 = [None]*(nmodels+1) + std10 = [None]*(nmodels+1) + std3 = [None]*(nmodels+1) + bias100 = [None]*(nmodels) + bias10 = [None]*(nmodels) + bias3 = [None]*(nmodels) + corr100 = [None]*(nmodels) + corr10 = [None]*(nmodels) + corr3 = [None]*(nmodels) + rmse100 = [None]*(nmodels) + rmse10 = [None]*(nmodels) + rmse3 = [None]*(nmodels) + p10_100 = [None]*(nmodels+1) + p10_10 = [None]*(nmodels+1) + p10_3 = [None]*(nmodels+1) + p25_100 = [None]*(nmodels+1) + p25_10 = [None]*(nmodels+1) + p25_3 = [None]*(nmodels+1) + p75_100 = [None]*(nmodels+1) + p75_10 = [None]*(nmodels+1) + p75_3 = [None]*(nmodels+1) + p90_100 = [None]*(nmodels+1) + p90_10 = [None]*(nmodels+1) + p90_3 = [None]*(nmodels+1) + + if sum(idx10)/len(idx10)<0.1: # two few observation available + # for obs + mean10[nmodels] = missing_value + std10[nmodels] = missing_value + p10_10[nmodels] = missing_value + p25_10[nmodels] = missing_value + p75_10[nmodels] = missing_value + p90_10[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn10_m[mm][idx10]) + std10[mm] = np.nanstd(ncn10_m[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],90) + bias10[mm] = missing_value + corr10[mm] = 
[missing_value, missing_value] + rmse10[mm] = missing_value + else: + # for obs + mean10[nmodels] = np.nanmean(cpc10_o[idx10]) + std10[nmodels] = np.nanstd(cpc10_o[idx10]) + p10_10[nmodels] = np.nanpercentile(cpc10_o[idx10],10) + p25_10[nmodels] = np.nanpercentile(cpc10_o[idx10],25) + p75_10[nmodels] = np.nanpercentile(cpc10_o[idx10],75) + p90_10[nmodels] = np.nanpercentile(cpc10_o[idx10],90) + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn10_m[mm][idx10]) + std10[mm] = np.nanstd(ncn10_m[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn10_m[mm][idx10],90) + bias10[mm] = mean10[mm] - mean10[nmodels] + c10 = scipy.stats.pearsonr(ncn10_m[mm][idx10],cpc10_o[idx10]) + corr10[mm] = [c10[0],c10[1]] + rmse10[mm] = np.sqrt(((ncn10_m[mm][idx10]-cpc10_o[idx10])**2).mean()) + + if sum(idx100)/len(idx100)<0.1: # two few observation available + # for obs + mean100[nmodels] = missing_value + std100[nmodels] = missing_value + p10_100[nmodels] = missing_value + p25_100[nmodels] = missing_value + p75_100[nmodels] = missing_value + p90_100[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) + std100[mm] = np.nanstd(ncn100_m[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) + bias100[mm] = missing_value + corr100[mm] = [missing_value, missing_value] + rmse100[mm] = missing_value + else: + # for obs + mean100[nmodels] = np.nanmean(uhsas100_o[idx100]) + std100[nmodels] = np.nanstd(uhsas100_o[idx100]) + p10_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],10) + p25_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],25) + p75_100[nmodels] = 
np.nanpercentile(uhsas100_o[idx100],75) + p90_100[nmodels] = np.nanpercentile(uhsas100_o[idx100],90) + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) + std100[mm] = np.nanstd(ncn100_m[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) + bias100[mm] = mean100[mm] - mean100[nmodels] + c100 = scipy.stats.pearsonr(ncn100_m[mm][idx100],uhsas100_o[idx100]) + corr100[mm] = [c100[0],c100[1]] + rmse100[mm] = np.sqrt(((ncn100_m[mm][idx100]-uhsas100_o[idx100])**2).mean()) + + if len(idx3)==0 or sum(idx3)/len(idx3)<0.1: # two few observation available + # for obs + mean3[nmodels] = missing_value + std3[nmodels] = missing_value + p10_3[nmodels] = missing_value + p25_3[nmodels] = missing_value + p75_3[nmodels] = missing_value + p90_3[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean3[mm] = np.nanmean(ncn3_m[mm][idx3]) + std3[mm] = np.nanstd(ncn3_m[mm][idx3]) + p10_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],10) + p25_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],25) + p75_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],75) + p90_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],90) + bias3[mm] = missing_value + corr3[mm] = [missing_value, missing_value] + rmse3[mm] = missing_value + else: + # for obs + mean3[nmodels] = np.nanmean(cpc3_o[idx3]) + std3[nmodels] = np.nanstd(cpc3_o[idx3]) + p10_3[nmodels] = np.nanpercentile(cpc3_o[idx3],10) + p25_3[nmodels] = np.nanpercentile(cpc3_o[idx3],25) + p75_3[nmodels] = np.nanpercentile(cpc3_o[idx3],75) + p90_3[nmodels] = np.nanpercentile(cpc3_o[idx3],90) + # for models + for mm in range(nmodels): + mean3[mm] = np.nanmean(ncn3_m[mm][idx3]) + std3[mm] = np.nanstd(ncn3_m[mm][idx3]) + p10_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],10) + p25_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],25) + p75_3[mm] = 
np.nanpercentile(ncn3_m[mm][idx3],75) + p90_3[mm] = np.nanpercentile(ncn3_m[mm][idx3],90) + bias3[mm] = mean3[mm] - mean3[nmodels] + c3 = scipy.stats.pearsonr(ncn3_m[mm][idx3],cpc3_o[idx3]) + corr3[mm] = [c3[0],c3[1]] + rmse3[mm] = np.sqrt(((ncn3_m[mm][idx3]-cpc3_o[idx3])**2).mean()) + + + #%% write out files + + if campaign in ['HISCALE', 'ACEENA']: + outfile = figpath_aircraft_statistics+'statistics_CN10nm_'+campaign+'_'+IOP+'.txt' + elif campaign in ['CSET', 'SOCRATES']: + outfile = figpath_aircraft_statistics+'statistics_CN10nm_'+campaign+'.txt' + + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). sample size '+format(sum(idx10))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean10)): + f.write(format(mean10[ii],'10.2f')+', ') + # write std + f.write('\n std. 
dev.,') + for ii in range(len(std10)): + f.write(format(std10[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_10)): + f.write(format(p10_10[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_10)): + f.write(format(p25_10[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_10)): + f.write(format(p75_10[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_10)): + f.write(format(p90_10[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias10)): + f.write(format(bias10[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse10)): + f.write(format(rmse10[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][1],'10.2f')+', ') + + + if campaign in ['HISCALE', 'ACEENA']: + outfile = figpath_aircraft_statistics+'statistics_CN3nm_'+campaign+'_'+IOP+'.txt' + print('write statistics to file '+outfile) + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with CPC(>3nm). sample size '+format(sum(idx3))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean3)): + f.write(format(mean3[ii],'10.2f')+', ') + # write std + f.write('\n std. 
dev.,') + for ii in range(len(std3)): + f.write(format(std3[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_3)): + f.write(format(p10_3[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_3)): + f.write(format(p25_3[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_3)): + f.write(format(p75_3[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_3)): + f.write(format(p90_3[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias3)): + f.write(format(bias3[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse3)): + f.write(format(rmse3[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse3)): + f.write(format(corr3[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse3)): + f.write(format(corr3[ii][1],'10.2f')+', ') + + + if campaign in ['HISCALE', 'ACEENA']: + outfile = figpath_aircraft_statistics+'statistics_CN100nm_'+campaign+'_'+IOP+'.txt' + elif campaign in ['CSET', 'SOCRATES']: + outfile = figpath_aircraft_statistics+'statistics_CN100nm_'+campaign+'.txt' + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + if campaign in ['CSET', 'SOCRATES']: + f.write('statistics of Aerosol Number Concentration comparing with UHSAS(>100nm). sample size '+format(sum(idx100))+'\n') + elif campaign=='ACEENA': + f.write('statistics of Aerosol Number Concentration comparing with PCASP(>100nm). sample size '+format(sum(idx100))+'\n') + elif campaign=='HISCALE': + f.write('statistics of Aerosol Number Concentration comparing with PCASP(>120nm for IOP1, >90nm for IOP2). 
sample size '+format(sum(idx100))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean100)): + f.write(format(mean100[ii],'10.2f')+', ') + # write std + f.write('\n std. dev.,') + for ii in range(len(std100)): + f.write(format(std100[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_100)): + f.write(format(p10_100[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_100)): + f.write(format(p25_100[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_100)): + f.write(format(p75_100[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_100)): + f.write(format(p90_100[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias100)): + f.write(format(bias100[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse100)): + f.write(format(rmse100[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][1],'10.2f')+', ') \ No newline at end of file diff --git a/src/esmac_diags/plotting/calc_statistic_sfc_CN.py b/src/esmac_diags/plotting/calc_statistic_sfc_CN.py new file mode 100644 index 0000000..d474e0d --- /dev/null +++ b/src/esmac_diags/plotting/calc_statistic_sfc_CN.py @@ -0,0 +1,542 @@ +""" +# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration +# for surface measurements +# compare models and CPC measurements +""" + +# import matplotlib.pyplot as plt +import os +import glob +import numpy as np +import scipy.stats +from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd, 
timeunit2cday +from ..subroutines.read_ARMdata import read_cpc,read_uhsas +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.quality_control import qc_remove_neg, qc_mask_qcflag_cpc,qc_mask_qcflag +from ..subroutines.specific_data_treatment import avg_time_1d + + +def run_plot(settings): + #%% variables from settings + + campaign = settings['campaign'] + Model_List = settings['Model_List'] + cpcsfcpath = settings['cpcsfcpath'] + cpcusfcpath = settings['cpcusfcpath'] + uhsassfcpath = settings['uhsassfcpath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_statistics = settings['figpath_sfc_statistics'] + + IOP = settings.get('IOP', None) + + #%% other settings + # set time range you want to average + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + + if not os.path.exists(figpath_sfc_statistics): + os.makedirs(figpath_sfc_statistics) + + missing_value = np.nan + + + #%% read in obs data + if campaign=='ACEENA': + # cpc + if IOP=='IOP1': + lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.2017062*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201707*') + elif IOP=='IOP2': + lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201801*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201802*') + lst.sort() + t_cpc=np.empty(0) + cpc=np.empty(0) + for filename in lst: + (time,data,qc,timeunit,cpcunit)=read_cpc(filename) + data = qc_mask_qcflag(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for consistent comparison with model + time2=np.arange(0,86400,3600) + data2 = avg_time_1d(np.array(time),np.array(data),time2) + t_cpc=np.hstack((t_cpc, cday+time2/86400)) + cpc=np.hstack((cpc, data2)) + # fill missing days + t_cpc2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
+ cpc2=avg_time_1d(t_cpc,cpc,t_cpc2) + cpc=cpc2 + t_cpc=t_cpc2 + cpc = qc_remove_neg(cpc) + # no cpcu + t_cpcu = np.array(np.nan) + cpcu = np.array(np.nan) + + # uhsas + if IOP=='IOP1': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') + elif IOP=='IOP2': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') + lst.sort() + t_uhsas=np.empty(0) + uhsas=np.empty(0) + for filename in lst: + (time,dmin,dmax,data,timeunit,uhsasunit,long_name)=read_uhsas(filename) + # sum up uhsas data for size >100nm + data=np.ma.filled(data,np.nan) + idx100 = dmin>=100 + data1=np.nansum(data[:,idx100],1) + # average in time for consistent comparison with model + time2=np.arange(0,86400,3600) + data2 = avg_time_1d(np.array(time),np.array(data1),time2) + t_uhsas=np.hstack((t_uhsas, timeunit2cday(timeunit)+time2/86400)) + uhsas=np.hstack((uhsas, data2)) + # fill missing days + t_uhsas2=np.arange(cday1*24,cday2*24+0.01,1)/24. + uhsas2=avg_time_1d(t_uhsas,uhsas,t_uhsas2) + uhsas=uhsas2 + t_uhsas=t_uhsas2 + + + elif campaign=='HISCALE': + # cpc + if IOP=='IOP1': + lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201604*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201605*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201608*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201609*') + lst.sort() + t_cpc=np.empty(0) + cpc=np.empty(0) + if len(lst)==0: + t_cpc = np.array(np.nan) + cpc = np.array(np.nan) + else: + for filename in lst: + (time,data,qc,timeunit,cpcunit)=read_cpc(filename) + data = qc_mask_qcflag_cpc(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_cpc= np.hstack((t_cpc,cday+time/86400)) + cpc=np.hstack((cpc,data)) + # average in time for consistent comparison with model + t_cpc2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
+ cpc2=avg_time_1d(t_cpc,cpc,t_cpc2) + cpc=cpc2 + t_cpc=t_cpc2 + cpc = qc_remove_neg(cpc) + + # cpcu + if IOP=='IOP1': + lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201604*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201605*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201608*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201609*') + lst.sort() + t_cpcu=np.empty(0) + cpcu=np.empty(0) + if len(lst)==0: + t_cpcu = np.array(np.nan) + cpcu = np.array(np.nan) + else: + for filename in lst: + (time,data,qc,timeunit,cpcuunit)=read_cpc(filename) + data = qc_mask_qcflag_cpc(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # t_cpcu= np.hstack((t_cpcu,cday+time/86400)) + # cpcu=np.hstack((cpcu,data)) + # average in time for consistent comparison with model + time2=np.arange(0,86400,3600) + data2 = avg_time_1d(np.array(time),np.array(data),time2) + t_cpcu=np.hstack((t_cpcu, cday+time2/86400)) + cpcu=np.hstack((cpcu, data2)) + cpcu = qc_remove_neg(cpcu) + # # average in time for consistent comparison with model + # t_cpcu2=np.arange(t_cpcu[0]*24,t_cpcu[-1]*24,1)/24. 
+ # cpcu2=avg_time_1d(t_cpcu,cpcu,t_cpcu2) + # cpcu=cpcu2 + # t_cpcu=t_cpcu2 + + # uhsas + if IOP=='IOP1': + lst = glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201604*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201605*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201606*') + elif IOP=='IOP2': + lst = glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201608*')+glob.glob(uhsassfcpath+'sgpaosuhsasS01.a1.201609*') + lst.sort() + t_uhsas=np.empty(0) + uhsas=np.empty(0) + for filename in lst: + (time,dmin,dmax,data,timeunit,uhsasunit,long_name)=read_uhsas(filename) + # sum up uhsas data for size >100nm + data=np.ma.filled(data,np.nan) + idx100 = dmin>=100 + data1=np.nansum(data[:,idx100],1) + # average in time for consistent comparison with model + time2=np.arange(0,86400,3600) + data2 = avg_time_1d(np.array(time),np.array(data1),time2) + t_uhsas=np.hstack((t_uhsas, timeunit2cday(timeunit)+time2/86400)) + uhsas=np.hstack((uhsas, data2)) + # fill missing days + t_uhsas2=np.arange(cday1*24,cday2*24+0.01,1)/24. 
+ uhsas2=avg_time_1d(t_uhsas,uhsas,t_uhsas2) + uhsas=uhsas2 + t_uhsas=t_uhsas2 + + + #%% read in models + ncn100_m = [] + ncn_m = [] + nucn_m = [] + nmodels = len(Model_List) + for mm in range(nmodels): + tmp_ncn100=np.empty(0) + tmp_ncn=np.empty(0) + tmp_nucn=np.empty(0) + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCN') + (time,nucn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NUCN') + (time,ncnall,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCNall') + + timem = np.hstack((timem,time)) + tmp_ncn = np.hstack((tmp_ncn,ncn*1e-6)) + tmp_nucn = np.hstack((tmp_nucn,nucn*1e-6)) + tmp_ncn100 = np.hstack((tmp_ncn100, np.sum(ncnall[100:,:],0)*1e-6)) + + ncn100_m.append(tmp_ncn100) + ncn_m.append(tmp_ncn) + nucn_m.append(tmp_nucn) + + #%% calculate statistics + + # only choose the prescribed time range + idx = np.logical_and(t_cpc>=cday1, t_cpc<=cday2) + cpc=cpc[idx] + t_cpc=t_cpc[idx] + idx = np.logical_and(t_cpcu>=cday1, t_cpcu<=cday2) + cpcu=cpcu[idx] + t_cpcu=t_cpcu[idx] + idx = np.logical_and(t_uhsas>=cday1, t_uhsas<=cday2) + uhsas=uhsas[idx] + t_uhsas=t_uhsas[idx] + idx = np.logical_and(timem>=cday1, timem<=cday2) + for mm in range(nmodels): + ncn100_m[mm]=ncn100_m[mm][idx] + ncn_m[mm]=ncn_m[mm][idx] + nucn_m[mm]=nucn_m[mm][idx] + timem=timem[idx] + + + + # select only valid data in obs and the corresponding data in models + idx100 = ~np.isnan(uhsas) + idx10 = ~np.isnan(cpc) + idx3 = ~np.isnan(cpcu) + + mean100 = [None]*(nmodels+1) + mean10 = [None]*(nmodels+1) + mean3 = [None]*(nmodels+1) + std100 = [None]*(nmodels+1) + std10 = [None]*(nmodels+1) + std3 = [None]*(nmodels+1) + bias100 = [None]*(nmodels) + bias10 = [None]*(nmodels) + bias3 = [None]*(nmodels) + corr100 = [None]*(nmodels) + corr10 = [None]*(nmodels) + 
corr3 = [None]*(nmodels) + rmse100 = [None]*(nmodels) + rmse10 = [None]*(nmodels) + rmse3 = [None]*(nmodels) + p10_100 = [None]*(nmodels+1) + p10_10 = [None]*(nmodels+1) + p10_3 = [None]*(nmodels+1) + p25_100 = [None]*(nmodels+1) + p25_10 = [None]*(nmodels+1) + p25_3 = [None]*(nmodels+1) + p75_100 = [None]*(nmodels+1) + p75_10 = [None]*(nmodels+1) + p75_3 = [None]*(nmodels+1) + p90_100 = [None]*(nmodels+1) + p90_10 = [None]*(nmodels+1) + p90_3 = [None]*(nmodels+1) + + if len(idx100)==0 or sum(idx100)/len(idx100)<0.1: # two few observation available + # for obs + mean100[nmodels] = missing_value + std100[nmodels] = missing_value + p10_100[nmodels] = missing_value + p25_100[nmodels] = missing_value + p75_100[nmodels] = missing_value + p90_100[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) + std100[mm] = np.nanstd(ncn100_m[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) + bias100[mm] = missing_value + corr100[mm] = [missing_value, missing_value] + rmse100[mm] = missing_value + else: + # for obs + mean100[nmodels] = np.nanmean(uhsas[idx100]) + std100[nmodels] = np.nanstd(uhsas[idx100]) + p10_100[nmodels] = np.nanpercentile(uhsas[idx100],10) + p25_100[nmodels] = np.nanpercentile(uhsas[idx100],25) + p75_100[nmodels] = np.nanpercentile(uhsas[idx100],75) + p90_100[nmodels] = np.nanpercentile(uhsas[idx100],90) + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100_m[mm][idx100]) + std100[mm] = np.nanstd(ncn100_m[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100_m[mm][idx100],90) + bias100[mm] = mean100[mm] - 
mean100[nmodels] + c100 = scipy.stats.pearsonr(ncn100_m[mm][idx100],uhsas[idx100]) + corr100[mm] = [c100[0],c100[1]] + rmse100[mm] = np.sqrt(((ncn100_m[mm][idx100]-uhsas[idx100])**2).mean()) + + if len(idx10)==0 or sum(idx10)/len(idx10)<0.1: # two few observation available + # for obs + mean10[nmodels] = missing_value + std10[nmodels] = missing_value + p10_10[nmodels] = missing_value + p25_10[nmodels] = missing_value + p75_10[nmodels] = missing_value + p90_10[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn_m[mm][idx10]) + std10[mm] = np.nanstd(ncn_m[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn_m[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn_m[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn_m[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn_m[mm][idx10],90) + bias10[mm] = missing_value + corr10[mm] = [missing_value, missing_value] + rmse10[mm] = missing_value + else: + # for obs + mean10[nmodels] = np.nanmean(cpc[idx10]) + std10[nmodels] = np.nanstd(cpc[idx10]) + p10_10[nmodels] = np.nanpercentile(cpc[idx10],10) + p25_10[nmodels] = np.nanpercentile(cpc[idx10],25) + p75_10[nmodels] = np.nanpercentile(cpc[idx10],75) + p90_10[nmodels] = np.nanpercentile(cpc[idx10],90) + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn_m[mm][idx10]) + std10[mm] = np.nanstd(ncn_m[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn_m[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn_m[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn_m[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn_m[mm][idx10],90) + bias10[mm] = mean10[mm] - mean10[nmodels] + c10 = scipy.stats.pearsonr(ncn_m[mm][idx10],cpc[idx10]) + corr10[mm] = [c10[0],c10[1]] + rmse10[mm] = np.sqrt(((ncn_m[mm][idx10]-cpc[idx10])**2).mean()) + + if len(idx3)==0 or sum(idx3)/len(idx3)<0.1: # two few observation available + # for obs + mean3[nmodels] = missing_value + std3[nmodels] = missing_value + p10_3[nmodels] = missing_value + p25_3[nmodels] = 
missing_value + p75_3[nmodels] = missing_value + p90_3[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean3[mm] = np.nanmean(nucn_m[mm][idx3]) + std3[mm] = np.nanstd(nucn_m[mm][idx3]) + p10_3[mm] = np.nanpercentile(nucn_m[mm][idx3],10) + p25_3[mm] = np.nanpercentile(nucn_m[mm][idx3],25) + p75_3[mm] = np.nanpercentile(nucn_m[mm][idx3],75) + p90_3[mm] = np.nanpercentile(nucn_m[mm][idx3],90) + bias3[mm] = missing_value + corr3[mm] = [missing_value, missing_value] + rmse3[mm] = missing_value + else: + # for obs + mean3[nmodels] = np.nanmean(cpcu[idx3]) + std3[nmodels] = np.nanstd(cpcu[idx3]) + p10_3[nmodels] = np.nanpercentile(cpcu[idx3],10) + p25_3[nmodels] = np.nanpercentile(cpcu[idx3],25) + p75_3[nmodels] = np.nanpercentile(cpcu[idx3],75) + p90_3[nmodels] = np.nanpercentile(cpcu[idx3],90) + # for models + for mm in range(nmodels): + mean3[mm] = np.nanmean(nucn_m[mm][idx3]) + std3[mm] = np.nanstd(nucn_m[mm][idx3]) + p10_3[mm] = np.nanpercentile(nucn_m[mm][idx3],10) + p25_3[mm] = np.nanpercentile(nucn_m[mm][idx3],25) + p75_3[mm] = np.nanpercentile(nucn_m[mm][idx3],75) + p90_3[mm] = np.nanpercentile(nucn_m[mm][idx3],90) + bias3[mm] = mean3[mm] - mean3[nmodels] + c3 = scipy.stats.pearsonr(nucn_m[mm][idx3],cpcu[idx3]) + corr3[mm] = [c3[0],c3[1]] + rmse3[mm] = np.sqrt(((nucn_m[mm][idx3]-cpcu[idx3])**2).mean()) + + + #%% write out files + + outfile = figpath_sfc_statistics+'statistics_CN10nm_'+campaign+'_'+IOP+'.txt' + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). sample size '+format(sum(idx10))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean10)): + f.write(format(mean10[ii],'10.2f')+', ') + # write std + f.write('\n std. 
dev.,') + for ii in range(len(std10)): + f.write(format(std10[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_10)): + f.write(format(p10_10[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_10)): + f.write(format(p25_10[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_10)): + f.write(format(p75_10[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_10)): + f.write(format(p90_10[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias10)): + f.write(format(bias10[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse10)): + f.write(format(rmse10[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][1],'10.2f')+', ') + + + outfile = figpath_sfc_statistics+'statistics_CN3nm_'+campaign+'_'+IOP+'.txt' + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with CPC(>3nm). sample size '+format(sum(idx3))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean3)): + f.write(format(mean3[ii],'10.2f')+', ') + # write std + f.write('\n std. 
dev.,') + for ii in range(len(std3)): + f.write(format(std3[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_3)): + f.write(format(p10_3[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_3)): + f.write(format(p25_3[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_3)): + f.write(format(p75_3[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_3)): + f.write(format(p90_3[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias3)): + f.write(format(bias3[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse3)): + f.write(format(rmse3[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse3)): + f.write(format(corr3[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse3)): + f.write(format(corr3[ii][1],'10.2f')+', ') + + + outfile = figpath_sfc_statistics+'statistics_CN100nm_'+campaign+'_'+IOP+'.txt' + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with UHSAS (>100nm). sample size '+format(sum(idx100))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean100)): + f.write(format(mean100[ii],'10.2f')+', ') + # write std + f.write('\n std. 
dev.,') + for ii in range(len(std100)): + f.write(format(std100[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_100)): + f.write(format(p10_100[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_100)): + f.write(format(p25_100[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_100)): + f.write(format(p75_100[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_100)): + f.write(format(p90_100[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias100)): + f.write(format(bias100[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse100)): + f.write(format(rmse100[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][1],'10.2f')+', ') diff --git a/src/esmac_diags/plotting/calc_statistic_ship_CN.py b/src/esmac_diags/plotting/calc_statistic_ship_CN.py new file mode 100644 index 0000000..8d7a982 --- /dev/null +++ b/src/esmac_diags/plotting/calc_statistic_ship_CN.py @@ -0,0 +1,395 @@ +""" +# calculate statistics (mean, bias, correlation, RMSE) of Aerosol number concentration +# for ship measurements +# compare models and CPC/UHSAS measurements +""" + +import os +import glob +import numpy as np +import scipy.stats +from ..subroutines.read_ARMdata import read_cpc, read_uhsas +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.time_format_change import cday2mmdd +from ..subroutines.specific_data_treatment import mask_model_ps, avg_time_1d +from ..subroutines.quality_control import qc_mask_qcflag, qc_remove_neg + + +def run_plot(settings): + #%% variables from settings + + campaign = settings['campaign'] + Model_List = settings['Model_List'] + shipcpcpath = 
settings['shipcpcpath'] + shipmetpath = settings['shipmetpath'] + shipuhsaspath = settings['shipuhsaspath'] + E3SM_ship_path = settings['E3SM_ship_path'] + figpath_ship_statistics = settings['figpath_ship_statistics'] + + #%% other settings + if not os.path.exists(figpath_ship_statistics): + os.makedirs(figpath_ship_statistics) + missing_value = np.nan + + #%% find files + lst = glob.glob(E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') + lst.sort() + + nmodels = len(Model_List) + cpcall = np.empty(0) + uhsasall = np.empty(0) + ncn10all = [] + ncn100all = [] + for mm in range(nmodels): + ncn10all.append(np.empty(0)) + ncn100all.append(np.empty(0)) + + for ll in range(len(lst)): + + if campaign=='MAGIC': + legnum=lst[ll][-5:-3] + elif campaign=='MARCUS': + legnum=lst[ll][-4] + else: + raise ValueError('please check campaign name: '+campaign) + print('legnum '+format(legnum)) + + #%% read in model + datam = list() + databins = list() + for mm in range(nmodels): + filenamem = E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' + + (timem,NCNall,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCNall') + (timem,data,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCN') + + datam.append(data*1e-6) # change unit from 1/m3 to 1/cm3 + databins.append(NCNall*1e-6) # change unit from 1/m3 to 1/cm3 + + # mask data where model grid is not at ocean surface (Ps is too different than obs) + filenamem = E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' + (timem,psm,timeunitx,psmunit,psmlongname)=read_E3SM(filenamem,'PS') + datamask = mask_model_ps(timem,0.01*psm,legnum,campaign,shipmetpath) + + datam[mm][datamask]=np.nan + + year0 = str(int(timeunitm.split()[2][0:4])+1) + + #%% read in observations + # find the days related to the ship leg + day = [int(a) for a in timem] + day = list(set(day)) + day.sort() + + # CPC + t_cpc=np.empty(0) + cpc=np.empty(0) + for dd in day: + + if 
campaign=='MAGIC': + if int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*') + if len(filenameo)==0: + continue # some days may be missing + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + if len(filenameo)==0: + continue # some days may be missing + + + (time,obs,qc,timeunit,dataunit)=read_cpc(filenameo[0]) + obs = qc_mask_qcflag(obs,qc) + t_cpc=np.hstack((t_cpc, dd+time/86400)) + cpc=np.hstack((cpc, obs)) + + # if time expands two years, add 365 days to the second year + if t_cpc[0]>t_cpc[-1]: + t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 + + # UHSAS + t_uh=np.empty(0) + uhsas=np.empty(0) + for dd in day: + + if campaign=='MAGIC': + if int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = 
glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + + if len(filenameo)==0: + continue # some days may be missing + if len(filenameo)>1: + raise ValueError('find too many files: '+filenameo) + + (time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) + obs=np.ma.filled(obs) + obs=qc_remove_neg(obs) + uhsas=np.hstack((uhsas, np.nansum(obs,1))) + t_uh = np.hstack((t_uh,time/86400+dd)) + + # if no obs available, fill one data with NaN + if len(t_uh)==0: + t_uh=[timem[0],timem[1]] + uhsas=np.full((2),np.nan) + + # if time expands two years, add 365 days to the second year + if t_uh[0]>t_uh[-1]: + t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 + + + #%% Calculate model aerosol number concentration for UHSAS size range + b1 = int(dmin[0]) + b2 = int(dmax[-1]) + datam2=list() + for mm in range(nmodels): + datam2.append(np.nansum(databins[mm][b1-1:b2,:],0)) + datam2[mm][datamask]=np.nan + + #%% average into 1hr resolution + time0 = np.arange(timem[0],timem[-1],1./24) + cpc = avg_time_1d(t_cpc,cpc,time0) + uhsas = avg_time_1d(t_uh,uhsas,time0) + for mm in range(nmodels): + datam[mm] = avg_time_1d(timem,datam[mm],time0) + datam2[mm] = avg_time_1d(timem,datam2[mm],time0) + + #%% + cpcall = np.hstack((cpcall,cpc)) + uhsasall = np.hstack((uhsasall,uhsas)) + for mm in range(nmodels): + ncn10all[mm] = np.hstack((ncn10all[mm],datam[mm])) + ncn100all[mm] = np.hstack((ncn100all[mm],datam2[mm])) + + + #%% calculate statistics + + if ncn10all[0].shape != cpcall.shape or ncn100all[0].shape != uhsasall.shape: + raise ValueError('observation and model dimensions are inconsitent ') + + # select only valid data in obs and the corresponding data in models (all data are not NAN) + idx10 = sum(np.vstack((~np.isnan(ncn10all),~np.isnan(cpcall))))==nmodels+1 + idx100 = sum(np.vstack((~np.isnan(ncn100all),~np.isnan(uhsasall))))==nmodels+1 + + mean10 = [None]*(nmodels+1) + mean100 = [None]*(nmodels+1) + std10 = [None]*(nmodels+1) + std100 = 
[None]*(nmodels+1) + bias10 = [None]*(nmodels) + bias100 = [None]*(nmodels) + corr10 = [None]*(nmodels) + corr100 = [None]*(nmodels) + rmse10 = [None]*(nmodels) + rmse100 = [None]*(nmodels) + p10_100 = [None]*(nmodels+1) + p10_10 = [None]*(nmodels+1) + p25_100 = [None]*(nmodels+1) + p25_10 = [None]*(nmodels+1) + p75_100 = [None]*(nmodels+1) + p75_10 = [None]*(nmodels+1) + p90_100 = [None]*(nmodels+1) + p90_10 = [None]*(nmodels+1) + + + if len(idx10)==0 or sum(idx10)/len(idx10)<0.1: # two few observation available + # for obs + mean10[nmodels] = missing_value + std10[nmodels] = missing_value + p10_10[nmodels] = missing_value + p25_10[nmodels] = missing_value + p75_10[nmodels] = missing_value + p90_10[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn10all[mm][idx10]) + std10[mm] = np.nanstd(ncn10all[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn10all[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn10all[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn10all[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn10all[mm][idx10],90) + bias10[mm] = missing_value + corr10[mm] = [missing_value, missing_value] + rmse10[mm] = missing_value + else: + # for obs + mean10[nmodels] = np.nanmean(cpcall[idx10]) + std10[nmodels] = np.nanstd(cpcall[idx10]) + p10_10[nmodels] = np.nanpercentile(cpcall[idx10],10) + p25_10[nmodels] = np.nanpercentile(cpcall[idx10],25) + p75_10[nmodels] = np.nanpercentile(cpcall[idx10],75) + p90_10[nmodels] = np.nanpercentile(cpcall[idx10],90) + # for models + for mm in range(nmodels): + mean10[mm] = np.nanmean(ncn10all[mm][idx10]) + std10[mm] = np.nanstd(ncn10all[mm][idx10]) + p10_10[mm] = np.nanpercentile(ncn10all[mm][idx10],10) + p25_10[mm] = np.nanpercentile(ncn10all[mm][idx10],25) + p75_10[mm] = np.nanpercentile(ncn10all[mm][idx10],75) + p90_10[mm] = np.nanpercentile(ncn10all[mm][idx10],90) + bias10[mm] = mean10[mm] - mean10[nmodels] + c10 = scipy.stats.pearsonr(ncn10all[mm][idx10],cpcall[idx10]) + 
corr10[mm] = [c10[0],c10[1]] + rmse10[mm] = np.sqrt(((ncn10all[mm][idx10]-cpcall[idx10])**2).mean()) + + if len(idx100)==0 or sum(idx100)/len(idx100)<0.1: # too few observations available + # for obs + mean100[nmodels] = missing_value + std100[nmodels] = missing_value + p10_100[nmodels] = missing_value + p25_100[nmodels] = missing_value + p75_100[nmodels] = missing_value + p90_100[nmodels] = missing_value + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100all[mm][idx100]) + std100[mm] = np.nanstd(ncn100all[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100all[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100all[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100all[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100all[mm][idx100],90) + bias100[mm] = missing_value + corr100[mm] = [missing_value, missing_value] + rmse100[mm] = missing_value + else: + # for obs + mean100[nmodels] = np.nanmean(uhsasall[idx100]) + std100[nmodels] = np.nanstd(uhsasall[idx100]) + p10_100[nmodels] = np.nanpercentile(uhsasall[idx100],10) + p25_100[nmodels] = np.nanpercentile(uhsasall[idx100],25) + p75_100[nmodels] = np.nanpercentile(uhsasall[idx100],75) + p90_100[nmodels] = np.nanpercentile(uhsasall[idx100],90) + # for models + for mm in range(nmodels): + mean100[mm] = np.nanmean(ncn100all[mm][idx100]) + std100[mm] = np.nanstd(ncn100all[mm][idx100]) + p10_100[mm] = np.nanpercentile(ncn100all[mm][idx100],10) + p25_100[mm] = np.nanpercentile(ncn100all[mm][idx100],25) + p75_100[mm] = np.nanpercentile(ncn100all[mm][idx100],75) + p90_100[mm] = np.nanpercentile(ncn100all[mm][idx100],90) + bias100[mm] = mean100[mm] - mean100[nmodels] + c100 = scipy.stats.pearsonr(ncn100all[mm][idx100],uhsasall[idx100]) + corr100[mm] = [c100[0],c100[1]] + rmse100[mm] = np.sqrt(((ncn100all[mm][idx100]-uhsasall[idx100])**2).mean()) + + + #%% write out files + + outfile = figpath_ship_statistics+'statistics_CN10nm_'+campaign+'.txt' + print('write statistics to file '+outfile) + 
+ with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with CPC(>10nm). sample size '+format(sum(idx10))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean10)): + f.write(format(mean10[ii],'10.2f')+', ') + # write std + f.write('\n std. dev.,') + for ii in range(len(std10)): + f.write(format(std10[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_10)): + f.write(format(p10_10[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_10)): + f.write(format(p25_10[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_10)): + f.write(format(p75_10[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_10)): + f.write(format(p90_10[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias10)): + f.write(format(bias10[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse10)): + f.write(format(rmse10[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse10)): + f.write(format(corr10[ii][1],'10.2f')+', ') + + + outfile = figpath_ship_statistics+'statistics_CN100nm_'+campaign+'.txt' + print('write statistics to file '+outfile) + + with open(outfile, 'w') as f: + f.write('statistics of Aerosol Number Concentration comparing with UHSAS100(>100nm). 
sample size '+format(sum(idx100))+'\n') + line1 = list(Model_List) + line1.insert(0,' --- ') + line1.append('OBS') + for ii in range(len(line1)): + f.write(format(line1[ii],'10s')+', ') + # write mean + f.write('\n mean,\t') + for ii in range(len(mean100)): + f.write(format(mean100[ii],'10.2f')+', ') + # write std + f.write('\n std. dev.,') + for ii in range(len(std100)): + f.write(format(std100[ii],'10.2f')+', ') + # write percentiles + f.write('\n 10% percentile: ') + for ii in range(len(p10_100)): + f.write(format(p10_100[ii],'10.2f')+', ') + f.write('\n 25% percentile: ') + for ii in range(len(p25_100)): + f.write(format(p25_100[ii],'10.2f')+', ') + f.write('\n 75% percentile: ') + for ii in range(len(p75_100)): + f.write(format(p75_100[ii],'10.2f')+', ') + f.write('\n 90% percentile: ') + for ii in range(len(p90_100)): + f.write(format(p90_100[ii],'10.2f')+', ') + # write bias + f.write('\n bias,\t') + for ii in range(len(bias100)): + f.write(format(bias100[ii],'10.2f')+', ') + # write rmse + f.write('\n RMSE,\t') + for ii in range(len(rmse100)): + f.write(format(rmse100[ii],'10.2f')+', ') + # write correlation + f.write('\n corrcoef,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][0],'10.4f')+', ') + # write p value of correlation + f.write('\n P_corr,\t') + for ii in range(len(rmse100)): + f.write(format(corr100[ii][1],'10.2f')+', ') + diff --git a/src/esmac_diags/plotting/contour_flight_timeseries_AerosolSize.py b/src/esmac_diags/plotting/contour_flight_timeseries_AerosolSize.py new file mode 100644 index 0000000..09de4bd --- /dev/null +++ b/src/esmac_diags/plotting/contour_flight_timeseries_AerosolSize.py @@ -0,0 +1,181 @@ +""" +# plot aircraft track data +# timeseries of aerosol size distribution +# compare models and aircraft measurements +""" + + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_RF_NCAR +from ..subroutines.specific_data_treatment import lwc2cflag, 
avg_time_2d +# from time_format_change import yyyymmdd2cday, hhmmss2sec +from ..subroutines.read_netcdf import read_merged_size,read_extractflight + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_timeseries = settings['figpath_aircraft_timeseries'] + + IOP = settings.get('IOP', None) + merged_size_path = settings.get('merged_size_path', None) + RFpath = settings.get('RFpath', None) + + #%% other settings + if not os.path.exists(figpath_aircraft_timeseries): + os.makedirs(figpath_aircraft_timeseries) + + #%% find files for flight information + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + # dN/dlnDp for model + dlnDp_m = np.empty((3000)) + for bb in range(3000): + dlnDp_m[bb]=np.log((bb+2)/(bb+1)) + + for date in alldates: + + #%% read in Models + nmodels=len(Model_List) + data_m = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') + datam=datam*1e-6 # #/m3 to #/cm3 + # average in time for quicker plot + time2 = np.arange(timem[0],timem[-1],60) + data2 = 
avg_time_2d(timem,datam.T,time2) + datam = data2.T + # change to dN/dlnDp + for tt in range(len(time2)): + datam[:,tt]=datam[:,tt]/dlnDp_m + data_m.append(datam) + + # timem = (np.array(timem)-int(timem[0]))*24 + timem = time2/3600. + + + #%% read observation + if campaign in ['HISCALE', 'ACEENA']: + + if campaign=='HISCALE': + filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' + elif campaign=='ACEENA': + filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' + #% read in flight information + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') + (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') + (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') + (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') + (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') + time=np.ma.compressed(time) + size=size*1000. + + elif campaign in ['CSET', 'SOCRATES']: + filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') + # cloud flag + (time,lwc,timeunit,lwcunit,lwclongname,size,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') + # calculate cloud flag based on LWC + cflag=lwc2cflag(lwc,lwcunit) + if campaign=='CSET': + (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_RWOOU') + elif campaign=='SOCRATES': + # there are two variables: CUHSAS_CVIU and CUHSAS_LWII + (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_LWII') + merge = uhsas[:,0,:] + size=size*1000. + sizeh = size + sizel = np.hstack((2*size[0]-size[1], size[0:-1])) + + # merge=merge.T + # time=time/3600. + ## average in time for quicker plot + time2=np.arange(time[0],time[-1],60) + data2 = avg_time_2d(time,merge,time2) + merge = data2.T + time=time2/3600. 
+ + # change to dN/dlnDp + for bb in range(len(size)): + dlnDp=np.log(sizeh[bb]/sizel[bb]) + merge[bb,:]=merge[bb,:]/dlnDp + + + + #%% make plot + + figname = figpath_aircraft_timeseries+'AerosolSize_'+campaign+'_'+date+'.png' + print('plotting figures to '+figname) + + #fig = plt.figure() + fig,ax = plt.subplots(nmodels+1,1,figsize=(8,2*(nmodels+1))) # figsize in inches + plt.tight_layout(h_pad=1.1) #pad=0.4, w_pad=0.5, h_pad=1.0 + plt.subplots_adjust(right=0.9,bottom=0.1) + + leveltick=[0.1,1,10,100,1000,10000] + levellist=np.arange(np.log(leveltick[0]),11,.5) + + merge[merge<0.01]=0.01 + h1 = ax[0].contourf(time,size,np.log(merge),levellist,cmap=plt.get_cmap('jet')) + + d_mam=np.arange(1,3001) + h2=[] + for mm in range(nmodels): + datam = data_m[mm] + datam[datam<0.01]=0.01 + h_m = ax[mm+1].contourf(timem,d_mam,np.log(datam),levellist,cmap=plt.get_cmap('jet')) + h2.append(h_m) + + # colorbar + cax = plt.axes([0.95, 0.2, 0.02, 0.6]) + cbar=fig.colorbar(h2[0], cax=cax, ticks=np.log(leveltick)) + cbar.ax.set_yticklabels(leveltick, fontsize=14) + + # set axis + for ii in range(nmodels+1): + ax[ii].set_xlim(timem[0],timem[-1]) + ax[ii].set_yscale('log') + ax[ii].set_ylim(3, 5000) + ax[ii].set_yticks([10,100,1000]) + ax[ii].tick_params(color='k',labelsize=14) + if ii==0: + ax[ii].text(0.01, 0.94, 'OBS', fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') + else: + ax[ii].text(0.01, 0.94, Model_List[ii-1], fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') + + ax[1].set_ylabel('Diameter (nm)',fontsize=14) + ax[0].set_title('Size Distribution (#/dlnDp, cm-3)',fontsize=15) + ax[nmodels].set_xlabel('time (hour UTC) in '+date,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() diff --git a/src/esmac_diags/plotting/contour_sfc_diurnalcycle_AerosolSize.py b/src/esmac_diags/plotting/contour_sfc_diurnalcycle_AerosolSize.py new file mode 100644 index 0000000..0782d00 --- /dev/null +++ 
b/src/esmac_diags/plotting/contour_sfc_diurnalcycle_AerosolSize.py @@ -0,0 +1,238 @@ +""" +# plot surface diurnal cycle of aerosol size distribution +# compare models and surface measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd +from ..subroutines.read_surface import read_smpsb_pnnl,read_smps_bin +from ..subroutines.read_ARMdata import read_uhsas, read_smps_bnl +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.specific_data_treatment import avg_time_2d +from ..subroutines.quality_control import qc_mask_qcflag, qc_remove_neg,qc_correction_nanosmps + + +def run_plot(settings): + #%% variables from settings + + campaign = settings['campaign'] + Model_List = settings['Model_List'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_timeseries = settings['figpath_sfc_timeseries'] + + IOP = settings.get('IOP', None) + uhsassfcpath = settings.get('uhsassfcpath', None) + + if campaign=='HISCALE': + if IOP=='IOP1': + smps_bnl_path = settings['smps_bnl_path'] + nanosmps_bnl_path = settings['nanosmps_bnl_path'] + elif IOP=='IOP2': + smps_pnnl_path = settings['smps_pnnl_path'] + + #%% other settings + if not os.path.exists(figpath_sfc_timeseries): + os.makedirs(figpath_sfc_timeseries) + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + #%% read in obs data + if campaign=='ACEENA': + if IOP=='IOP1': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') + elif IOP=='IOP2': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') + lst.sort() + t_uhsas=np.empty(0) + uhsas=np.empty((0,99)) + for filename in lst: + (time,dmin,dmax,data,timeunit,dataunit,long_name) = read_uhsas(filename) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker plot + time2=np.arange(300,86400,600) + data2 = avg_time_2d(time,data,time2) + t_uhsas=np.hstack((t_uhsas, cday+time2/86400)) + uhsas=np.vstack((uhsas, data2)) + size_u = (dmin+dmax)/2 + uhsas=qc_remove_neg(uhsas) + # change to dN/dlogDp + dlnDp_u=np.empty(99) + for bb in range(len(size_u)): + dlnDp_u[bb]=np.log10(dmax[bb]/dmin[bb]) + uhsas[:,bb]=uhsas[:,bb]/dlnDp_u[bb] + + time0 = np.array(t_uhsas) + size = np.array(size_u) + obs = np.array(uhsas.T) + + elif campaign=='HISCALE': + if IOP=='IOP1': + lst = glob.glob(smps_bnl_path+'*.nc') + lst.sort() + t_smps=np.empty(0) + smps=np.empty((0,192)) + for filename in lst: + (time,size,flag,timeunit,dataunit,smps_longname)=read_smps_bnl(filename,'status_flag') + (time,size,data,timeunit,smpsunit,smps_longname)=read_smps_bnl(filename,'number_size_distribution') + data=qc_mask_qcflag(data,flag) + data=qc_remove_neg(data) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_smps=np.hstack((t_smps, cday+time/86400)) + smps=np.vstack((smps, data)) + smps=smps.T + # combine with nanoSMPS + lst2 = glob.glob(nanosmps_bnl_path+'*.nc') + lst2.sort() + t_nano=np.empty(0) + nanosmps=np.empty((0,192)) + for filename2 in lst2: + (timen,sizen,flagn,timenunit,datanunit,long_name)=read_smps_bnl(filename2,'status_flag') + 
(timen,sizen,datan,timenunit,nanounit,nanoname)=read_smps_bnl(filename2,'number_size_distribution') + datan=qc_mask_qcflag(datan,flagn) + datan=qc_remove_neg(datan) + timestr=timenunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_nano=np.hstack((t_nano, cday+timen/86400)) + nanosmps=np.vstack((nanosmps, datan)) + # nanosmps is overcounting, adjust nanosmps value for smooth transition to SMPS + nanosmps=qc_correction_nanosmps(nanosmps.T) + for tt in range(smps.shape[1]): + if any(t_nano==t_smps[tt]): + smps[0:80,tt]=nanosmps[0:80,t_nano==t_smps[tt]].reshape(80) + + elif IOP=='IOP2': + data=read_smpsb_pnnl(smps_pnnl_path+'HiScaleSMPSb_SGP_20160827_R1.ict') + size=read_smps_bin(smps_pnnl_path+'NSD_column_size_chart.txt') + time=data[0,:] + smps=data[1:-1,:] + flag=data[-1,:] + cday=yyyymmdd2cday('2016-08-27','noleap') + t_smps=cday+time/86400 + smps=qc_mask_qcflag(smps.T,flag).T + + time0 = np.array(t_smps) + size = np.array(size) + obs = np.array(smps) + + # SMPS is already divided by log10 + + else: + raise ValueError('please check campaign name: '+campaign) + + # only choose the time period between start_date and end_date + obs=obs[:,np.logical_and(time0>=cday1, time0=cday1, time0=days[dd], timemtimem0[-1]: + timem0[timem0<=timem0[-1]]=timem0[timem0<=timem0[-1]]+365 + + # average in time for quicker plot + timem=np.arange(timem0[0]-0.1,timem0[-1]+0.1,1/24.) 
+ data2 = avg_time_2d(timem0,data.T,timem) + data2 = data2.T + + # change to dN/dlnDp + for bb in range(3000): + dlnDp=np.log((bb+2)/(bb+1)) + data2[bb,:]=data2[bb,:]/dlnDp + datam.append(data2*1e-6) # change unit from 1/m3 to 1/cm3 + + year0 = str(int(timeunitm.split()[2][0:4])+1) + + #%% read in observations + # find the days related to the ship leg + day = [int(a) for a in timem] + day = list(set(day)) + day.sort() + + nbins = 99 # for UHSAS at MAGIC + t_uh=np.empty(0) + uhsasall=np.empty((0,nbins)) + for dd in day: + if campaign=='MAGIC': + if int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + + if len(filenameo)==0: + continue # some days may be missing + if len(filenameo)>1: + raise ValueError('find too many files: ' + filenameo) + + + (time,dmin,dmax,uhsas,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) + + uhsas=np.ma.filled(uhsas) + uhsas=qc_remove_neg(uhsas) + + # average in time for quicker plot + time2=np.arange(1800,86400,3600) + data2 = avg_time_2d(time,uhsas,time2) + uhsasall=np.vstack((uhsasall, data2)) + t_uh = np.hstack((t_uh,time2/86400+dd)) + + # if no obs available, fill one data with NaN + if len(t_uh)==0: + t_uh=[timem[0],timem[1]] + uhsasall=np.full((2,nbins),np.nan) + + # if time expands two years, add 365 days to the second year 
+ if t_uh[0]>t_uh[-1]: + t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 + + size_u = (dmin+dmax)/2 + dsize_u = dmax-dmin + + uhsasall=qc_remove_neg(uhsasall) + + # change to dN/dlnDp + dlnDp_u=np.empty(nbins) + for bb in range(len(size_u)): + dlnDp_u[bb]=np.log(dmax[bb]/dmin[bb]) + uhsasall[:,bb]=uhsasall[:,bb]/dlnDp_u[bb] + + #%% make plot + + figname = figpath_ship_timeseries+'timeseries_AerosolSize_'+campaign+'_ship'+legnum+'.png' + print('plotting figures to '+figname) + + #fig = plt.figure() + fig,ax = plt.subplots(nmodels+1,1,figsize=(8,2*(nmodels+1))) # figsize in inches + plt.tight_layout(h_pad=1.1) #pad=0.4, w_pad=0.5, h_pad=1.0 + plt.subplots_adjust(right=0.9,bottom=0.1) + + leveltick=[0.1,1,10,100,1000,10000,100000] + levellist=np.arange(np.log(leveltick[0]),12,.5) + + uhsasall[uhsasall<0.01]=0.01 + h1 = ax[0].contourf(t_uh,size_u,np.log(uhsasall.T),levellist,cmap=plt.get_cmap('jet')) + + size_m=np.arange(1,3001) + h2=[] + for mm in range(nmodels): + data = datam[mm] + data[data<0.01]=0.01 + h_m = ax[mm+1].contourf(timem,size_m,np.log(data),levellist,cmap=plt.get_cmap('jet')) + h2.append(h_m) + + # colorbar + cax = plt.axes([0.92, 0.2, 0.02, 0.6]) + cbar=fig.colorbar(h2[0], cax=cax, ticks=np.log(leveltick)) + cbar.ax.set_yticklabels(leveltick, fontsize=14) + + # set axis + for ii in range(nmodels+1): + ax[ii].set_xlim(timem[0],timem[-1]) + ax[ii].set_yscale('log') + ax[ii].set_ylim(5, 3000) + ax[ii].set_yticks([10,100,1000]) + ax[ii].tick_params(color='k',labelsize=14) + if ii==0: + ax[ii].text(0.01, 0.94, 'OBS', fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') + else: + ax[ii].text(0.01, 0.94, Model_List[ii-1], fontsize=14,transform=ax[ii].transAxes, verticalalignment='top') + + ax[1].set_ylabel('Diameter (nm)',fontsize=14) + ax[0].set_title('Size Distribution (#/dlnDp, cm-3)',fontsize=15) + ax[nmodels].set_xlabel('Calendar Day in '+year0,fontsize=14) + + fig.text(.08, .97,'ship leg '+legnum, fontsize=12) + + 
fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_pdf_AerosolSize.py b/src/esmac_diags/plotting/plot_flight_pdf_AerosolSize.py new file mode 100644 index 0000000..4b7af36 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_pdf_AerosolSize.py @@ -0,0 +1,239 @@ +""" +# plot mean aerosol size distribution for aircraft track data +# average for each IOP +# compare models and aircraft measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_RF_NCAR +from ..subroutines.specific_data_treatment import lwc2cflag +# from time_format_change import yyyymmdd2cday, hhmmss2sec +from ..subroutines.read_netcdf import read_merged_size,read_extractflight + +from ..subroutines.specific_data_treatment import avg_time_2d +from ..subroutines.quality_control import qc_mask_cloudflag, qc_uhsas_RF_NCAR,qc_remove_neg,qc_mask_takeoff_landing + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + IOP = settings.get('IOP', None) + merged_size_path = settings.get('merged_size_path', None) + + #%% other settings + + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + + #%% find files for flight information + + lst = sorted(glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc')) + if len(lst)==0: + raise ValueError('cannot find any file') + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + 
elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + print('reading '+format(len(alldates))+' files to calculate mean aerosol pdf: ') + + nmodels=len(Model_List) + pdfall_m = [np.empty((3000,0)) for mm in range(nmodels)] + size_m = np.zeros(3000) + pdf_model = [size_m for mm in range(nmodels)] + if 'pdf_obs' in locals(): + del pdf_obs + + # number of valid timesteps + n_o = 0 + n_m = [0 for mm in range(nmodels)] + + + # dN/dlnDp for model + dlnDp_m = np.empty((3000)) + for bb in range(3000): + dlnDp_m[bb]=np.log((bb+2)/(bb+1)) + + for date in alldates[:]: + print(date) + + #%% read in Models + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') + datam=datam*1e-6 # #/m3 to #/cm3 + + # average in time for quicker plot + time2=np.arange(300,86400,600) + data2 = avg_time_2d(timem,datam.T,time2) + datam=data2.T + timem=time2 + + for tt in range(len(timem)): + datam[:,tt]=datam[:,tt]/dlnDp_m + + pdfall_m[mm] = np.column_stack((pdfall_m[mm],datam)) + for tt in range(len(timem)): + if ~np.isnan(datam[0,tt]): + pdf_model[mm] = pdf_model[mm]+datam[:,tt] + n_m[mm]=n_m[mm]+1 + + #%% read observation + if campaign in ['HISCALE', 'ACEENA']: + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + + if campaign=='HISCALE': + filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' + elif campaign=='ACEENA': + filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' + + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + 
(time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') + (time,size,legnum,timeunit,zunit,long_name)=read_merged_size(filename,'leg_number') + (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') + (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') + (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') + time=np.ma.compressed(time) + size=size*1000. + merge = qc_mask_cloudflag(merge,cflag) + + # average in time for quicker plot + time2=np.arange(300,86400,600) + data2 = avg_time_2d(time,merge,time2) + merge = data2.T + time=time2/3600. + + + elif campaign in ['CSET', 'SOCRATES']: + filename = glob.glob(settings['RFpath']+'RF*'+date+'*.PNI.nc') + # cloud flag + (time,lwc,timeunit,lwcunit,lwclongname,size,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') + if campaign=='CSET': + (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_RWOOU') + elif campaign=='SOCRATES': + # there are two variables: CUHSAS_CVIU and CUHSAS_LWII + (time,uhsas,timeunit,dataunit,long_name,size,cellunit)=read_RF_NCAR(filename[-1],'CUHSAS_LWII') + uhsas=uhsas[:,0,:] + # calculate cloud flag based on LWC + cflag=lwc2cflag(lwc,lwcunit) + uhsas = qc_mask_cloudflag(uhsas,cflag) + uhsas= qc_uhsas_RF_NCAR(uhsas) + + # average in time for quicker plot + time2=np.arange(300,86400,600) + data2 = avg_time_2d(time,uhsas,time2) + merge = data2.T + time0 = np.array(time) + time=time2/3600. + + size=size*1000. 
+ sizeh = size + sizel = np.hstack((2*size[0]-size[1], size[0:-1])) + + # change to dN/dlnDp + for bb in range(len(size)): + dlnDp=np.log(sizeh[bb]/sizel[bb]) + merge[bb,:]=merge[bb,:]/dlnDp + + merge=qc_remove_neg(merge) + + # exclude 30min after takeoff and before landing + merge = qc_mask_takeoff_landing(time2,merge) + + # fig,ax=plt.subplots() + # ax.plot(merge[9,:]) + # ax.set_title(date) + # error + + if ('pdf_obs' in locals()) == False: + pdf_obs = np.zeros(len(size)) + pdfall_o = np.empty((len(size),0)) + idx_valid = ~np.isnan(np.mean(merge,0)) + pdf_obs = pdf_obs + np.sum(merge[:,idx_valid],1) + pdfall_o = np.hstack((pdfall_o,np.array(merge[:,idx_valid]))) + n_o = n_o + np.sum(idx_valid) + + + #%% calculate mean pdf + + pdf_obs[pdf_obs<1e-3]=np.nan + pdf_obs=pdf_obs/n_o + for mm in range(nmodels): + pdf_model[mm]=pdf_model[mm]/n_m[mm] + + #%% + pdfall_o[pdfall_o<0]=np.nan + pct1_o = [np.nanpercentile(pdfall_o[i,:],10) for i in range(len(size))] + pct2_o = [np.nanpercentile(pdfall_o[i,:],90) for i in range(len(size))] + pct1_m = [[] for mm in range(nmodels)] + pct2_m = [[] for mm in range(nmodels)] + for mm in range(nmodels): + pct1_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:],10) for i in range(3000)] + pct2_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:],90) for i in range(3000)] + + #%% make plot + + if campaign in ['HISCALE', 'ACEENA']: + figname = figpath_aircraft_statistics+'pdf_AerosolSize_'+campaign+'_'+IOP+'.png' + else: + figname = figpath_aircraft_statistics+'pdf_AerosolSize_'+campaign+'.png' + + print('plotting figures to '+figname) + + #fig = plt.figure() + fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches + + ax.plot(size,pdf_obs,color='k',label='Obs') + for mm in range(nmodels): + ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + + ax.fill_between(size,pct1_o,pct2_o, alpha=0.5, facecolor='gray') + for mm in range(nmodels): + ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], 
alpha=0.2, facecolor=color_model[mm]) + + ax.legend(loc='upper right', shadow=False, fontsize='medium') + ax.tick_params(color='k',labelsize=12) + ax.set_xscale('log') + ax.set_yscale('log') + ax.set_ylim(0.01,1e4) + ax.set_xlim(0.67,4500) + ax.set_xlabel('Diameter (nm)',fontsize=13) + ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) + + if campaign in ['HISCALE', 'ACEENA']: + ax.set_title(campaign+' '+IOP,fontsize=14) + else: + ax.set_title(campaign,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() + diff --git a/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py b/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py new file mode 100644 index 0000000..0bad3a4 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparateCloud_aceena.py @@ -0,0 +1,421 @@ +""" +# plot_flight_pdf_percentile_SeparateCloud_aceena.py +# plot pdf and percentiles in several aerosol size bins for aircraft data +# separated by observed PBLH +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +# from time_format_change import hhmmss2sec,yyyymmdd2cday +from ..subroutines.read_aircraft import read_cpc +from ..subroutines.read_netcdf import read_merged_size,read_extractflight +from ..subroutines.quality_control import qc_remove_neg + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + cpcpath = settings['cpcpath'] + merged_size_path = settings['merged_size_path'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + IOP = settings.get('IOP', None) + + #%% other settings + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + # set final bin sizes + binl = np.array([3, 15, 70, 300, 1000]) + binh = np.array([10, 
70, 300, 1000, 3000]) + binm = (binl+binh)/2 + + d_mam=np.arange(1,3001) + blen = len(binm) + + # numbers of bins in merged size data + b2len=67 + + #%% find files for flight information + + lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') + lst.sort() + + if len(lst)==0: + raise ValueError('cannot find any file') + + # choose files for specific IOP + if campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + lst.sort() + else: + raise ValueError('this code is only for ACEENA, check the campaign settings') + + if len(lst)==0: + raise ValueError('cannot find any file') + + #%% read all data + + # pdf average for legs + pdf_sfc_obs=np.zeros([b2len,0]) + pdf_near_obs=np.zeros([b2len,0]) + pdf_above_obs=np.zeros([b2len,0]) + + cpcdiff_sfc=np.zeros([0]) + cpcdiff_near=np.zeros([0]) + cpcdiff_above=np.zeros([0]) + + nmodels=len(Model_List) + pdf_sfc_model=[] + pdf_near_model=[] + pdf_above_model=[] + for mm in range(nmodels): + pdf_sfc_model.append(np.zeros([3000,0])) + pdf_near_model.append(np.zeros([3000,0])) + pdf_above_model.append(np.zeros([3000,0])) + + # pdf for the final bin sizes + p2_sfc_obs = [] + p2_near_obs = [] + p2_above_obs = [] + p2_sfc_model=[] + p2_near_model=[] + p2_above_model=[] + for mm in range(nmodels): + p2_sfc_model.append([]) + p2_near_model.append([]) + p2_above_model.append([]) + + print('reading '+format(len(lst))+' files to calculate the statistics: ') + + for filename in lst: + + # get date info: + date=filename[-12:-3] + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + print(date) + + #%% read aerosol size distribution + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') + (time,size,legnum,timeunit,cunit,long_name)=read_merged_size(filename,'leg_number') + 
(time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') + (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') + (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') + (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') + time=np.ma.compressed(time) + time=time/3600. + size=np.ma.compressed(size)*1000 # um to nm + sizel=sizel*1000 + sizeh=sizeh*1000 + merge=qc_remove_neg(merge) + + + + #%% read in CPC measurements + + if campaign=='ACEENA': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') + else: + raise ValueError('this code is only for ACEENA, check the campaign settings') + filename_c.sort() + # read in data + if len(filename_c)==1 or len(filename_c)==2: # some days have two flights + (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) + elif np.logical_and(campaign=='HiScale', date=='20160425a'): + cpc=np.insert(cpc,0,cpc[:,0],axis=1) + cpc[0,0]=cpc[0,0]-1 + time_cpc = cpc[0,:]/3600 + cpc10 = cpc[1,:] + cpc3 = cpc[2,:] + elif len(filename_c)==0: + time_cpc=time + cpc10=np.nan*np.empty([len(time)]) + cpc3=np.nan*np.empty([len(time)]) + else: + raise ValueError('find too many files in ' + filename_c) + + cpcdiff = cpc3-cpc10 + cpcdiff=qc_remove_neg(cpcdiff) + + #%% read in Models + datam2 = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') + datam2.append(datam*1e-6) # #/m3 to #/cm3 + + timem = (timem - int(timem[0]))*24 + + if len(timem)!=len(time) or len(time)!=len(time_cpc): + raise ValueError('time dimension for obs and/or model are not consistent') + + #%% get leg information near surface, near cloud base and above 
cloud + # leg_sfc = np.ma.compressed (np.unique(legnum[height<=200])[1:]) + leg_sfc = list() + leg_near = list() + leg_above = list() + leg_toomuchcld = list() + leg_nocld = list() + leg_nodata = list() + + for ii in range(1,max(legnum)+1): + # for ii in range(5,7): + idx_l = legnum==ii + # if any(cflag[idx_l]==1): + # make sure cloud flag less than 10% of time and FIMS is not all missing (e.g., TD mode) + if np.sum(cflag[idx_l])/len(cflag[idx_l]) >= 0.1: + # if np.sum(cflag[idx_l]) > 1: + leg_toomuchcld.append(ii) + continue + if all(np.isnan(merge[idx_l,10])): + leg_nodata.append(ii) + continue + + legheight = np.mean(height[idx_l]) + # if legheight<=250: # leg number near surface + # leg_sfc.append(ii) + + # find the mean cloud height within 1hr of the leg + i = np.argwhere(legnum==ii) + i_start = max(i[0][0]-3600, 0) + i_end = min(i[-1][0]+3600, len(cflag)) + if all(cflag[i_start:i_end]!=1): + leg_nocld.append(ii) + if legheight>2500: + leg_above.append(ii) + elif legheight<=250: # leg number near surface + leg_sfc.append(ii) + continue + idx_c = cflag[i_start:i_end]==1 + cldheight = np.mean(height[i_start:i_end][idx_c]) + cldmax = np.max(height[i_start:i_end][idx_c]) + cldmin = np.min(height[i_start:i_end][idx_c]) + # if (legheight-cldheight)<=200 and (legheight-cldheight)>=-400: + if legheight>=max(cldmin,250) and legheight<=cldmax: + leg_near.append(ii) + elif legheight<cldmin: # below cloud base: treat as near-surface leg (NOTE: reconstructed — the span between '<' and '>' was eaten in extraction; confirm against upstream source) + leg_sfc.append(ii) + elif legheight>cldmax: + leg_above.append(ii) + + #%% calculate all pdfs + for ii in range(len(leg_sfc)): + idx = legnum==leg_sfc[ii] + tmp_obs = np.nanmean(merge[idx,:],0) + tmp_obs[tmp_obs==0]=np.nan + pdf_sfc_obs = np.hstack((pdf_sfc_obs, np.reshape(tmp_obs,(b2len,1)))) + cpcdiff_sfc = np.hstack((cpcdiff_sfc, np.nanmean(cpcdiff[idx]))) + for mm in range(nmodels): + tmp_model = np.nanmean(datam2[mm][:,idx],1) + tmp_model[tmp_model==0]=np.nan + pdf_sfc_model[mm] = np.hstack((pdf_sfc_model[mm], np.reshape(tmp_model,(3000,1)))) + + for ii in range(len(leg_near)): + idx =
legnum==leg_near[ii] + tmp_obs = np.nanmean(merge[idx,:],0) + tmp_obs[tmp_obs==0]=np.nan + pdf_near_obs = np.hstack((pdf_near_obs, np.reshape(tmp_obs,(b2len,1)))) + cpcdiff_near = np.hstack((cpcdiff_near, np.nanmean(cpcdiff[idx]))) + for mm in range(nmodels): + tmp_model = np.nanmean(datam2[mm][:,idx],1) + tmp_model[tmp_model==0]=np.nan + pdf_near_model[mm] = np.hstack((pdf_near_model[mm], np.reshape(tmp_model,(3000,1)))) + + for ii in range(len(leg_above)): + idx = legnum==leg_above[ii] + tmp_obs = np.nanmean(merge[idx,:],0) + tmp_obs[tmp_obs==0]=np.nan + pdf_above_obs = np.hstack((pdf_above_obs, np.reshape(tmp_obs,(b2len,1)))) + cpcdiff_above = np.hstack((cpcdiff_above, np.nanmean(cpcdiff[idx]))) + for mm in range(nmodels): + tmp_model = np.nanmean(datam2[mm][:,idx],1) + tmp_model[tmp_model==0]=np.nan + pdf_above_model[mm] = np.hstack((pdf_above_model[mm], np.reshape(tmp_model,(3000,1)))) + + + #%% change to the pre-defined size bins + + for bb in range(blen): + idx_m = np.logical_and(d_mam>=binl[bb], d_mam<=binh[bb]) + for mm in range(nmodels): + p2_sfc_model[mm].append(np.nansum(pdf_sfc_model[mm][idx_m,:],0)) + p2_near_model[mm].append(np.nansum(pdf_near_model[mm][idx_m,:],0)) + p2_above_model[mm].append(np.nansum(pdf_above_model[mm][idx_m,:],0)) + + if bb==0: + p2_sfc_obs.append(cpcdiff_sfc[~np.isnan(cpcdiff_sfc)]) + p2_near_obs.append(cpcdiff_near[~np.isnan(cpcdiff_near)]) + p2_above_obs.append(cpcdiff_above[~np.isnan(cpcdiff_above)]) + else: + idx_o = np.logical_and(sizel>=binl[bb], sizeh<=binh[bb]) + if any(idx_o): + tmp_sfc = np.nansum(pdf_sfc_obs[idx_o,:],0) + tmp_near = np.nansum(pdf_near_obs[idx_o,:],0) + tmp_above = np.nansum(pdf_above_obs[idx_o,:],0) + p2_sfc_obs.append(tmp_sfc[tmp_sfc!=0]) + p2_near_obs.append(tmp_near[tmp_near!=0]) + p2_above_obs.append(tmp_above[tmp_above!=0]) + else: + raise ValueError("no sample is found in the size bin") + + #%% calculate dlnDp for dN/dlnDp + d_mam=np.arange(1,3001) + dlnDp_m=np.full(3000,np.nan) + for bb in 
range(3000): + dlnDp_m[bb]=np.log((bb+2)/(bb+1)) + dlnDp_o=np.empty(len(size)) + for bb in range(len(size)): + dlnDp_o[bb]=np.log(sizeh[bb]/sizel[bb]) + + + #%% plot entire pdf below and above PBL + figname = figpath_aircraft_statistics+'SeparateCloud_pdf_AerosolSize_ACEENA_'+IOP+'.png' + print('plotting PDF figures to '+figname) + + fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(6,8)) + + ax1.plot(size,np.nanmedian(pdf_above_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax1.plot(d_mam,np.nanmedian(pdf_above_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax1.tick_params(color='k',labelsize=14) + ax1.set_xscale('log') + ax1.set_yscale('log') + + ax2.plot(size,np.nanmedian(pdf_near_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax2.plot(d_mam,np.nanmedian(pdf_near_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax2.tick_params(color='k',labelsize=14) + ax2.set_xscale('log') + ax2.set_yscale('log') + + ax3.plot(size,np.nanmedian(pdf_sfc_obs,1)/dlnDp_o,color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax3.plot(d_mam,np.nanmedian(pdf_sfc_model[mm],1)/dlnDp_m,color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax3.tick_params(color='k',labelsize=14) + ax3.set_xscale('log') + ax3.set_yscale('log') + + # ax0.set_xlim(5,4000) + # ax1.set_xlim(5,4000) + ax1.set_ylim(1e-3,1e5) + ax2.set_ylim(1e-3,1e5) + ax3.set_ylim(1e-3,1e5) + + ax2.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=14) + ax3.set_xlabel('Diameter (nm)',fontsize=14) + l=ax3.legend(loc='lower center', shadow=False, fontsize='medium') + + ax1.set_title('size distribution for ACEENA '+IOP,fontsize=15) + + ax3.text(200,3000,'Near Surface ('+str(pdf_sfc_obs.shape[1])+' legs)',fontsize=12) + ax2.text(200,3000,'Near Clouds ('+str(pdf_near_obs.shape[1])+' legs)',fontsize=12) + ax1.text(200,3000,'Above Clouds ('+str(pdf_above_obs.shape[1])+' legs)',fontsize=12) + + 
fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() + + #%% plot percentile on sizes + + figname = figpath_aircraft_statistics+'SeparateCloud_percentile_AerosolSize_ACEENA_'+IOP+'.png' + print('plotting percentile figures to '+figname) + + # set position shift so that models and obs are not overlapped + p_shift = np.arange(nmodels+1) + p_shift = (p_shift - p_shift.mean())*0.2 + + fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(6,8)) + + ax1.boxplot(p2_above_obs,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(p2_above_model[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + + ax2.boxplot(p2_near_obs,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax2.boxplot(p2_near_model[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + + 
ax3.boxplot(p2_sfc_obs,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(p2_sfc_model[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + + ax3.tick_params(color='k',labelsize=12) + ax2.tick_params(color='k',labelsize=12) + ax1.tick_params(color='k',labelsize=12) + ax3.set_yscale('log') + ax2.set_yscale('log') + ax1.set_yscale('log') + ax1.set_xlim(-.5,blen-.5) + ax2.set_xlim(-.5,blen-.5) + ax3.set_xlim(-.5,blen-.5) + + ax3.set_xlabel('Diameter (nm)',fontsize=14) + ax2.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) + ax1.set_title('percentile for ACEENA '+IOP,fontsize=15) + + ax3.text(2.4,4000,'Near Surface ('+str(pdf_sfc_obs.shape[1])+' legs)',fontsize=12) + ax2.text(2.4,4000,'Near Clouds ('+str(pdf_near_obs.shape[1])+' legs)',fontsize=12) + ax1.text(2.4,4000,'Above Clouds ('+str(pdf_above_obs.shape[1])+' legs)',fontsize=12) + + xlabel=[str(binl[x])+'-'+str(binh[x]) for x in range(blen)] + ax1.set_xticks(range(len(binm))) + ax1.set_xticklabels(xlabel) + ax2.set_xticks(range(len(binm))) + ax2.set_xticklabels(xlabel) + ax3.set_xticks(range(len(binm))) + ax3.set_xticklabels(xlabel) + ax1.set_ylim(1e-3,1e5) + ax2.set_ylim(1e-3,1e5) + ax3.set_ylim(1e-3,1e5) + + # plot temporal lines for label + ax3.plot([],c='k',label='Obs') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + ax3.legend(loc='lower left', shadow=False, 
fontsize='medium') + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() + diff --git a/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py b/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py new file mode 100644 index 0000000..68eb4a1 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_pdf_percentile_SeparatePBLH_hiscale.py @@ -0,0 +1,390 @@ +""" +# plot pdf and percentiles in several aerosol size bins for aircraft data +# separated by observed PBLH +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import hhmmss2sec,yyyymmdd2cday +from ..subroutines.read_ARMdata import read_pblhtmpl1 +from ..subroutines.read_surface import read_dl_pblh +from ..subroutines.read_aircraft import read_cpc +from ..subroutines.read_netcdf import read_merged_size,read_extractflight +from ..subroutines.quality_control import qc_remove_neg, qc_mask_qcflag + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + cpcpath = settings['cpcpath'] + pblhpath = settings['pblhpath'] + dlpath = settings['dlpath'] + merged_size_path = settings['merged_size_path'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + IOP = settings.get('IOP', None) + + #%% other settings + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + # set final bin sizes + binl = np.array([3, 15, 70, 400, 1000]) + binh = np.array([10, 70, 400, 1000, 3000]) + binm = (binl+binh)/2 + + # set a range around PBLH (PBLH +/- heightdiff) that only data outside of the range are counted + heightdiff = 100 + + #%% read in doppler lidar data. 
this is all days in one file + dl=read_dl_pblh(dlpath+'sgpdlC1_mlh_0.08.txt') + + mlh_dl = dl[6,:]*1000 + day_dl = np.array(mlh_dl[:]) + time_dl = np.array(mlh_dl[:]) + for tt in range(len(time_dl)): + yyyymmdd=format(int(dl[0,tt]),'04d')+format(int(dl[1,tt]),'02d')+format(int(dl[2,tt]),'02d') + hhmmss=format(int(dl[3,tt]),'02d')+':'+format(int(dl[4,tt]),'02d')+':'+format(int(dl[5,tt]),'02d') + day_dl[tt]=yyyymmdd2cday(yyyymmdd) + time_dl[tt]=hhmmss2sec(hhmmss) + mlh_dl=qc_remove_neg(mlh_dl) + + + #%% find files for flight information + + lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') + lst.sort() + + if len(lst)==0: + raise ValueError('cannot find any file') + + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + lst.sort() + else: + raise ValueError('this code is only for HISCALE, check the campaign settings') + + if len(lst)==0: + raise ValueError('cannot find any file') + + #%% read all data + + # pdf average for legs + pdf_below_obs=np.full([44,len(lst)*10],np.nan) + pdf_above_obs=np.full([44,len(lst)*10],np.nan) + + cpcdiff_above=np.full([len(lst)*10],np.nan) + cpcdiff_below=np.full([len(lst)*10],np.nan) + + nmodels=len(Model_List) + pdf_below_model=[] + pdf_above_model=[] + for mm in range(nmodels): + pdf_below_model.append(np.full([3000,len(lst)*10],np.nan)) + pdf_above_model.append(np.full([3000,len(lst)*10],np.nan)) + + n_below=0 + n_above=0 + n_total=0 + + print('reading '+format(len(lst))+' files to calculate the statistics: ') + + for filename in lst: + + # get date info: + date=filename[-12:-3] + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + print(date) + + #%% read aerosol size distribution + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') 
+ (time,size,legnum,timeunit,cunit,long_name)=read_merged_size(filename,'leg_number') + (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') + (time,size,sizeh,timeunit,dataunit,long_name)=read_merged_size(filename,'size_high') + (time,size,sizel,timeunit,dataunit,long_name)=read_merged_size(filename,'size_low') + (time,size,merge,timeunit,dataunit,long_name)=read_merged_size(filename,'size_distribution_merged') + time=np.ma.compressed(time) + time=time/3600. + size=np.ma.compressed(size)*1000 # um to nm + sizel=sizel*1000 + sizeh=sizeh*1000 + merge=qc_remove_neg(merge) + merge=merge.T + + #%% read in CPC measurements + + if campaign=='HISCALE': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') + else: + raise ValueError('this code is only for HISCALE, check the campaign settings') + filename_c.sort() + # read in data + if len(filename_c)==1 or len(filename_c)==2: # some days have two flights + (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) + elif np.logical_and(campaign=='HISCALE', date=='20160425a'): + cpc=np.insert(cpc,0,cpc[:,0],axis=1) + cpc[0,0]=cpc[0,0]-1 + time_cpc = cpc[0,:]/3600 + cpc10 = cpc[1,:] + cpc3 = cpc[2,:] + elif len(filename_c)==0: + time_cpc=time + cpc10=np.nan*np.empty([len(time)]) + cpc3=np.nan*np.empty([len(time)]) + else: + raise ValueError('find too many files') + + cpcdiff = cpc3-cpc10 + cpcdiff=qc_remove_neg(cpcdiff) + + + #%% read in PBLH data from MPL + filename_mpl=glob.glob(pblhpath+'sgppblhtmpl1sawyerliC1*'+date[0:8]+'*.nc') + # read in data + if len(filename_mpl)==1: + (time_pblh,timeunit,mpl,qc_mpl) = read_pblhtmpl1(filename_mpl[0]) + mpl = qc_mask_qcflag(mpl, qc_mpl) + elif len(filename_mpl)==0: + print('no pblh file in this day. 
skip...') + continue + else: + raise ValueError('find too many files: ' + filename_mpl) + time_pblh=time_pblh/3600 + + #%% choose the same time of DL. get pblh + cday0=yyyymmdd2cday(date[0:8]) + idx_dl = day_dl==cday0 + time_dl2 = time_dl[idx_dl]/3600 + mlh_dl2 = mlh_dl[idx_dl] + + #%% read in Models + datam2 = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (timem,heightm,datam,timeunitm,datamunit,datamlongname)=read_extractflight(filename_m,'NCNall') + datam2.append(datam*1e-6) # #/m3 to #/cm3 + + timem = timem/3600 + + if len(timem)!=len(time) or len(time)!=len(time_cpc): + raise ValueError('time dimension for obs and/or model are not consistent') + + #%% get pdf for legs below and above PBLH + + for ii in range(max(legnum)): + # get the mean pblh for this leg + time_leg=time[legnum==ii+1] + cflag_leg=cflag[legnum==ii+1] + if np.sum(cflag_leg==1)>1: #0.01*len(cflag_leg): + continue # don't use legs with >10% cloud flag + + idx_dl2 = np.logical_and(time_dl2>=time_leg[0], time_dl2<=time_leg[-1]) + if idx_dl2.any()==False: + idx_dl2 = np.logical_and(time_dl2>=time_leg[0]-2, time_dl2<=time_leg[-1]+2) # extend time range + if idx_dl2.any(): + pblh = np.nanmean(mlh_dl2[idx_dl2]) + else:# use MPL pblh + idx_mpl = np.logical_and(time_pblh>=time_leg[0], time_pblh<=time_leg[-1]) + if idx_mpl.any()==False: + idx_mpl = np.logical_and(time_pblh>=time_leg[0]-2, time_pblh<=time_leg[-1]+2) # extend time range + pblh = np.nanmean(mpl[idx_mpl]) + if np.isnan(pblh): # no PBLH estimate available for this leg (NOTE: this branch and the below-PBL branch were reconstructed — the span between '<' and '>' was eaten in extraction; confirm against upstream source) + continue + + legheight = np.nanmean(height[legnum==ii+1]) + n_total = n_total+1 + + # separate legs by height relative to PBLH; legs within +/- heightdiff of PBLH are excluded + if legheight<pblh-heightdiff: + pdf_below_obs[:,n_below] = np.nanmean(merge[:,legnum==ii+1],1) + cpcdiff_below[n_below] = np.nanmean(cpcdiff[legnum==ii+1]) + for mm in range(nmodels): + pdf_below_model[mm][:,n_below] = np.nanmean(datam2[mm][:,legnum==ii+1],1) + n_below=n_below+1 + elif legheight>pblh+heightdiff: + pdf_above_obs[:,n_above] = np.nanmean(merge[:,legnum==ii+1],1) + cpcdiff_above[n_above] = np.nanmean(cpcdiff[legnum==ii+1]) + for mm in range(nmodels): + pdf_above_model[mm][:,n_above] = np.nanmean(datam2[mm][:,legnum==ii+1],1) + n_above=n_above+1 + + + #%% change to the pre-defined size bins + + d_model=np.arange(1,3001) + blen = len(binm) + p2_below_obs = list() + p2_above_obs = list() + p2_above_model = list() + p2_below_model = list() + for mm in range(nmodels): + p2_above_model.append([]) + p2_below_model.append([]) + + for bb in range(blen): + idx_m = np.logical_and(d_model>=binl[bb],
d_model<=binh[bb]) + for mm in range(nmodels): + data_below = np.nansum(pdf_below_model[mm][idx_m,:],0) + data_above = np.nansum(pdf_above_model[mm][idx_m,:],0) + # exclude pre-assigned data space that are not used + p2_below_model[mm].append(data_below[range(n_below)]) + p2_above_model[mm].append(data_above[range(n_above)]) + if bb==0: + p2_below_obs.append(cpcdiff_below[~np.isnan(cpcdiff_below)]) + p2_above_obs.append(cpcdiff_above[~np.isnan(cpcdiff_above)]) + else: + idx_o = np.logical_and(sizel>=binl[bb], sizeh<=binh[bb]) + if any(idx_o): + tmp_below = np.nansum(pdf_below_obs[idx_o,:],0) + tmp_above = np.nansum(pdf_above_obs[idx_o,:],0) + # exclude not used or not detected (0 value) data + p2_below_obs.append(tmp_below[tmp_below!=0]) + p2_above_obs.append(tmp_above[tmp_above!=0]) + else: + p2_below_obs.append(np.full([n_below],np.nan)) + p2_above_obs.append(np.full([n_above],np.nan)) + + #%% change to dN/dlnDp + # model + dlnDp=np.empty(3000) + for bb in range(3000): + dlnDp[bb]=np.log((bb+2)/(bb+1)) + for nn in range(n_below): + for mm in range(nmodels): + pdf_below_model[mm][:,nn]=pdf_below_model[mm][:,nn]/dlnDp + for nn in range(n_above): + for mm in range(nmodels): + pdf_above_model[mm][:,nn]=pdf_above_model[mm][:,nn]/dlnDp + + # Obs + dlnDp=np.empty(len(size)) + for bb in range(len(size)): + dlnDp[bb]=np.log(sizeh[bb]/sizel[bb]) + for nn in range(n_below): + pdf_below_obs[:,nn]=pdf_below_obs[:,nn]/dlnDp + for nn in range(n_above): + pdf_above_obs[:,nn]=pdf_above_obs[:,nn]/dlnDp + + + #%% plot entire pdf below and above PBL + figname = figpath_aircraft_statistics+'SeparatePBLH_pdf_AerosolSize_HISCALE_'+IOP+'.png' + print('plotting PDF figures to '+figname) + + fig,(ax0,ax1) = plt.subplots(2,1,figsize=(8,6)) + idx_v=range(n_above) + h3=ax0.plot(size,np.nanmean(pdf_above_obs[:,idx_v],1),color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax0.plot(np.arange(1,3001),np.nanmean(pdf_above_model[mm][:,idx_v],1),color=color_model[mm],linewidth=1, 
label=Model_List[mm]) + # ax0.legend(loc='lower center', shadow=False, fontsize='large') + ax0.tick_params(color='k',labelsize=14) + ax0.set_xscale('log') + ax0.set_yscale('log') + + idx_v=range(n_below) + h3=ax1.plot(size,np.nanmean(pdf_below_obs[:,idx_v],1),color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax1.plot(np.arange(1,3001),np.nanmean(pdf_below_model[mm][:,idx_v],1),color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax1.legend(loc='lower left', shadow=False, fontsize='large') + ax1.tick_params(color='k',labelsize=14) + ax1.set_xscale('log') + ax1.set_yscale('log') + + # ax0.set_xlim(5,4000) + # ax1.set_xlim(5,4000) + ax0.set_ylim(1e-3,1e5) + ax1.set_ylim(1e-3,1e5) + ax1.set_xlabel('Diameter (nm)',fontsize=14) + ax0.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=13) + ax1.set_ylabel('aerosol #/dlnDp (cm$^{-3}$)',fontsize=13) + ax0.set_title('size distribution for Hi-Scale '+IOP,fontsize=15) + fig.text(.65,.83,'Above PBL ('+str(n_above)+' legs)',fontsize=12) + fig.text(.65,.43,'Below PBL ('+str(n_below)+' legs)',fontsize=12) + # fig.text(.68,.83,'Above PBL ('+format(n_above/n_total*100,'.1f')+'%)',fontsize=12) + # fig.text(.68,.43,'Below PBL ('+format(n_below/n_total*100,'.1f')+'%)',fontsize=12) + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() + + #%% plot percentile on sizes + + figname = figpath_aircraft_statistics+'SeparatePBLH_percentile_AerosolSize_HISCALE_'+IOP+'.png' + print('plotting percentile figures to '+figname) + + # set position shift so that models and obs are not overlapped + p_shift = np.arange(nmodels+1) + p_shift = (p_shift - p_shift.mean())*0.2 + + fig,(ax0,ax1) = plt.subplots(2,1,figsize=(8,6)) + + ax0.boxplot(p2_above_obs,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + 
vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax0.boxplot(p2_above_model[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + + ax1.boxplot(p2_below_obs,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(p2_below_model[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(blen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + + ax0.tick_params(color='k',labelsize=12) + ax1.tick_params(color='k',labelsize=14) + # ax0.set_xscale('log') + # ax1.set_xscale('log') + ax0.set_yscale('log') + ax1.set_yscale('log') + ax0.set_xlim(-1,blen) + ax1.set_xlim(-1,blen) + ax1.set_xlabel('Diameter (nm)',fontsize=14) + ax0.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) + ax1.set_ylabel('aerosol # (cm$^{-3}$)',fontsize=14) + ax0.set_title('percentile for Hi-Scale '+IOP,fontsize=15) + fig.text(.66,.83,'Above PBL ('+str(n_above)+' legs)',fontsize=12) + fig.text(.66,.43,'Below PBL ('+str(n_below)+' legs)',fontsize=12) + + xlabel=[str(binl[x])+'-'+str(binh[x]) for x in range(blen)] + ax0.set_xticks(range(len(binm))) + ax0.set_xticklabels(xlabel) + ax1.set_xticks(range(len(binm))) + 
ax1.set_xticklabels(xlabel) + # ax0.set_yticks([1,3,5,7,9,11,12,13,14,15,16]) + # ax0.set_yticklabels(range(400,4100,400)) + ax0.set_ylim(1e-3,1e5) + ax1.set_ylim(1e-3,1e5) + + # plot temporal lines for label + ax1.plot([],c='k',label='Obs') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + ax1.legend(loc='lower left', shadow=False, fontsize='large') + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() diff --git a/src/esmac_diags/plotting/plot_flight_percentile_lat_CCN.py b/src/esmac_diags/plotting/plot_flight_percentile_lat_CCN.py new file mode 100644 index 0000000..9cb6ea6 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_percentile_lat_CCN.py @@ -0,0 +1,437 @@ +""" +# plot percentile of aerosol number concentration binned by different latitudes +# separated by below-cloud, near-cloud and above-cloud +# for aircraft measurements in CSET or SOCRATES +""" + + +import glob +import os +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_ccn_socrates, read_RF_NCAR +from ..subroutines.read_netcdf import read_extractflight +from ..subroutines.quality_control import qc_remove_neg + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + latbin = settings['latbin'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + if campaign in ['CSET', 'SOCRATES']: + ccnpath = settings['ccnpath'] + RFpath = settings['RFpath'] + else: + raise ValueError('This code is only for CSET or SOCRATES. check campaign setting: '+campaign) + + #%% other settings + + plot_method = 'all' # 'height': separate by height. 
'all': all heights below 5km + + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + dlat = latbin[1]-latbin[0] + latmin = latbin-dlat/2 + latmax = latbin+dlat/2 + latlen = len(latbin) + + nmodels=len(Model_List) + + #%% find files for flight information + + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + #%% define variables by latitude bins below, near and above clouds + + ccna_below_lat = [] + ccna_near_lat = [] + ccna_above_lat = [] + ccnb_below_lat = [] + ccnb_near_lat = [] + ccnb_above_lat = [] + for bb in range(latlen): + ccna_below_lat.append(np.empty(0)) + ccna_near_lat.append(np.empty(0)) + ccna_above_lat.append(np.empty(0)) + ccnb_below_lat.append(np.empty(0)) + ccnb_near_lat.append(np.empty(0)) + ccnb_above_lat.append(np.empty(0)) + + ccn3_below_lat = [] + ccn3_near_lat = [] + ccn3_above_lat = [] + ccn5_below_lat = [] + ccn5_near_lat = [] + ccn5_above_lat = [] + for mm in range(nmodels): + ccn3_below_lat.append([np.empty(0) for bb in range(latlen)]) + ccn3_near_lat.append([np.empty(0) for bb in range(latlen)]) + ccn3_above_lat.append([np.empty(0) for bb in range(latlen)]) + ccn5_below_lat.append([np.empty(0) for bb in range(latlen)]) + ccn5_near_lat.append([np.empty(0) for bb in range(latlen)]) + ccn5_above_lat.append([np.empty(0) for bb in range(latlen)]) + + print('reading '+format(len(alldates))+' files to calculate the statistics: ') + + for date in alldates: + print(date) + + #%% read in Models + + ccn3=[] + ccn5=[] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,ccn3_tmp,timeunitm,ccn3_unit,ccn3_longname)=read_extractflight(filename_m,'CCN3') + 
(timem,heightm,ccn5_tmp,timeunitm,ccn5_unit,ccn5_longname)=read_extractflight(filename_m,'CCN5') + ccn3.append(ccn3_tmp) + ccn5.append(ccn5_tmp) + + # get supersaturation + SS3 = ccn3_longname.split('=')[-1] + SS5 = ccn5_longname.split('=')[-1] + + #%% read in observations for CSET and SOCRATES + # CSET does not have observed CCN + if campaign=='CSET': + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + + # SOCRATES + elif campaign=='SOCRATES': + filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') + if len(filename_ccn)==1: + (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) + time_ccn = data0[0,:] + ccn = data0[1,:] + SS = data0[3,:] + ccn=qc_remove_neg(ccn) + timea=time_ccn + timeb=time_ccn + ccna=np.array(ccn) + ccnb=np.array(ccn) + idxa=np.logical_and(SS>0.05, SS<0.15) + ccna[idxa==False]=np.nan + SSa=np.full((len(timea)),0.1) + idxb=np.logical_and(SS>0.45, SS<0.55) + ccnb[idxb==False]=np.nan + SSb=np.full((len(timeb)),0.5) + elif len(filename_ccn)==0: + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + else: + raise ValueError('find too many files: ' + filename_ccn) + + if any(timea!=timeb): + raise ValueError('inconsitent time dimension') + + + # need latitude from RF file + lst = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') + if len(lst)==1 or len(lst)==2: # SOCRATES has two flights in 20180217, choose the later one + filename=lst[-1] + else: + raise ValueError('find no file or too many files: ' + lst) + (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') + + # exclude NaNs + idx = np.logical_or(~np.isnan(ccna), ~np.isnan(ccnb)) + ccna=ccna[idx] + ccnb=ccnb[idx] + SSa=SSa[idx] + SSb=SSb[idx] + + # for interpolation of model results + timea=timea[idx] + 
timeb=timeb[idx] + time=timea + # interpolate model results into observational time + for mm in range(nmodels): + ccn3[mm] = (np.interp(timea,timem,ccn3[mm])) + ccn5[mm] = (np.interp(timeb,timem,ccn5[mm])) + height = np.interp(timeb,timem,heightm) + lat = np.interp(timeb,timem,lat) + + + #%% separate data by cloud or height + flag_below = np.zeros(len(time)) + flag_near = np.zeros(len(time)) + flag_above = np.zeros(len(time)) + + if plot_method == 'height': + for ii in range(len(time)): + if height[ii]>5000: + continue # exclude measurements above 5km + elif height[ii]<2000: + flag_below[ii]=1 + elif height[ii]>=2000: + flag_above[ii]=1 + + # option 3: use all heights below 5km + elif plot_method == 'all': + for ii in range(len(time)): + if height[ii]<=5000: # exclude measurements above 5km + flag_below[ii]=1 + + for bb in range(latlen): + idx = np.logical_and(lat>=latmin[bb], lat25,lat<28)])) + + #%% separate data by cloud or height + flag_below = np.zeros(len(time)) + flag_near = np.zeros(len(time)) + flag_above = np.zeros(len(time)) + + # option 1: separate data by cloud and put in each latitude bin + if plot_method == 'cloud': + for ii in range(len(time)): + if height[ii]>5000: + continue # exclude measurements above 5km + # check if there is cloud within 1hr window + i_start = max(ii-1800, 0) + i_end = min(ii+1800, len(time)) + if any(cldflag[i_start:i_end]==1): + cheight=height[i_start:i_end][cldflag[i_start:i_end]==1] + cldmax = np.max(cheight) + cldmin = np.min(cheight) + if height[ii]=cldmin and height[ii]<=cldmax: + flag_near[ii]=1 + elif height[ii]>max(cldmax,1000): + flag_above[ii]=1 + + # option 2: separate data by height + elif plot_method == 'height': + for ii in range(len(time)): + if height[ii]>5000: + continue # exclude measurements above 5km + # check if there is cloud within 1hr window + i_start = max(ii-1800, 0) + i_end = min(ii+1800, len(time)) + if any(cldflag[i_start:i_end]==1): + cheight=height[i_start:i_end][cldflag[i_start:i_end]==1] + 
cldmax = np.max(cheight) + cldmin = np.min(cheight) + if height[ii]max(cldmax,2000): + flag_above[ii]=1 + else: + if height[ii]<2000: + flag_below[ii]=1 + elif height[ii]>=2000: + flag_above[ii]=1 + + # option 3: use all heights below 5km + elif plot_method == 'all': + for ii in range(len(time)): + if height[ii]<=5000: # exclude measurements above 5km + flag_below[ii]=1 + + for bb in range(latlen): + idx = np.logical_and(lat>=latmin[bb], lat10nm) + figname = figpath_aircraft_statistics+'percentile_lat_CN10nm_bycldheight_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(8,6)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.boxplot(cpc_above_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(ncn10_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax1.tick_params(color='k',labelsize=15) + # ax1.set_yscale('log') + ax1.set_xlim(-1,latlen) + ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax1.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + + ax2.boxplot(cpc_near_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + 
medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax2.boxplot(ncn10_near_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax2.tick_params(color='k',labelsize=15) + # ax2.set_yscale('log') + ax2.set_xlim(-1,latlen) + ax2.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax2.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax2.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax3.tick_params(color='k',labelsize=15) + # ax3.set_yscale('log') + ax3.set_xlim(-1,latlen) + ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + # plot temporal lines for label + ax3.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.set_xlabel('Latitude',fontsize=16) + + ax1.set_xticklabels([]) + ax2.set_xticklabels([]) + ax3.set_xticklabels([]) + 
ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) + ax1.set_title('Percentile of CN (>10nm) # (cm$^{-3}$) '+campaign,fontsize=17) + fig.text(0.1,0.95,'Above Clouds',fontsize=15) + fig.text(0.1,0.6,'Near Clouds',fontsize=15) + fig.text(0.1,0.25,'Below Cloud',fontsize=15) + + ax2.legend(loc='upper right', shadow=False, fontsize='x-large') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + + #%% plot for UHSAS (>100nm) + figname = figpath_aircraft_statistics+'percentile_lat_CN100nm_bycldheight_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2,ax3) = plt.subplots(3,1,figsize=(8,6)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.boxplot(uhsas_above_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(ncn100_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax1.tick_params(color='k',labelsize=15) + # ax1.set_yscale('log') + ax1.set_xlim(-1,latlen) + ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax1.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + + ax2.boxplot(uhsas_near_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + 
medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax2.boxplot(ncn100_near_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax2.tick_params(color='k',labelsize=15) + # ax2.set_yscale('log') + ax2.set_xlim(-1,latlen) + ax2.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax2.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax2.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax3.tick_params(color='k',labelsize=15) + # ax3.set_yscale('log') + ax3.set_xlim(-1,latlen) + ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + # plot temporal lines for label + ax3.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.set_xlabel('Latitude',fontsize=16) + + ax1.set_xticklabels([]) + ax2.set_xticklabels([]) + 
ax3.set_xticklabels([]) + ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) + ax1.set_title('Percentile of CN (>100nm) # (cm$^{-3}$) '+campaign,fontsize=17) + fig.text(0.1,0.95,'Above Clouds',fontsize=15) + fig.text(0.1,0.6,'Near Clouds',fontsize=15) + fig.text(0.1,0.25,'Below Cloud',fontsize=15) + + ax2.legend(loc='upper right', shadow=False, fontsize='x-large') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + + elif plot_method == 'height': + #%% for CPC (>10nm) + figname = figpath_aircraft_statistics+'percentile_lat_CN10nm_byheight_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.boxplot(cpc_above_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(ncn10_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax1.tick_params(color='k',labelsize=15) + # ax1.set_yscale('log') + ax1.set_xlim(-1,latlen) + ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + if campaign=='SOCRATES': + ax1.set_ylim(-100,4000) + elif campaign=='CSET': + ax1.set_ylim(-20,1200) + ax1.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, + 
positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax3.tick_params(color='k',labelsize=15) + # ax3.set_yscale('log') + ax3.set_xlim(-1,latlen) + ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + if campaign=='SOCRATES': + ax3.set_ylim(-100,2000) + elif campaign=='CSET': + ax3.set_ylim(-50,4000) + # plot temporal lines for label + ax3.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.set_xlabel('Latitude',fontsize=16) + + ax1.set_xticklabels([]) + ax3.set_xticklabels([]) + ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) + ax1.set_title('Percentile of CN (>10nm) # (cm$^{-3}$) '+campaign,fontsize=17) + fig.text(0.1,0.9,'2-5km',fontsize=15) + fig.text(0.1,0.4,'0-2km',fontsize=15) + + ax1.legend(loc='upper right', shadow=False, fontsize='x-large') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + + #%% plot for UHSAS (>100nm) + figname = figpath_aircraft_statistics+'percentile_lat_CN100nm_byheight_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.boxplot(uhsas_above_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, 
+ boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(ncn100_above_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax1.tick_params(color='k',labelsize=15) + # ax1.set_yscale('log') + ax1.set_xlim(-1,latlen) + ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax1.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax3.tick_params(color='k',labelsize=15) + # ax3.set_yscale('log') + ax3.set_xlim(-1,latlen) + ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + ax3.set_ylim(-10,400) + # plot temporal lines for label + ax3.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + + 
ax3.set_xlabel('Latitude',fontsize=16) + + ax1.set_xticklabels([]) + ax3.set_xticklabels([]) + ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) + ax1.set_title('Percentile of CN (>100nm) # (cm$^{-3}$) '+campaign,fontsize=17) + fig.text(0.1,0.9,'2-5km',fontsize=15) + fig.text(0.1,0.4,'0-2km',fontsize=15) + + ax1.legend(loc='upper right', shadow=False, fontsize='x-large') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + #%% + + elif plot_method == 'all': + #%% for CPC (>10nm) + figname = figpath_aircraft_statistics+'percentile_lat_CN_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax3) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.boxplot(cpc_below_lat,whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax1.boxplot(ncn10_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax1.tick_params(color='k',labelsize=15) + # ax1.set_yscale('log') + ax1.set_xlim(-1,latlen) + ax1.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + if campaign=='SOCRATES': + ax1.set_ylim(-100,4000) + elif campaign=='CSET': + ax1.set_ylim(-20,2500) + ax1.plot([],c='k',label='CPC') + for mm in range(nmodels): + ax1.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.boxplot(uhsas_below_lat,whis=(5,95),showmeans=False,showfliers=False, + 
positions=np.array(range(latlen))+p_shift[-1],widths=0.15, + boxprops=dict(facecolor='k', color='k'),whiskerprops=dict(color='k'), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color='k'), + vert=True, patch_artist=True) # need patch_artist to fill color in box + for mm in range(nmodels): + c = color_model[mm] + ax3.boxplot(ncn100_below_lat[mm],whis=(5,95),showmeans=False,showfliers=False, + positions=np.array(range(latlen))+p_shift[mm],widths=0.15, + boxprops=dict(facecolor=c, color=c),whiskerprops=dict(color=c), + medianprops=dict(color='lightyellow',linewidth=1),capprops=dict(color=c), + vert=True, patch_artist=True) # need patch_artist to fill color in box + ax3.tick_params(color='k',labelsize=15) + # ax3.set_yscale('log') + ax3.set_xlim(-1,latlen) + ax3.set_xticks(np.arange(-0.5*dlat,latlen-1,2)) + if campaign=='SOCRATES': + ax3.set_ylim(-10,400) + elif campaign=='CSET': + ax3.set_ylim(-10,1000) + # plot temporal lines for label + ax3.plot([],c='k',label='UHSAS100') + for mm in range(nmodels): + ax3.plot([],c=color_model[mm],label=Model_List[mm]) + + ax3.set_xlabel('Latitude',fontsize=16) + + ax1.set_xticklabels([]) + ax3.set_xticklabels([]) + ax3.set_xticklabels([int(np.floor(a)) for a in latbin[0::2]]) + ax1.set_title('Percentile of CN # (cm$^{-3}$) '+campaign,fontsize=17) + + ax1.legend(loc='upper right', shadow=False, fontsize='x-large') + ax3.legend(loc='upper right', shadow=False, fontsize='x-large') + fig.text(0.1,0.9,'>10nm',fontsize=15) + fig.text(0.1,0.4,'>100nm',fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + #%% + else: + raise ValueError('does not recognize plot_method: '+plot_method) \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_percentile_lat_cldfreq.py b/src/esmac_diags/plotting/plot_flight_percentile_lat_cldfreq.py new file mode 100644 index 0000000..5ff6c5f --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_percentile_lat_cldfreq.py @@ -0,0 +1,169 
@@ +"""# plot percentile of meteorological variables binned by different latitudes +# for aircraft measurements in CSET or SOCRATES +# only select a certain height ranges for warm clouds (the height range needs to be further tuned) +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_RF_NCAR +from ..subroutines.read_netcdf import read_extractflight +from ..subroutines.specific_data_treatment import lwc2cflag +from ..subroutines.quality_control import qc_mask_takeoff_landing + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + latbin = settings['latbin'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + if campaign in ['CSET', 'SOCRATES']: + RFpath = settings['RFpath'] + else: + raise ValueError('This code is only for CSET or SOCRATES. 
check campaign setting: '+campaign) + + #%% other settings + + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + dlat = latbin[1]-latbin[0] + latmin = latbin-dlat/2 + latmax = latbin+dlat/2 + latlen = len(latbin) + + nmodels=len(Model_List) + + #%% find files for flight information + + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot fine any file') + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + + #%% define variables by latitude bins + + height_lat = [] + cbheight = [] # cloud base height + cflag_lat = [] + cloudo_lat = [] # cloud fraction by flag + + for bb in range(latlen): + height_lat.append(np.empty(0)) + cbheight.append(np.empty(0)) + cflag_lat.append(np.empty(0)) + cloudo_lat.append(np.empty(0)) + + cloudm_lat = [] + for mm in range(nmodels): + cloudm_lat.append(list(cloudo_lat)) + + print('reading '+format(len(alldates))+' files to calculate the statistics: ') + + for date in alldates: + print(date) + + #%% read in Models + cloudm = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,cloud,timeunitm,clunit,cllongname)=read_extractflight(filename_m,'CLOUD') + cloudm.append(cloud) + + #%% read in observations + # note that it is only for CSET and SOCRATES + lst = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') + if len(lst)==1 or len(lst)==2: # SOCRATES has two flights in 20180217, choose the later one + filename=lst[-1] + else: + raise ValueError('find no file or too many files: '+lst) + (time,height,timeunit,hunit,hlongname,cellsize,cellunit)=read_RF_NCAR(filename,'ALT') + (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') + (time,lon,timeunit,lonunit,lonlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LON') + 
(time,lwc,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename,'PLWCC') + + # exclude 30min after takeoff and before landing + height=qc_mask_takeoff_landing(time,height) + lat=qc_mask_takeoff_landing(time,lat) + lon=qc_mask_takeoff_landing(time,lon) + lwc=qc_mask_takeoff_landing(time,lwc) + timem=qc_mask_takeoff_landing(time,timem) + for mm in range(nmodels): + cloudm[mm]=qc_mask_takeoff_landing(time,cloudm[mm]) + + # calculate cloud flag based on LWC + cldflag=lwc2cflag(lwc,lwcunit) + + + #%% put data in each latitude bin + for bb in range(latlen): + idx = np.logical_and(lat>=latmin[bb], lat=zmin[zz], heighttimea[0]: + cflag = np.insert(cflag,np.full(int(time[0]-timea[0]),0), -9999) + time = np.insert(time,np.full(int(time[0]-timea[0]),0), -9999) + if time[-1]timea[-1]: + cflag = cflag[0:np.where(time==timea[-1])[0][0]+1] + time = time[0:np.where(time==timea[-1])[0][0]+1] + ccna = qc_mask_cloudflag(ccna,cflag) + ccnb = qc_mask_cloudflag(ccnb,cflag) + + # CSET does not have observed CCN + elif campaign=='CSET': + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + + # SOCRATES + elif campaign=='SOCRATES': + filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') + if len(filename_ccn)==1: + (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) + time_ccn = data0[0,:] + ccn = data0[1,:] + SS = data0[3,:] + ccn=qc_remove_neg(ccn) + timea=time_ccn + timeb=time_ccn + ccna=np.array(ccn) + ccnb=np.array(ccn) + idxa=np.logical_and(SS>0.05, SS<0.15) + ccna[idxa==False]=np.nan + SSa=np.full((len(timea)),0.1) + idxb=np.logical_and(SS>0.45, SS<0.55) + ccnb[idxb==False]=np.nan + SSb=np.full((len(timeb)),0.5) + elif len(filename_ccn)==0: + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + else: + 
raise ValueError('find too many files: '+filename_ccn) + + if any(timea!=timeb): + raise ValueError('time dimension is inconsistent') + + # exclude NaNs + idx = np.logical_or(~np.isnan(ccna), ~np.isnan(ccnb)) + ccna_all.append(ccna[idx]) + ccnb_all.append(ccnb[idx]) + SSa_all=np.append(SSa_all,SSa[idx]) + SSb_all=np.append(SSb_all,SSb[idx]) + + height2=np.interp(timea,timem,heightm) + height_all.append(height2[idx]) + + # for interpolation of model results + timea=timea[idx] + timeb=timeb[idx] + + # interpolate model results into observational time + for mm in range(nmodels): + ccn3_all[mm].append(np.interp(timea,timem,ccn3[mm])) + ccn5_all[mm].append(np.interp(timeb,timem,ccn5[mm])) + + #%% calculate percentiles for each height bin + + ccna_z = list() + ccnb_z = list() + ccn3_z = [] + ccn5_z = [] + nmodels=len(Model_List) + for mm in range(nmodels): + ccn3_z.append([]) + ccn5_z.append([]) + for zz in range(zlen): + ccna_z.append(np.empty(0)) + ccnb_z.append(np.empty(0)) + for mm in range(nmodels): + ccn3_z[mm].append(np.empty(0)) + ccn5_z[mm].append(np.empty(0)) + + ndays=len(height_all) + for dd in range(ndays): + height = height_all[dd] + ccna = ccna_all[dd] + ccnb = ccnb_all[dd] + for zz in range(zlen): + idx = np.logical_and(height>=zmin[zz], height=zmin[zz], height=zmin[zz], height=zmin[zz], height=0] + if len(data)>0: + cldfreq_flag[zz] = sum(data==1)/len(data) + for mm in range(nmodels): + data = cldm_z[mm][zz] + data = data[~np.isnan(data)] + if len(data)>0: + cldfreq_m[mm][zz] = np.mean(data) + + #%% plot frequency + if campaign in ['HISCALE', 'ACEENA']: + figname = figpath_aircraft_statistics+'profile_height_CldFreq_'+campaign+'_'+IOP+'.png' + else: + figname = figpath_aircraft_statistics+'profile_height_CldFreq_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,ax = plt.subplots(figsize=(4,8)) + + ax.plot(cldfreq_flag,z,color='k',linewidth=1,linestyle='-',label='Obs') + for mm in range(nmodels): + 
ax.plot(cldfreq_m[mm],z,color=color_model[mm],linewidth=1,label=Model_List[mm]) + + ax.tick_params(color='k',labelsize=12) + # ax.set_ylim(-1,zlen) + # ax.set_yticks(range(zlen)) + # ax.set_yticks(z[0:-1:2]) + ax.set_ylabel('Height (m MSL)',fontsize=12) + ax.legend(loc='upper right', fontsize='large') + ax.set_xlabel('Cloud Frequency',fontsize=12) + if campaign in ['HISCALE', 'ACEENA']: + ax.set_title(IOP,fontsize=15) + else: + ax.set_title(campaign,fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_profile_z_LWC.py b/src/esmac_diags/plotting/plot_flight_profile_z_LWC.py new file mode 100644 index 0000000..c49406f --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_profile_z_LWC.py @@ -0,0 +1,204 @@ +""" +# plot vertical profile of cloud fraction for all flights in each IOP +# compare models and aircraft measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_wcm, read_RF_NCAR +from ..subroutines.read_netcdf import read_extractflight +from ..subroutines.quality_control import qc_mask_qcflag,qc_remove_neg + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + height_bin = settings['height_bin'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_statistics = settings['figpath_aircraft_statistics'] + + if campaign in ['HISCALE', 'ACEENA']: + IOP = settings.get('IOP', None) + wcmpath = settings.get('wcmpath', None) + elif campaign in ['CSET', 'SOCRATES']: + RFpath = settings.get('RFpath', None) + else: + raise ValueError('campaign name is not recognized: '+campaign) + + #%% other settings + + if not os.path.exists(figpath_aircraft_statistics): + os.makedirs(figpath_aircraft_statistics) + + + #%% + z=height_bin + dz = z[1]-z[0] + 
zmin=z-np.insert((z[1:]-z[0:-1])/2,0,dz) + zmax=z+np.append((z[1:]-z[0:-1])/2,dz) + + zlen=len(z) + + #%% find files for flight information + + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + #%% read all data + + heightall=[] + lwcobsall=[] + lwcmall=[] + + nmodels=len(Model_List) + for mm in range(nmodels): + lwcmall.append([]) + + print('reading '+format(len(alldates))+' files to calculate the statistics: ') + + for date in alldates: + print(date) + + #%% read in obs + if campaign in ['HISCALE', 'ACEENA']: + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + + filename_wcm = glob.glob(wcmpath+'WCM_G1_'+date[0:8]+'*') + filename_wcm.sort() + if len(filename_wcm)==0: + print('skip this date: '+date) + continue + (wcm,wcmlist)=read_wcm(filename_wcm[flightidx-1]) + time0=wcm[0,:] + flag=wcm[-1,:] + lwcobs=wcm[2,:] + lwcobs=qc_remove_neg(lwcobs) + lwcobs=qc_mask_qcflag(lwcobs,flag) + + elif campaign in ['CSET', 'SOCRATES']: + filename = glob.glob(RFpath+'RF*'+date+'*.PNI.nc') + if len(filename)==1 or len(filename)==2: # SOCRATES has two flights in 20180217, choose the later one + (time,lwcobs,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename[-1],'PLWCC') + lwcobs=qc_remove_neg(lwcobs) + + lwcobsall.append(lwcobs) + + #%% read in models + + for mm in range(nmodels): + 
filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,lwc,timeunit,cldunit,cldname)=read_extractflight(filename_m,'LWC') + + # change E3SM unit from kg/m3 to g/m3 + lwcmall[mm].append(lwc*1000) + + heightall.append(heightm) + + #%% calculate percentiles for each height bin + + lwcobs_z = list() + lwcm_z = [] + for mm in range(nmodels): + lwcm_z.append([]) + for zz in range(zlen): + lwcobs_z.append(np.empty(0)) + for mm in range(nmodels): + lwcm_z[mm].append(np.empty(0)) + + ndays=len(heightall) + # ndays=1; + for dd in range(ndays): + height = heightall[dd] + lwcobs = lwcobsall[dd] + for zz in range(zlen): + idx = np.logical_and(height>=zmin[zz], height=zmin[zz], height0: + lwcmean_o[zz] = np.mean(data) + std_lwc_o[zz] = np.std(data)/np.sqrt(len(data)) + for mm in range(nmodels): + data = lwcm_z[mm][zz] + data = data[~np.isnan(data)] + if len(data)>0: + lwcmean_m[mm][zz] = np.mean(data) + + #%% plot frequency + if campaign in ['HISCALE', 'ACEENA']: + figname = figpath_aircraft_statistics+'profile_height_LWC_'+campaign+'_'+IOP+'.png' + else: + figname = figpath_aircraft_statistics+'profile_height_LWC_'+campaign+'.png' + print('plotting figures to '+figname) + + fig,ax = plt.subplots(figsize=(3,8)) + + ax.plot(lwcmean_o,z,color='k',linewidth=1,linestyle='-',label='Obs') + ax.fill_betweenx(z,lwcmean_o-std_lwc_o,lwcmean_o+std_lwc_o,facecolor='k',alpha=0.2) + + for mm in range(nmodels): + ax.plot(lwcmean_m[mm],z,color=color_model[mm],linewidth=1,label=Model_List[mm]) + + ax.tick_params(color='k',labelsize=16) + # ax.set_ylim(-1,zlen) + # ax.set_yticks(range(zlen)) + if campaign=='HISCALE': + ax.set_ylim(0,4500) + ax.set_yticks(z) + ax.set_ylabel('Height (m MSL)',fontsize=16) + ax.legend(loc='upper right', fontsize='large') + ax.set_xlabel('LWC (g/m3)',fontsize=16) + if campaign in ['HISCALE', 'ACEENA']: + ax.set_title(IOP,fontsize=18) + else: + ax.set_title(campaign,fontsize=18) + + 
fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + + diff --git a/src/esmac_diags/plotting/plot_flight_timeseries_AerosolComposition.py b/src/esmac_diags/plotting/plot_flight_timeseries_AerosolComposition.py new file mode 100644 index 0000000..ead49b5 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_timeseries_AerosolComposition.py @@ -0,0 +1,216 @@ +""" +# plot aircraft track data +# timeseries of aerosol composition (SO4 and total organic) concentration +# compare models and aircraft measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import hhmmss2sec +from ..subroutines.read_aircraft import read_ams,read_iwg1 +from ..subroutines.read_netcdf import read_merged_size,read_extractflight +from ..subroutines.quality_control import qc_mask_qcflag,qc_remove_neg + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_timeseries = settings['figpath_aircraft_timeseries'] + + IOP = settings.get('IOP', None) + merged_size_path = settings.get('merged_size_path', None) + amspath = settings.get('amspath', None) + iwgpath = settings.get('iwgpath', None) + + if campaign in ['CSET', 'SOCRATES']: + raise ValueError('CSET and SOCRATES do not have composition data') + elif campaign not in ['HISCALE', 'ACEENA']: + raise ValueError('campaign name is not recognized: '+campaign) + + #%% other settings + + if not os.path.exists(figpath_aircraft_timeseries): + os.makedirs(figpath_aircraft_timeseries) + + #%% find files for flight information + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + 
elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + # for each flight + for date in alldates: + + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + + #% read in flight information + if campaign=='HISCALE': + filename = merged_size_path+'merged_bin_fims_pcasp_'+campaign+'_'+date+'.nc' + elif campaign=='ACEENA': + filename = merged_size_path+'merged_bin_fims_pcasp_opc_'+campaign+'_'+date+'.nc' + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + (time,size,cflag,timeunit,cunit,long_name)=read_merged_size(filename,'cld_flag') + (time,size,height,timeunit,zunit,long_name)=read_merged_size(filename,'height') + time=np.ma.compressed(time) + + #%% read T and P from iwg + filename_i=glob.glob(iwgpath+'aaf.iwg*.'+date+'*txt') + filename_i.sort() + # read in data + if len(filename_i)==1: + (iwg,iwgvars)=read_iwg1(filename_i[0]) + timelen = len(iwg) + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + iwg.insert(1403,list(iwg[1403])) + tstr=iwg[1403][1] + tstr=tstr[0:-1]+str(int(tstr[-1])-1) + iwg[1403][1]=tstr + del iwg[-1] + # get variables + time_iwg=np.empty(timelen) + T_iwg=np.empty(timelen) + P_iwg=np.empty(timelen) + for t in range(timelen): + T_iwg[t]=float(iwg[t][20])+273.15 + P_iwg[t]=float(iwg[t][23])*100 + timestr=iwg[t][1].split(' ') + time_iwg[t]=hhmmss2sec(timestr[1]) + else: + raise ValueError('find no file or multiple files: ' + filename_i) + + #%% read aerosol composition in AMS + + filename_ams=glob.glob(amspath+'*'+date[0:8]+'*') + filename_ams.sort() + + if len(filename_ams)==1 or 
len(filename_ams)==2: + (ams,amslist)=read_ams(filename_ams[flightidx-1]) + time_ams=ams[0,:] + flag=ams[-1,:] + orgaaf=ams[1,:] + so4aaf=ams[5,:] + # flag=1 is also good data but behind CVI inlet. currently only use good data behind isokinetic inlet (flag=0) + orgaaf=qc_mask_qcflag(orgaaf,flag) + so4aaf=qc_mask_qcflag(so4aaf,flag) + elif len(filename_ams)==0: + time_ams = time_iwg + orgaaf = np.full(len(time_ams),np.nan) + so4aaf = np.full(len(time_ams),np.nan) + else: + raise ValueError('find too many files') + + # change values from standardize condition to ambient condition + T_ams = np.interp(time_ams,time,T_iwg) + P_ams = np.interp(time_ams,time,P_iwg) + so4aaf = so4aaf * (296.15/T_ams) * (P_ams/101325.) + orgaaf = orgaaf * (296.15/T_ams) * (P_ams/101325.) + + # some quality check: + orgaaf=qc_remove_neg(orgaaf) + so4aaf=qc_remove_neg(so4aaf) + + + #%% read in Models + nmodels=len(Model_List) + so4_m = [] + org_m = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,soa_a1,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a1') + (timem,heightm,soa_a2,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a2') + (timem,heightm,soa_a3,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a3') + (timem,heightm,so4_a1,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a1') + (timem,heightm,so4_a2,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a2') + (timem,heightm,so4_a3,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a3') + (timem,heightm,pom_a1,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a1') + (timem,heightm,pom_a3,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a3') + (timem,heightm,pom_a4,timeunitm,pomunit,pomname)=read_extractflight(filename_m,'pom_a4') + (timem,heightm,mom_a1,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a1') + 
(timem,heightm,mom_a2,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a2') + (timem,heightm,mom_a3,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a3') + (timem,heightm,mom_a4,timeunitm,momunit,momname)=read_extractflight(filename_m,'mom_a4') + + # add nucleation mode if available + try: + (timem,heightm,soa_a5,timeunitm,soaunit,soaname)=read_extractflight(filename_m,'soa_a5') + model_org = soa_a1+soa_a2+soa_a3+soa_a5 + pom_a1+pom_a3+pom_a4 + mom_a1+mom_a2+mom_a3+mom_a4 + except: + model_org = soa_a1+soa_a2+soa_a3 + pom_a1+pom_a3+pom_a4 + mom_a1+mom_a2+mom_a3+mom_a4 + try: + (timem,heightm,so4_a5,timeunitm,so4unit,so4name)=read_extractflight(filename_m,'so4_a5') + model_so4 = so4_a1+so4_a2+so4_a3+so4_a5 + except: + model_so4 = so4_a1+so4_a2+so4_a3 + + # change E3SM unit from kg/kg to ug/m3 + rho = P_iwg/T_iwg/287.06 + model_so4=model_so4*1e9*rho + model_org=model_org*1e9*rho + + so4_m.append(model_so4) + org_m.append(model_org) + + timem2 = timem/3600 + + #%% make plot + + figname = figpath_aircraft_timeseries+'AerosolComposition_'+campaign+'_'+date+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=2.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(time_ams/3600,so4aaf,color='k',linewidth=1,label='OBS') + for mm in range(nmodels): + ax1.plot(timem2, so4_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + # ylim1 = ax1.get_ylim() + + ax2.plot(time_ams/3600,orgaaf,color='k',linewidth=1,label='OBS') + for mm in range(nmodels): + ax2.plot(timem2, org_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + # ylim2 = ax2.get_ylim() + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, 
fontsize='large',bbox_to_anchor=(1.25, .5)) + + ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) + + ax1.set_title('Aerosol Sulfate Concentration ($\mu$g/m$^3$)',fontsize=13) + ax2.set_title('Aerosol Organic Concentration ($\mu$g/m$^3$)',fontsize=13) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_timeseries_CCN.py b/src/esmac_diags/plotting/plot_flight_timeseries_CCN.py new file mode 100644 index 0000000..42d36f1 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_timeseries_CCN.py @@ -0,0 +1,225 @@ +""" +# plot aircraft track data +# timeseries of CCN number concentration +# compare models and aircraft measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_aircraft import read_ccn_hiscale,read_ccn_socrates +from ..subroutines.read_ARMdata import read_ccn +from ..subroutines.read_netcdf import read_extractflight +from ..subroutines.quality_control import qc_mask_qcflag,qc_remove_neg + +def run_plot(settings): + + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_timeseries = settings['figpath_aircraft_timeseries'] + + if campaign in ['HISCALE', 'ACEENA']: + IOP = settings.get('IOP', None) + ccnpath = settings.get('ccnpath', None) + elif campaign in ['CSET', 'SOCRATES']: + ccnpath = settings.get('ccnpath', None) + else: + raise ValueError('campaign name is not recognized: '+campaign) + + #%% other settings + + if not os.path.exists(figpath_aircraft_timeseries): + os.makedirs(figpath_aircraft_timeseries) + + + #%% find files for flight information + lst = glob.glob(E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[0]+'_*.nc') + lst.sort() + if len(lst)==0: + raise ValueError('cannot find any file') + # choose 
files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+Model_List[0]+'_') + lst = glob.glob(a[0]+'_'+Model_List[0]+'_'+IOP+'*') + lst.sort() + + alldates = [x.split('_')[-1].split('.')[0] for x in lst] + + # for each flight + for date in alldates: + + #%% read in Models + nmodels=len(Model_List) + ccn3_m = [] + ccn5_m = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,ccn3,timeunitm,ccn3_unit,ccn3_longname)=read_extractflight(filename_m,'CCN3') + (timem,heightm,ccn5,timeunitm,ccn5_unit,ccn5_longname)=read_extractflight(filename_m,'CCN5') + + ccn3_m.append(ccn3) + ccn5_m.append(ccn5) + + # get supersaturation + SS3 = ccn3_longname.split('=')[-1] + SS5 = ccn5_longname.split('=')[-1] + timem2 = timem/3600 + + #%% read in flight data (for HISCALE) + if campaign=='HISCALE': + filename_ccn=glob.glob(ccnpath+'CCN_G1_'+date[0:8]+'*R2_HiScale001s.*') + filename_ccn.sort() + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + # read in data + if len(filename_ccn)==1 or len(filename_ccn)==2: + (data0,ccnlist)=read_ccn_hiscale(filename_ccn[flightidx-1]) + # only choose data quality is good (flag=0) + flag = data0[7,:] + time_ccn = data0[0,:] + ccna = data0[10,:] + ccnb = data0[11,:] + SSa = data0[2,:] + SSb = data0[5,:] + ccna=qc_mask_qcflag(ccna,flag) + ccnb=qc_mask_qcflag(ccnb,flag) + elif len(filename_ccn)==0: + time_ccn=timem + ccna=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + SSa=0.24 + SSb=0.46 + else: + raise ValueError('find too many files') + timea=time_ccn + timeb=time_ccn + + elif 
campaign=='ACEENA': + filename_ccna=glob.glob(ccnpath+'enaaafccn2colaF1.b1.'+date[0:8]+'*.nc') + filename_ccnb=glob.glob(ccnpath+'enaaafccn2colbF1.b1.'+date[0:8]+'*.nc') + # read in data + if len(filename_ccna)==1: + (timea,timeunita,ccna,qcflag,ccnunit,SSa)=read_ccn(filename_ccna[0]) + ccna=qc_mask_qcflag(ccna,qcflag) + ccna=qc_remove_neg(ccna) + SSa=qc_remove_neg(SSa) + elif len(filename_ccna)==0: + # print('no CCN data found. set as NaN') + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + else: + raise ValueError('find too many files') + if len(filename_ccnb)==1: + (timeb,timeunitb,ccnb,qcflag,ccnunit,SSb)=read_ccn(filename_ccnb[0]) + ccnb=qc_mask_qcflag(ccnb,qcflag) + ccnb=qc_remove_neg(ccnb) + SSb=qc_remove_neg(SSb) + elif len(filename_ccnb)==0: + # print('no CCN data found. set as NaN') + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + else: + raise ValueError('find too many files') + + # CSET does not have observed CCN + elif campaign=='CSET': + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + + # SOCRATES + elif campaign=='SOCRATES': + filename_ccn=glob.glob(ccnpath+'CCNscanning_SOCRATES_GV_RF*'+date[0:8]+'_R0.ict') + if len(filename_ccn)==1: + (data0,ccnlist)=read_ccn_socrates(filename_ccn[0]) + time_ccn = data0[0,:] + ccn = data0[1,:] + SS = data0[3,:] + ccn=qc_remove_neg(ccn) + timea=time_ccn + timeb=time_ccn + ccna=np.array(ccn) + ccnb=np.array(ccn) + idxa=np.logical_and(SS>0.05, SS<0.15) + ccna[idxa==False]=np.nan + SSa=0.1 + idxb=np.logical_and(SS>0.45, SS<0.55) + ccnb[idxb==False]=np.nan + SSb=0.5 + elif len(filename_ccn)==0: + timea=timem + SSa=np.nan*np.empty([len(timem)]) + ccna=np.nan*np.empty([len(timem)]) + timeb=timem + SSb=np.nan*np.empty([len(timem)]) + ccnb=np.nan*np.empty([len(timem)]) + else: + raise ValueError('find too many files') + + + 
+ #%% make plot + + figname = figpath_aircraft_timeseries+'CCN_'+campaign+'_'+date+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(timea/3600,ccna,'k.',linewidth=1,label='OBS') + for mm in range(nmodels): + ax1.plot(timem2, ccn3_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + ylim1 = ax1.get_ylim() + + ax2.plot(timeb/3600,ccnb,'k.',linewidth=1,label='OBS') + for mm in range(nmodels): + ax2.plot(timem2, ccn5_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + ylim2 = ax2.get_ylim() + + # set ylimit consistent in subplots + ax1.set_ylim([ylim1[0], ylim2[1]]) + ax2.set_ylim([ylim1[0], ylim2[1]]) + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + + # supersaturation + fig.text(0.67,0.9,'SS_obs='+format(np.nanmean(SSa),'.2f')+'%, SS_model='+SS3) + fig.text(0.67,0.4,'SS_obs='+format(np.nanmean(SSb),'.2f')+'%, SS_model='+SS5) + + ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) + ax1.set_title('CCN Number Concentration (cm$^{-3}$)',fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() + \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_timeseries_CN.py b/src/esmac_diags/plotting/plot_flight_timeseries_CN.py new file mode 100644 index 0000000..dda0a53 --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_timeseries_CN.py @@ -0,0 +1,177 @@ +""" +# plot aircraft track data +# timeseries of aerosol number concentration (CN) +# compare models and CPC measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as 
np +from ..subroutines.read_aircraft import read_cpc, read_RF_NCAR +from ..subroutines.read_netcdf import read_merged_size,read_extractflight +from ..subroutines.quality_control import qc_cpc_air + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + E3SM_aircraft_path = settings['E3SM_aircraft_path'] + figpath_aircraft_timeseries = settings['figpath_aircraft_timeseries'] + + if campaign in ['HISCALE', 'ACEENA']: + IOP = settings.get('IOP', None) + merged_size_path = settings.get('merged_size_path', None) + cpcpath = settings.get('cpcpath', None) + elif campaign in ['CSET', 'SOCRATES']: + RFpath = settings.get('RFpath', None) + else: + raise ValueError('campaign name is not recognized: '+campaign) + + #%% other settings + + if not os.path.exists(figpath_aircraft_timeseries): + os.makedirs(figpath_aircraft_timeseries) + + + #%% find files for flight information + if campaign in ['HISCALE', 'ACEENA']: + lst = glob.glob(merged_size_path+'merged_bin_*'+campaign+'*.nc') + elif campaign in ['CSET', 'SOCRATES']: + lst = glob.glob(RFpath+'RF*.PNI.nc') + else: + raise ValueError('campaign name is not recognized: '+campaign) + lst.sort() + + if len(lst)==0: + raise ValueError('cannot find any file') + + # choose files for specific IOP + if campaign=='HISCALE': + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + lst.sort() + elif campaign=='ACEENA': + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + lst.sort() + + # for each flight + for filename in lst: + + #%% read in flight data (for HISCALE and ACEENA) + if campaign in ['HISCALE', 'ACEENA']: + # get date info: + date=filename[-12:-3] + if date[-1]=='a': + 
flightidx=1 + else: + flightidx=2 + + #% read in flight information + (time,size,cvi,timeunit,cunit,long_name)=read_merged_size(filename,'CVI_inlet') + time=np.ma.compressed(time) + if campaign=='HISCALE': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_HiScale001s.ict.txt') + elif campaign=='ACEENA': + filename_c=glob.glob(cpcpath+'CPC_G1_'+date[0:8]+'*R2_ACEENA001s.ict') + filename_c.sort() + # read in data + if len(filename_c)==1 or len(filename_c)==2: # some days have two flights + (cpc,cpclist)=read_cpc(filename_c[flightidx-1]) + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + cpc=np.insert(cpc,1404,(cpc[:,1403]+cpc[:,1404])/2,axis=1) + time_cpc = cpc[0,:] + cpc10 = cpc[1,:] + cpc3 = cpc[2,:] + elif len(filename_c)==0: + time_cpc=time + cpc10=np.nan*np.empty([len(time)]) + cpc3=np.nan*np.empty([len(time)]) + else: + raise ValueError('find too many files') + # some quality checks + (cpc3,cpc10) = qc_cpc_air(cpc3,cpc10) + + #%% read in flight data (for CSET and SOCRATES) + elif campaign in ['CSET', 'SOCRATES']: + fname=filename.split('.') + date=fname[-4] + (time_cpc,cpc10,timeunit,cpc10unit,cpc10longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCN') + if campaign=='CSET': + (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCU100_RWOOU') + elif campaign=='SOCRATES': + # there are two variables: CONCU100_CVIU and CONCU100_LWII + (time_cpc,uhsas100,timeunit,uhsas100unit,uhsas100longname,cellsize,cellunit)=read_RF_NCAR(filename,'CONCU100_LWII') + + #%% read in Models + nmodels=len(Model_List) + cpc100_m = [] + cpc10_m = [] + cpc3_m = [] + for mm in range(nmodels): + filename_m = E3SM_aircraft_path+'Aircraft_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,heightm,cpc_m,timeunitm,ncn_unit,ncn_longname)=read_extractflight(filename_m,'NCN') + (timem,heightm,cpcu_m,timeunitm,ncnu_unit,ncnu_longname)=read_extractflight(filename_m,'NUCN') + 
(timem,heightm,ncnall,timeunitm,ncnall_unit,ncnall_longname)=read_extractflight(filename_m,'NCNall') + # if len(cpc_m)!=cpc.shape[1]: + # print('CPC and MAM have different dimensions! check') + # print(cpc.shape,cpc_m.shape) + # errors + cpc100_m.append(np.sum(ncnall[100:,:],0)*1e-6) # #/m3 to #/cm3 + cpc10_m.append(cpc_m*1e-6) # #/m3 to #/cm3 + cpc3_m.append(cpcu_m*1e-6) # #/m3 to #/cm3 + + timem2 = timem/3600 + + #%% make plot + + figname = figpath_aircraft_timeseries+'CN_'+campaign+'_'+date+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(time_cpc/3600,cpc10,color='k',linewidth=1,label='CPC(>10nm)') + for mm in range(nmodels): + ax1.plot(timem2, cpc10_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + ylim1 = ax1.get_ylim() + + if campaign in ['HISCALE', 'ACEENA']: + ax2.plot(time_cpc/3600,cpc3,color='k',linewidth=1,label='CPC(>3nm)') + for mm in range(nmodels): + ax2.plot(timem2, cpc3_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + elif campaign in ['CSET', 'SOCRATES']: + ax2.plot(time_cpc/3600,uhsas100,color='k',linewidth=1,label='UHSAS(>100nm)') + for mm in range(nmodels): + ax2.plot(timem2, cpc100_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + ylim2 = ax2.get_ylim() + + # set ylimit consistent in subplots + ax1.set_ylim([max(1,min(ylim1[0],ylim2[0])), max(ylim1[1],ylim2[1])]) + ax2.set_ylim([max(1,min(ylim1[0],ylim2[0])), max(ylim1[1],ylim2[1])]) + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + + ax2.set_xlabel('time (hour UTC) '+date,fontsize=14) + ax1.set_title('Aerosol Number 
Concentration (cm$^{-3}$)',fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_flight_track_height.py b/src/esmac_diags/plotting/plot_flight_track_height.py new file mode 100644 index 0000000..9801c1b --- /dev/null +++ b/src/esmac_diags/plotting/plot_flight_track_height.py @@ -0,0 +1,250 @@ +""" +# plot aircraft information +# plot 1: plot flight track location (lat/lon) with height in color +# plot 2: plot timeseries of flight height with cloud and CVI flags +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_ARMdata import read_cvi_aceena +from ..subroutines.specific_data_treatment import lwc2cflag +from ..subroutines.time_format_change import hhmmss2sec +from ..subroutines.read_aircraft import read_iwg1, read_cvi_hiscale, read_RF_NCAR + + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + lat0 = settings['lat0'] + lon0 = settings['lon0'] + figpath_aircraft_timeseries = settings['figpath_aircraft_timeseries'] + + IOP = settings.get('IOP', None) + iwgpath = settings.get('iwgpath', None) + cvipath = settings.get('cvipath', None) + RFpath = settings.get('RFpath', None) + + #%% other settings + + if not os.path.exists(figpath_aircraft_timeseries): + os.makedirs(figpath_aircraft_timeseries) + + + #%% find all flight data + + if campaign=='HISCALE': + lst = glob.glob(iwgpath+'*a2.txt') + lst.sort() + if IOP=='IOP1': + lst=lst[0:17] + elif IOP=='IOP2': + lst=lst[17:] + elif IOP[0:4]=='2016': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + elif campaign=='ACEENA': + lst = glob.glob(iwgpath+'*a2.txt') + lst.sort() + if IOP=='IOP1': + lst=lst[0:20] + elif IOP=='IOP2': + lst=lst[20:] + elif IOP[0:4]=='2017' or IOP[0:4]=='2018': + a=lst[0].split('_'+campaign+'_') + lst = glob.glob(a[0]+'*'+IOP+'*') + elif campaign in ['CSET', 'SOCRATES']: + 
lst = glob.glob(RFpath+'RF*.PNI.nc') + else: + raise ValueError('campaign name is not recognized: '+campaign) + lst.sort() + + #%% read in data and make plot + for filename in lst: + + # get date info: + fname=filename.split('.') + + #%% read in flight data (for HISCALE and ACEENA) + if campaign in ['HISCALE', 'ACEENA']: + date=fname[-3] + if date[-1]=='a': + flightidx=1 + else: + flightidx=2 + + # read in IWG data + (iwg,iwgvars)=read_iwg1(filename) + timelen = len(iwg) + if np.logical_and(campaign=='ACEENA', date=='20180216a'): + iwg.insert(1403,list(iwg[1403])) + tstr=iwg[1403][1] + tstr=tstr[0:-1]+str(int(tstr[-1])-1) + iwg[1403][1]=tstr + del iwg[-1] + # get lat, lon, height, time + lon=np.empty(timelen) + lat=np.empty(timelen) + height=np.empty(timelen) + time=np.empty(timelen) + cldflag=np.empty(timelen) + legnum=np.full(timelen,0) + for t in range(timelen): + lat[t]=float(iwg[t][2]) + lon[t]=float(iwg[t][3]) + height[t]=float(iwg[t][4]) + cldflag[t]=int(iwg[t][35]) + legnum[t]=int(iwg[t][-1]) + timestr=iwg[t][1].split(' ') + time[t]=hhmmss2sec(timestr[1]) + datestr=timestr[0] + + # read in CVI + if campaign=='HISCALE': + filename_c=glob.glob(cvipath+'CVI_G1_'+date[0:8]+'*R4_HISCALE_001s.ict.txt') + filename_c.sort() + # read in data + if len(filename_c)==1 or len(filename_c)==2: + (cvi,cvilist)=read_cvi_hiscale(filename_c[flightidx-1]) + time_cvi = cvi[0,:] + cvi_inlet=cvi[-1,:] + if all(time_cvi==time)==False: + raise ValueError('time dimension is incosistent') + elif len(filename_c)==0: + time_cvi=time + cvi_inlet=np.nan*np.empty([len(time)]) + else: + raise ValueError('find too many files: '+filename_c) + + elif campaign=='ACEENA': + filename_c=glob.glob(cvipath+'enaaafinletcviF1.c1.'+date[0:8]+'*.nc') + filename_c.sort() + # read in data + if len(filename_c)==1: + (time_c,lon_c,lat_c,alt_c,timeunit_c,cvimode,cvi_inlet,enhance_factor,dilution_factor)=read_cvi_aceena(filename_c[0]) + if date=='20180216a': + 
time_c=np.insert(time_c,1403,(time_c[1402]+time_c[1403])/2) + cvi_inlet=np.insert(cvi_inlet,1403,cvi_inlet[1403]) + if all(time_c==time)==False: + raise ValueError('time dimension is incosistent') + elif len(filename_c)==0: + time_cvi=time + cvi_inlet=np.nan*np.empty([len(time)]) + else: + raise ValueError('find too many files: '+filename_c) + # cvi_inlet[cvi_inlet==-9]=1 # if cvi_inlet is unfunctional, use fims as good data + + else: + raise ValueError('do not recognize this campaign: '+campaign) + + #%% read in flight data (for CSET and SOCRATES) + elif campaign in ['CSET', 'SOCRATES']: + date=fname[-4] + print('input data for '+date) + (time,height,timeunit,hunit,hlongname,cellsize,cellunit)=read_RF_NCAR(filename,'ALT') + (time,lat,timeunit,latunit,latlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LAT') + (time,lon,timeunit,lonunit,lonlongname,cellsize,cellunit)=read_RF_NCAR(filename,'LON') + (time,lwc,timeunit,lwcunit,lwclongname,cellsize,cellunit)=read_RF_NCAR(filename,'PLWCC') + # lon[lon<0]=lon[lon<0]+360 + # calculate cloud flag based on LWC + cldflag=lwc2cflag(lwc,lwcunit) + if campaign=='SOCRATES': + (time,cvi_inlet,timeunit,cviunit,cvilongname,cellsize,cellunit)=read_RF_NCAR(filename,'CVINLET') + else: + cvi_inlet=np.nan*np.empty([len(time)]) + + + #%% plot flight tracks: + lat[lat<-9000]=np.nan + lon[lon<-9000]=np.nan + height[height<-9000]=np.nan + + # change longitude to [-180, 180] + if lon0>180: + lon0=lon0-360 + + try: + # os.environ['PROJ_LIB'] = r'c:\Users\tang357\Anaconda3\pkgs\basemap-1.3.0-py38ha7665c8_0\Library\share' + # from mpl_toolkits.basemap import Basemap + # figname = figpath_aircraft_timeseries + 'flighttrack_'+campaign+'_'+date+'.png' + # print('plot flight track to '+figname) + # fig,ax = plt.subplots(figsize=(8,5)) # figsize in inches + # plt.tight_layout(pad=0.1, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + # if campaign in ['CSET', 'SOCRATES']: + # m = 
Basemap(llcrnrlon=min(np.floor(min(lon)),np.floor(lon0))-2,llcrnrlat=min(np.floor(min(lat)),np.floor(lat0))-2,\ + # urcrnrlon=max(np.ceil(max(lon)),np.ceil(lon0))+2,urcrnrlat=max(np.ceil(max(lat)),np.ceil(lat0))+2,\ + # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=np.min(lat),lon_0=np.min(lon)) #,lat_ts=5.) + # m.drawparallels(np.arange(-90,90,5),labels=[1,0,0,0]) + # m.drawmeridians(np.arange(-180,180,5),labels=[0,0,0,1]) + # m.drawcoastlines() + # m.fillcontinents() + # elif campaign=='HISCALE': + # m = Basemap(llcrnrlon=-99,llcrnrlat=35,urcrnrlon=-95,urcrnrlat=38,\ + # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=lat0,lon_0=lon0) #,lat_ts=5.) + # m.drawparallels(np.arange(30,40,1),labels=[1,0,0,0]) + # m.drawmeridians(np.arange(-110,-90,1),labels=[0,0,0,1]) + # m.drawstates() + # x2,y2=m(lon0,lat0) + # m.scatter(x2,y2,s=100,marker='*',color='k') + # elif campaign=='ACEENA': + # m = Basemap(llcrnrlon=-30,llcrnrlat=37,urcrnrlon=-25,urcrnrlat=41,\ + # resolution='l',rsphere=(6378137.00,6356752.3142),projection='lcc',lat_0=lat0,lon_0=lon0) #,lat_ts=5.) 
+ # m.drawparallels(np.arange(30,42,1),labels=[1,0,0,0]) + # m.drawmeridians(np.arange(-30,-20,1),labels=[0,0,0,1]) + # m.drawcoastlines() + # m.fillcontinents() + # x2,y2=m(lon0,lat0) + # m.scatter(x2,y2,s=100,marker='*',color='k') + # x, y = m(lon,lat) + # h=m.scatter(x,y,s=1,c=height,cmap='jet') + # ax.set_title('Flight track '+date,fontsize=15) + # cbar=fig.colorbar(h) + # except: + figname = figpath_aircraft_timeseries + 'flighttrack_'+campaign+'_'+date+'.png' + print('plot flight track to '+figname) + fig,ax = plt.subplots(figsize=(8,5)) # figsize in inches + # plot the location of the campaign site: + ax.plot([lon0,lon0],[lat0-50, lat0+50],':',color=[.8,.8,.8]) + ax.plot([lon0-50, lon0+50],[lat0,lat0],':',color=[.8,.8,.8]) + # plot flight track + h=ax.scatter(lon,lat,s=1,c=height,cmap='jet',vmin=0,vmax=max(height)) #vmin/vmax: color range + ax.set_xlim(min(np.floor(min(lon)),np.floor(lon0)), max(np.ceil(max(lon)),np.ceil(lon0))) + ax.set_ylim(min(np.floor(min(lat)),np.floor(lat0)), max(np.ceil(max(lat)),np.ceil(lat0))) + ax.tick_params(color='k',labelsize=14) + ax.set_xlabel('longitude',fontsize=14) + ax.set_ylabel('latitude',fontsize=14) + ax.set_title('Flight track '+date,fontsize=15) + cbar=fig.colorbar(h) + fig.text(0.81,0.91, 'm MSL') + except: + raise ValueError("cannot make flight track plot") + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() + + #%% plot flight height and flag/leg timeseries + figname = figpath_aircraft_timeseries + 'flightheight_'+campaign+'_'+date+'.png' + print('plot flight height timeseries to '+figname) + + fig,ax1 = plt.subplots(figsize=(8,2)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + h11=ax1.plot(time/3600,height/1000,color='k',linewidth=1) + if campaign in ['HISCALE', 'ACEENA']: + for ll in range(1,max(legnum)+1): + idx=legnum==ll + ax1.plot(time[idx]/3600,height[idx]/1000,color='b',linewidth=2) + 
h12=ax1.plot(time/3600,time*0+max(height)*0.00105,color='k',linewidth=.2) + cvi2=0.0*cvi_inlet + cvi2[cvi_inlet==1]=np.nan + cvi2=cvi2+max(height)*0.00105 + h13=ax1.plot(time/3600,cvi2,color='k',linewidth=2) + h14=ax1.vlines(time[cldflag==1]/3600,0,max(height)*0.0011,color='silver',linewidth=0.1) + # ax1.set_xlim(time[0]/3600-0.3, time[-1]/3600+0.3) + ax1.set_ylim(0,max(height)*0.0011) + ax1.set_ylabel('height (km)',fontsize=12) + ax1.set_xlabel('time (hour UTC) '+date,fontsize=12) + ax1.set_title('thin black: flight track. blue: flight legs. gray vertical lines: cloud flag. thick black: CVI mode', fontsize=10) + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() + diff --git a/src/esmac_diags/plotting/plot_profile_cloud.py b/src/esmac_diags/plotting/plot_profile_cloud.py new file mode 100644 index 0000000..f0a5efc --- /dev/null +++ b/src/esmac_diags/plotting/plot_profile_cloud.py @@ -0,0 +1,127 @@ +""" +# plot vertical profile of cloud fraction +# for each day of selected IOP +# compare models and surface measurements +""" + +import os +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import timeunit2cday,yyyymmdd2cday,cday2mmdd +from ..subroutines.read_ARMdata import read_armbe +from ..subroutines.read_netcdf import read_E3SM_z + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + lon0 = settings['lon0'] + Model_List = settings['Model_List'] + armbepath = settings['armbepath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_profile_path = settings['E3SM_profile_path'] + figpath_profile_timeseries = settings['figpath_profile_timeseries'] + + IOP = settings.get('IOP', None) + + #%% other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if not os.path.exists(figpath_profile_timeseries): + os.makedirs(figpath_profile_timeseries) + + + #%% read in obs data + if campaign=='ACEENA': + if IOP=='IOP1': + filename_armbe = armbepath+'enaarmbecldradC1.c1.20170101.003000.nc' + year='2017' + elif IOP=='IOP2': + filename_armbe = armbepath+'enaarmbecldradC1.c1.20180101.003000.nc' + year='2018' + elif campaign=='HISCALE': + filename_armbe = armbepath+'sgparmbecldradC1.c1.20160101.003000.nc' + year='2016' + + (time0,height0,cld0,time0unit,cld0unit) = read_armbe(filename_armbe,'cld_frac') + + + time0=time0/86400.+timeunit2cday(time0unit) + if campaign=='HISCALE': + # observation is leap year. change the time for comparison with noleap model output. + # note that it is not suitable for January and February + time0=time0-1 + + #%% read in model + + cldm = [] + nmodels = len(Model_List) + for mm in range(nmodels): + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_profile_path+'Profile_vars_'+campaign+'_'+Model_List[mm]+'.'+date+'.nc' + (time,height,data,timemunit,dataunit,long_name)=read_E3SM_z(filename_input,'CLOUD') + + timem = np.hstack((timem,time)) + if cday==cday1: + datam=data*100 + else: + datam = np.vstack((datam,data*100)) + + data=data*100. + dataunit='%' + cldm.append(datam) + + # change to local solar time + timeshift = lon0/360*24 + if timeshift>12: + timeshift=timeshift-24 + time0 = time0+timeshift/24. + timem = timem+timeshift/24. + + #%% plot cloud for each day in time_range + + + for cday in range(cday1,cday2+1): + idxo=np.logical_and(time0>cday-0.1, time0cday-0.1, timem1: # not NaN + for dd in range(len(days)): + for tt in range(len(time_dc)): + time_tmp = days[dd]+time_dc[tt]/1440. 
+ idx = np.abs(t_obs-time_tmp).argmin() + if (t_obs[idx]-time_tmp)*1440 <= 30: + so4_o_dc[tt,dd] = so4_obs[idx] + if len(org_obs)>1: + for dd in range(len(days)): + for tt in range(len(time_dc)): + time_tmp = days[dd]+time_dc[tt]/1440. + idx = np.abs(t_obs-time_tmp).argmin() + if (t_obs[idx]-time_tmp)*1440 <= 30: + org_o_dc[tt,dd] = org_obs[idx] + so4_o_dc = np.nanmean(so4_o_dc,1) + org_o_dc = np.nanmean(org_o_dc,1) + + # for E3SM data + so4_m_dc = [] + org_m_dc = [] + for mm in range(nmodels): + tmp_so4 = np.full((24,len(days)),np.nan) + tmp_org = np.full((24,len(days)),np.nan) + for dd in range(len(days)): + idx=np.logical_and(timem2>=days[dd], timem20.05, SS<0.15) + t_ccna = t_ccn[idx] + ccna = ccn[idx] + SSa = 0.1 + # SS=0.5% + idx = np.logical_and(SS>0.4, SS<0.6) + t_ccnb = t_ccn[idx] + ccnb = ccn[idx] + SSb = 0.5 + + elif campaign=='HISCALE': + if IOP=='IOP1': + (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP1(ccnsfcpath) + sssfc=[int(x*10) for x in sssfc] + sssfc=np.array(sssfc)/10. + times_ccn=np.array(times_ccn) + ccnsfc=np.array(ccnsfc) + elif IOP=='IOP2': + (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP2(ccnsfcpath) + sssfc=[int(x*10) for x in sssfc] + sssfc=np.array(sssfc)/10. 
+ times_ccn=np.array(times_ccn) + ccnsfc=np.array(ccnsfc) + # find the nearest Supersaturation in Obs comparing to model + # 0.1% + idx = sssfc==0.1 + ccna = ccnsfc[idx] + t_ccna = times_ccn[idx] + SSa = 0.1 + # 0.5% + idx = sssfc==0.5 + ccnb = ccnsfc[idx] + t_ccnb = times_ccn[idx] + SSb = 0.5 + + #%% read in models + ccna_m = [] + ccnb_m = [] + nmodels = len(Model_List) + for mm in range(nmodels): + tmp_CCN3=np.empty(0) + tmp_CCN5=np.empty(0) + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (time,ccn3,timemunit,dataunit,ccn3_longname)=read_E3SM(filename_input,'CCN3') + (time,ccn5,timemunit,dataunit,ccn5_longname)=read_E3SM(filename_input,'CCN5') + + timem = np.hstack((timem,time)) + tmp_CCN3 = np.hstack((tmp_CCN3,ccn3)) + tmp_CCN5 = np.hstack((tmp_CCN5,ccn5)) + + ccna_m.append(tmp_CCN3) + ccnb_m.append(tmp_CCN5) + + # get supersaturation + SS3 = ccn3_longname.split('=')[-1] + SS5 = ccn5_longname.split('=')[-1] + + + + #%% calculate diurnal cycle + days = np.arange(cday1, cday2+1) + + time_dc = np.arange(30,1440.,60) + ccna_o_dc = np.full((len(time_dc),len(days)),np.nan) + ccnb_o_dc = np.full((len(time_dc),len(days)),np.nan) + for dd in range(len(days)): + for tt in range(len(time_dc)): + time_tmp = days[dd]+time_dc[tt]/1440. 
+ idx = np.abs(t_ccna-time_tmp).argmin() + if (t_ccna[idx]-time_tmp)*1440 <= 30: + ccna_o_dc[tt,dd] = ccna[idx] + idx = np.abs(t_ccnb-time_tmp).argmin() + if (t_ccnb[idx]-time_tmp)*1440 <= 30: + ccnb_o_dc[tt,dd] = ccnb[idx] + ccna_o_dc = np.nanmean(ccna_o_dc,1) + ccnb_o_dc = np.nanmean(ccnb_o_dc,1) + + # for E3SM data + ccna_m_dc = [] + ccnb_m_dc = [] + for mm in range(nmodels): + tmp_ccna = np.full((24,len(days)),np.nan) + tmp_ccnb = np.full((24,len(days)),np.nan) + for dd in range(len(days)): + idx=np.logical_and(timem>=days[dd], timem1: # not NaN + for dd in range(len(days)): + for tt in range(len(time_dc)): + time_tmp = days[dd]+time_dc[tt]/1440. + idx = np.abs(t_cpc-time_tmp).argmin() + if (t_cpc[idx]-time_tmp)*1440 <= 30: + cpc_o_dc[tt,dd] = cpc[idx] + if len(cpcu)>1: + for dd in range(len(days)): + for tt in range(len(time_dc)): + time_tmp = days[dd]+time_dc[tt]/1440. + idx = np.abs(t_cpcu-time_tmp).argmin() + if (t_cpcu[idx]-time_tmp)*1440 <= 30: + cpcu_o_dc[tt,dd] = cpcu[idx] + cpc_o_dc = np.nanmean(cpc_o_dc,1) + cpcu_o_dc = np.nanmean(cpcu_o_dc,1) + + # for E3SM data + ncn_m_dc = [] + nucn_m_dc = [] + for mm in range(nmodels): + tmp_ncn = np.full((24,len(days)),np.nan) + tmp_nucn = np.full((24,len(days)),np.nan) + for dd in range(len(days)): + idx=np.logical_and(timem>=days[dd], timem10nm)') + for mm in range(nmodels): + ax1.plot(time_dc/60, ncn_m_dc[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax1.tick_params(color='k',labelsize=12) + # ylim1 = ax1.get_ylim() + + ax2.plot(time_dc/60,cpcu_o_dc,color='k',linewidth=1,label='CPC(>3nm)') + for mm in range(nmodels): + ax2.plot(time_dc/60, nucn_m_dc[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax2.tick_params(color='k',labelsize=12) + # ylim2 = ax2.get_ylim() + + ax1.set_xlim(0,24) + ax2.set_xlim(0,24) + ax1.set_xticks(np.arange(0,24,3)) + ax2.set_xticks(np.arange(0,24,3)) + + # set ylimit consistent in subplots + # ax1.set_yticks([10,100,1000,10000,100000]) + # 
ax2.set_yticks([10,100,1000,10000,100000]) + # ax1.set_yscale('log') + # ax2.set_yscale('log') + # ax1.set_ylim([ylim1[0], ylim2[1]]) + # ax2.set_ylim([ylim1[0], ylim2[1]]) + + + ax1.legend(loc='center right', shadow=False, fontsize='medium',bbox_to_anchor=(1.3, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='medium',bbox_to_anchor=(1.3, .5)) + + ax2.set_xlabel('Hour (UTC)',fontsize=12) + ax1.set_title('Aerosol Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_sfc_pdf_AerosolSize.py b/src/esmac_diags/plotting/plot_sfc_pdf_AerosolSize.py new file mode 100644 index 0000000..0085fbd --- /dev/null +++ b/src/esmac_diags/plotting/plot_sfc_pdf_AerosolSize.py @@ -0,0 +1,220 @@ +""" +# plot mean aerosol size ditribution for surface data +# compare models and surface measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd +from ..subroutines.read_surface import read_smpsb_pnnl,read_smps_bin +from ..subroutines.read_ARMdata import read_uhsas, read_smps_bnl +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.specific_data_treatment import avg_time_2d +from ..subroutines.quality_control import qc_mask_qcflag,qc_correction_nanosmps + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_statistics = settings['figpath_sfc_statistics'] + + if campaign=='ACEENA': + IOP = settings['IOP'] + uhsassfcpath = settings['uhsassfcpath'] + elif campaign=='HISCALE': + IOP = settings['IOP'] + if IOP=='IOP1': + smps_bnl_path = 
settings['smps_bnl_path'] + nanosmps_bnl_path = settings['nanosmps_bnl_path'] + elif IOP=='IOP2': + smps_pnnl_path = settings['smps_pnnl_path'] + else: + raise ValueError('campaign name is not recognized: '+campaign) + + #%% other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if not os.path.exists(figpath_sfc_statistics): + os.makedirs(figpath_sfc_statistics) + + #%% read in obs data + if campaign=='ACEENA': + if IOP=='IOP1': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.2017062*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201707*') + elif IOP=='IOP2': + lst = glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201801*')+glob.glob(uhsassfcpath+'enaaosuhsasC1.a1.201802*') + lst.sort() + t_uhsas=np.empty(0) + uhsas=np.empty((0,99)) + for filename in lst: + (time,dmin,dmax,data,timeunit,dataunit,long_name) = read_uhsas(filename) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker plot + time2=np.arange(1800,86400,3600) + data2 = avg_time_2d(time,data,time2) + t_uhsas=np.hstack((t_uhsas, cday+time2/86400)) + uhsas=np.vstack((uhsas, data2)) + size_u = (dmin+dmax)/2 + # change to dN/dlogDp + dlnDp_u=np.empty(99) + for bb in range(len(size_u)): + dlnDp_u[bb]=np.log10(dmax[bb]/dmin[bb]) + uhsas[:,bb]=uhsas[:,bb]/dlnDp_u[bb] + + time = np.array(t_uhsas) + size = np.array(size_u) + obs = np.array(uhsas.T) + + elif campaign=='HISCALE': + if IOP=='IOP1': + lst = glob.glob(smps_bnl_path+'*.nc') + lst.sort() + t_smps=np.empty(0) + smps=np.empty((0,192)) + for filename in lst: + (time,size,flag,timeunit,dataunit,smps_longname)=read_smps_bnl(filename,'status_flag') + 
(time,size,data,timeunit,smpsunit,smps_longname)=read_smps_bnl(filename,'number_size_distribution') + data=qc_mask_qcflag(data,flag) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker plot + time2=np.arange(1800,86400,3600) + data2 = avg_time_2d(time,data,time2) + t_smps=np.hstack((t_smps, cday+time2/86400)) + smps=np.vstack((smps, data2)) + smps=smps.T + # combine with nanoSMPS + lst2 = glob.glob(nanosmps_bnl_path+'*.nc') + lst2.sort() + t_nano=np.empty(0) + nanosmps=np.empty((0,192)) + for filename2 in lst2: + (timen,sizen,flagn,timenunit,datanunit,long_name)=read_smps_bnl(filename2,'status_flag') + (timen,sizen,datan,timenunit,nanounit,nanoname)=read_smps_bnl(filename2,'number_size_distribution') + datan=qc_mask_qcflag(datan,flagn) + timestr=timenunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker plot + time2=np.arange(1800,86400,3600) + data2 = avg_time_2d(timen,datan,time2) + t_nano=np.hstack((t_nano, cday+time2/86400)) + nanosmps=np.vstack((nanosmps, data2)) + # nanosmps is overcounting, adjust nanosmps value for smooth transition to SMPS + nanosmps=qc_correction_nanosmps(nanosmps.T) + for tt in range(smps.shape[1]): + if any(t_nano==t_smps[tt]): + smps[0:80,tt]=nanosmps[0:80,t_nano==t_smps[tt]].reshape(80) + + elif IOP=='IOP2': + data=read_smpsb_pnnl(smps_pnnl_path+'HiScaleSMPSb_SGP_20160827_R1.ict') + size=read_smps_bin(smps_pnnl_path+'NSD_column_size_chart.txt') + time=data[0,:] + smps=data[1:-1,:] + flag=data[-1,:] + smps=qc_mask_qcflag(smps.T,flag).T + cday=yyyymmdd2cday('2016-08-27') + # average in time for quicker plot + time2=np.arange(time[0],time[-1]+1800,3600) + data2 = avg_time_2d(time,smps.T,time2) + t_smps=cday+time2/86400 + smps=data2.T + + time = np.array(t_smps) + size = np.array(size) + obs = np.array(smps) + + # SMPS is already divided by log10 + + else: + raise ValueError('does not recognize this campaign: '+campaign) + + #%% 
read in models + model = [] + nmodels = len(Model_List) + for mm in range(nmodels): + tmp_data=np.empty((3000,0)) + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCNall') + + timem = np.hstack((timem,time)) + tmp_data = np.hstack((tmp_data,ncn*1e-6)) + + # change to dN/dlog10Dp + for bb in range(3000): + dlnDp=np.log10((bb+2)/(bb+1)) + tmp_data[bb,:]=tmp_data[bb,:]/dlnDp + + model.append(tmp_data) + + #%% calculate mean pdf + pdf_obs=np.nanmean(obs,1) + pdf_model=[None]*nmodels + for mm in range(nmodels): + pdf_model[mm]=np.nanmean(model[mm],1) + + #%% + pct1_o = [np.nanpercentile(obs[i,:],10) for i in range(len(size))] + pct2_o = [np.nanpercentile(obs[i,:],90) for i in range(len(size))] + pct1_m = [[] for mm in range(nmodels)] + pct2_m = [[] for mm in range(nmodels)] + for mm in range(nmodels): + pct1_m[mm] = [np.nanpercentile(model[mm][i,:],10) for i in range(3000)] + pct2_m[mm] = [np.nanpercentile(model[mm][i,:],90) for i in range(3000)] + + # import scipy.stats as stats + # sem_o = np.ma.filled(stats.sem(obs,1,nan_policy='omit'),np.nan) + # sem_m = [[] for mm in range(nmodels)] + # for mm in range(nmodels): + # sem_m[mm] = np.ma.filled(stats.sem(model[mm],1,nan_policy='omit'),np.nan) + + #%% make plot + # not plotting data if the mean value is 0 + pdf_obs[pdf_obs==0] = np.nan + + figname = figpath_sfc_statistics+'pdf_AerosolSize_'+campaign+'_'+IOP+'.png' + + print('plotting figures to '+figname) + + fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches + + ax.plot(size,pdf_obs,color='k',label='Obs') + for mm in range(nmodels): + ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + + ax.fill_between(size,pct1_o,pct2_o, alpha=0.5, facecolor='gray') + for mm in range(nmodels): + 
ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], alpha=0.2, facecolor=color_model[mm]) + + ax.legend(loc='upper right', shadow=False, fontsize='medium') + ax.tick_params(color='k',labelsize=12) + ax.set_xscale('log') + ax.set_yscale('log') + ax.set_ylim(0.01,1e4) + ax.set_xlim(0.67,4500) + ax.set_xlabel('Diameter (nm)',fontsize=13) + ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) + ax.set_title(campaign+' '+IOP,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + # plt.close() diff --git a/src/esmac_diags/plotting/plot_sfc_pie_AerosolComposition.py b/src/esmac_diags/plotting/plot_sfc_pie_AerosolComposition.py new file mode 100644 index 0000000..13fa642 --- /dev/null +++ b/src/esmac_diags/plotting/plot_sfc_pie_AerosolComposition.py @@ -0,0 +1,180 @@ +""" +# plot surface aerosol composition in a pie plot +# plot models and surface measurements separately +""" + +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday,cday2mmdd +from ..subroutines.read_ARMdata import read_acsm +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.quality_control import qc_remove_neg,qc_acsm_org_max + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + acsmpath = settings['acsmpath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_statistics = settings['figpath_sfc_statistics'] + + IOP = settings.get('IOP', None) + + #%% other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + import os + if not os.path.exists(figpath_sfc_statistics): + os.makedirs(figpath_sfc_statistics) + + + #%% read in obs data + if campaign=='ACEENA': + if IOP=='IOP1': + lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201706*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201707*') + elif IOP=='IOP2': + lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201801*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201802*') + lst.sort() + elif campaign=='HISCALE': + if IOP=='IOP1': + lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201604*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201605*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201608*.cdf') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201609*.cdf') + lst.sort() + else: + raise ValueError('surface aerosol composition is only available in HISCALE or ACEENA. check: '+campaign) + + t_obs=np.empty(0) + so4_obs=np.empty(0) + org_obs=np.empty(0) + nh4_obs=np.empty(0) + no3_obs=np.empty(0) + chl_obs=np.empty(0) + for filename in lst: + (times_obs,so4sfc,timeunit,so4sfcunit)=read_acsm(filename,'sulfate') + (times_obs,orgsfc,timeunit,orgsfcunit)=read_acsm(filename,'total_organics') + (times_obs,nh4sfc,timeunit,nh4sfcunit)=read_acsm(filename,'ammonium') + (times_obs,no3sfc,timeunit,no3sfcunit)=read_acsm(filename,'nitrate') + (times_obs,chlsfc,timeunit,chlsfcunit)=read_acsm(filename,'chloride') + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + so4_obs=np.hstack((so4_obs, so4sfc)) + org_obs=np.hstack((org_obs, orgsfc)) + nh4_obs=np.hstack((nh4_obs, nh4sfc)) + no3_obs=np.hstack((no3_obs, no3sfc)) + chl_obs=np.hstack((chl_obs, chlsfc)) + so4_obs=qc_remove_neg(so4_obs) + nh4_obs=qc_remove_neg(nh4_obs) + no3_obs=qc_remove_neg(no3_obs) + chl_obs=qc_remove_neg(chl_obs) + org_obs=qc_remove_neg(org_obs) + org_obs=qc_acsm_org_max(org_obs) + + #%% read in models + nmodels = len(Model_List) + 
model_org = list() + model_so4 = list() + model_bc = list() + model_dst = list() + model_ncl = list() + + for mm in range(nmodels): + bcvarname=['bc_a1','bc_a3','bc_a4'] + dstvarname=['dst_a1','dst_a3'] + nclvarname=['ncl_a1','ncl_a2','ncl_a3'] + so4varname=['so4_a1','so4_a2','so4_a3'] + orgvarname=['soa_a1','soa_a2','soa_a3','pom_a1','pom_a3','pom_a4',\ + 'mom_a1','mom_a2','mom_a3','mom_a4'] + if Model_List[mm]=='NucSoaCond': + so4varname.append('so4_a5') + orgvarname.append('soa_a5') + + timem2 = np.array([]) + tmp_so4 = np.empty(0) + tmp_org = np.empty(0) + tmp_bc = np.empty(0) + tmp_dst = np.empty(0) + tmp_ncl = np.empty(0) + ps = np.empty(0) + ts = np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,so4all,timeunitm,so4unit,so4name)=read_E3SM(filename_input,so4varname) + (timem,orgall,timeunitm,orgunit,orgname)=read_E3SM(filename_input,orgvarname) + (timem,bcall,timeunitm,bcunit,bcname)=read_E3SM(filename_input,bcvarname) + (timem,dstall,timeunitm,dstunit,dstname)=read_E3SM(filename_input,dstvarname) + (timem,nclall,timeunitm,nclunit,nclname)=read_E3SM(filename_input,nclvarname) + (timem,[psm,tsm],timeunitm,varunit,varlongname)=read_E3SM(filename_input,['PS','T']) + + tmp_so4 = np.hstack((tmp_so4,sum(so4all))) + tmp_org = np.hstack((tmp_org,sum(orgall))) + tmp_bc = np.hstack((tmp_bc,sum(bcall))) + tmp_dst = np.hstack((tmp_dst,sum(dstall))) + tmp_ncl = np.hstack((tmp_ncl,sum(nclall))) + ps = np.hstack((ps,psm)) + ts = np.hstack((ts,tsm)) + timem2 = np.hstack((timem2,timem)) + + model_so4.append(tmp_so4) + model_org.append(tmp_org) + model_bc.append(tmp_bc) + model_dst.append(tmp_dst) + model_ncl.append(tmp_ncl) + + # change E3SM unit from kg/kg to ug/m3 + rho = ps/287.06/ts + + for mm in range(nmodels): + model_so4[mm]=model_so4[mm]*1e9*rho + model_org[mm]=model_org[mm]*1e9*rho + 
model_bc[mm]=model_bc[mm]*1e9*rho + model_dst[mm]=model_dst[mm]*1e9*rho + model_ncl[mm]=model_ncl[mm]*1e9*rho + + #%% Pie plot + + figname = figpath_sfc_statistics+'Pieplot_AerosolComposition_'+campaign+'_'+IOP+'.png' + print('plotting figures to '+figname) + + fig,ax = plt.subplots(1,nmodels+1,figsize=((nmodels+1)*3.5,3.5)) # figsize in inches + # colors = ['limegreen', 'red', 'b', 'y', 'orange' ] + + colors_o = ['limegreen', 'red', 'orange', 'lightblue', 'yellow'] + labels_o = ['ORG', 'SO4', 'NO3', 'NH4', 'CHL'] + sizeo = [np.nanmean(org_obs),np.nanmean(so4_obs),np.nanmean(no3_obs),np.nanmean(nh4_obs),np.nanmean(chl_obs)] + + colors_m = ['limegreen', 'red', 'k', 'silver','gray'] + labels_m = ['ORG', 'SO4', 'BC', 'DST', 'NCL'] + sizem = [] + for mm in range(nmodels): + sizem.append([np.mean(model_org[mm]),np.mean(model_so4[mm]),np.mean(model_bc[mm]),np.mean(model_dst[mm]),np.mean(model_ncl[mm])]) + + def absolute_value(val): + a=np.round(val*sum(sizeo))/100 + return a + ax[0].pie(sizeo/sum(sizeo),labels=labels_o,colors=colors_o, autopct=absolute_value) # autopct='%1.1f%%' + for mm in range(nmodels): + def absolute_valuemm(val): + a=np.round(val*sum(sizem[mm]))/100 + return a + ax[mm+1].pie(sizem[mm]/sum(sizem[mm]),labels=labels_m, colors=colors_m, autopct=absolute_valuemm) + + ax[0].set_title('Obs',fontsize=14) + for mm in range(nmodels): + ax[mm+1].set_title(Model_List[mm],fontsize=14) + fig.text(.5,.15,'unit: $\mu$g/m$^3$') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_sfc_timeseries_AerosolComposition.py b/src/esmac_diags/plotting/plot_sfc_timeseries_AerosolComposition.py new file mode 100644 index 0000000..6cb3220 --- /dev/null +++ b/src/esmac_diags/plotting/plot_sfc_timeseries_AerosolComposition.py @@ -0,0 +1,149 @@ +""" +# plot surface timeseries of aerosol composition +# compare models and surface measurements +""" + +import os +import glob +import 
matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday,cday2mmdd +from ..subroutines.read_ARMdata import read_acsm +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.specific_data_treatment import avg_time_1d +from ..subroutines.quality_control import qc_remove_neg,qc_acsm_org_max + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + acsmpath = settings['acsmpath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_timeseries = settings['figpath_sfc_timeseries'] + + IOP = settings.get('IOP', None) + + #%% other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if not os.path.exists(figpath_sfc_timeseries): + os.makedirs(figpath_sfc_timeseries) + + #%% read in obs data + if campaign=='ACEENA': + if IOP=='IOP1': + lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201706*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201707*') + elif IOP=='IOP2': + lst = glob.glob(acsmpath+'enaaosacsmC1.a1.201801*') + glob.glob(acsmpath+'enaaosacsmC1.a1.201802*') + lst.sort() + elif campaign=='HISCALE': + if IOP=='IOP1': + lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201604*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201605*') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(acsmpath+'sgpaosacsmC1.b1.201608*.cdf') + glob.glob(acsmpath+'sgpaosacsmC1.b1.201609*.cdf') + lst.sort() + + t_obs=np.empty(0) + so4_obs=np.empty(0) + org_obs=np.empty(0) + for filename in lst: + (times_obs,so4sfc,timeunit,so4sfcunit)=read_acsm(filename,'sulfate') + (times_obs,orgsfc,timeunit,orgsfcunit)=read_acsm(filename,'total_organics') + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker plot + time2=np.arange(1800,86400,3600) + so42 = avg_time_1d(np.array(times_obs),np.array(so4sfc),time2) + org2 = avg_time_1d(np.array(times_obs),np.array(orgsfc),time2) + t_obs=np.hstack((t_obs, cday+time2/86400)) + so4_obs=np.hstack((so4_obs, so42)) + org_obs=np.hstack((org_obs, org2)) + so4_obs=qc_remove_neg(so4_obs) + org_obs=qc_remove_neg(org_obs) + org_obs=qc_acsm_org_max(org_obs) + + + #%% read in models + nmodels = len(Model_List) + model_org = list() + model_so4 = list() + + for mm in range(nmodels): + so4varname=['so4_a1','so4_a2','so4_a3'] + orgvarname=['soa_a1','soa_a2','soa_a3','pom_a1','pom_a3','pom_a4',\ + 'mom_a1','mom_a2','mom_a3','mom_a4'] + if Model_List[mm]=='NucSoaCond': + so4varname.append('so4_a5') + orgvarname.append('soa_a5') + + timem2 = np.array([]) + tmp_so4 = np.empty(0) + tmp_org = 
np.empty(0) + ps = np.empty(0) + ts = np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + + (timem,so4all,timeunitm,so4unit,so4name)=read_E3SM(filename_input,so4varname) + (timem,orgall,timeunitm,orgunit,orgname)=read_E3SM(filename_input,orgvarname) + (timem,[psm,tsm],timeunitm,varunit,varlongname)=read_E3SM(filename_input,['PS','T']) + + tmp_so4 = np.hstack((tmp_so4,sum(so4all))) + tmp_org = np.hstack((tmp_org,sum(orgall))) + ps = np.hstack((ps,psm)) + ts = np.hstack((ts,tsm)) + timem2 = np.hstack((timem2,timem)) + + model_org.append(tmp_org) + model_so4.append(tmp_so4) + + # change E3SM unit from kg/kg to ug/m3 + rho = ps/287.06/ts + + for mm in range(nmodels): + model_so4[mm]=model_so4[mm]*1e9*rho + model_org[mm]=model_org[mm]*1e9*rho + + #%% make plot + + figname = figpath_sfc_timeseries+'timeseries_AerosolComposition_'+campaign+'_'+IOP+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=0.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(t_obs,so4_obs,color='k',linewidth=1,label='OBS (SO4)') + for mm in range(nmodels): + ax1.plot(timem2, model_so4[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + + ax2.plot(t_obs,org_obs,color='k',linewidth=1,label='OBS (ORG)') + for mm in range(nmodels): + ax2.plot(timem2, model_org[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + + ax1.set_xlim(cday1,cday2) + ax2.set_xlim(cday1,cday2) + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + + ax2.set_xlabel('Calendar Day',fontsize=14) 
+ + ax1.set_title('Aerosol Sulfate and Organic Concentration ($\mu$g/m$^3$) '+campaign+' '+IOP,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) diff --git a/src/esmac_diags/plotting/plot_sfc_timeseries_CCN.py b/src/esmac_diags/plotting/plot_sfc_timeseries_CCN.py new file mode 100644 index 0000000..49ad30a --- /dev/null +++ b/src/esmac_diags/plotting/plot_sfc_timeseries_CCN.py @@ -0,0 +1,168 @@ +""" +# plot surface timeseries of CCN size distribution +# compare models and surface measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday,cday2mmdd +from ..subroutines.read_ARMdata import read_ccn +from ..subroutines.read_surface import read_CCN_hiscale_IOP1, read_CCN_hiscale_IOP2 +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.quality_control import qc_remove_neg,qc_mask_qcflag,qc_ccn_max + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + ccnsfcpath = settings['ccnsfcpath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_timeseries = settings['figpath_sfc_timeseries'] + + IOP = settings.get('IOP', None) + + #%% other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if not os.path.exists(figpath_sfc_timeseries): + os.makedirs(figpath_sfc_timeseries) + + + #%% read in obs data + if campaign=='ACEENA': + # cpc + if IOP=='IOP1': + lst = glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201706*')+glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201707*') + elif IOP=='IOP2': + lst = glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201801*')+glob.glob(ccnsfcpath+'enaaosccn1colavgC1.b1.201802*') + lst.sort() + t_ccn=np.empty(0) + ccn=np.empty(0) + SS=np.empty(0) + for filename in lst: + (time,timeunit,data,qc,dataunit,SS0)=read_ccn(filename) + data=qc_mask_qcflag(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_ccn=np.hstack((t_ccn, cday+time/86400)) + ccn=np.hstack((ccn, data)) + SS=np.hstack((SS, SS0)) + ccn=qc_remove_neg(ccn) + ccn=qc_ccn_max(ccn,SS) + # SS=0.1% + idx = np.logical_and(SS>0.05, SS<0.15) + t_ccna = t_ccn[idx] + ccna = ccn[idx] + SSa = 0.1 + # SS=0.5% + idx = np.logical_and(SS>0.4, SS<0.6) + t_ccnb = t_ccn[idx] + ccnb = ccn[idx] + SSb = 0.5 + + elif campaign=='HISCALE': + if IOP=='IOP1': + (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP1(ccnsfcpath) + sssfc=[int(x*10) for x in sssfc] + sssfc=np.array(sssfc)/10. + times_ccn=np.array(times_ccn) + ccnsfc=np.array(ccnsfc) + elif IOP=='IOP2': + (times_ccn,ccnsfc,sssfc,timeunit)=read_CCN_hiscale_IOP2(ccnsfcpath) + sssfc=[int(x*10) for x in sssfc] + sssfc=np.array(sssfc)/10. 
+ times_ccn=np.array(times_ccn) + ccnsfc=np.array(ccnsfc) + # find the nearest Supersaturation in Obs comparing to model + # 0.1% + idx = sssfc==0.1 + ccna = ccnsfc[idx] + t_ccna = times_ccn[idx] + SSa = 0.1 + # 0.5% + idx = sssfc==0.5 + ccnb = ccnsfc[idx] + t_ccnb = times_ccn[idx] + SSb = 0.5 + + #%% read in models + ccna_m = [] + ccnb_m = [] + nmodels = len(Model_List) + for mm in range(nmodels): + tmp_CCN3=np.empty(0) + tmp_CCN5=np.empty(0) + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_sfc_path+'SFC_vars_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (time,ccn3,timemunit,dataunit,ccn3_longname)=read_E3SM(filename_input,'CCN3') + (time,ccn5,timemunit,dataunit,ccn5_longname)=read_E3SM(filename_input,'CCN5') + + timem = np.hstack((timem,time)) + tmp_CCN3 = np.hstack((tmp_CCN3,ccn3)) + tmp_CCN5 = np.hstack((tmp_CCN5,ccn5)) + + ccna_m.append(tmp_CCN3) + ccnb_m.append(tmp_CCN5) + + # get supersaturation + SS3 = ccn3_longname.split('=')[-1] + SS5 = ccn5_longname.split('=')[-1] + + #%% make plot + + figname = figpath_sfc_timeseries+'timeseries_CCN_'+campaign+'_'+IOP+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(t_ccna,ccna,color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax1.plot(timem, ccna_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + ylim1 = ax1.get_ylim() + + ax2.plot(t_ccnb,ccnb,color='k',linewidth=1,label='Obs') + for mm in range(nmodels): + ax2.plot(timem, ccnb_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + ylim2 = ax2.get_ylim() + + # ax1.set_yticks([10,100,1000,10000,100000]) + # 
ax2.set_yticks([10,100,1000,10000,100000]) + ax1.set_xlim(cday1,cday2) + ax2.set_xlim(cday1,cday2) + + # set ylimit consistent in subplots + ax1.set_ylim([ylim1[0], ylim2[1]]) + ax2.set_ylim([ylim1[0], ylim2[1]]) + + # supersaturation + fig.text(0.67,0.9,'SS_obs='+format(np.nanmean(SSa),'.2f')+'%, SS_model='+SS3) + fig.text(0.67,0.4,'SS_obs='+format(np.nanmean(SSb),'.2f')+'%, SS_model='+SS5) + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + + ax2.set_xlabel('Calendar Day',fontsize=14) + ax1.set_title('CCN Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) diff --git a/src/esmac_diags/plotting/plot_sfc_timeseries_CN.py b/src/esmac_diags/plotting/plot_sfc_timeseries_CN.py new file mode 100644 index 0000000..c5de59d --- /dev/null +++ b/src/esmac_diags/plotting/plot_sfc_timeseries_CN.py @@ -0,0 +1,175 @@ +""" +# plot surface timeseries of aerosol number concentration +# compare models and surface measurements +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.time_format_change import yyyymmdd2cday,cday2mmdd +from ..subroutines.read_ARMdata import read_cpc +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.specific_data_treatment import avg_time_1d +from ..subroutines.quality_control import qc_remove_neg,qc_mask_qcflag_cpc + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + cpcsfcpath = settings['cpcsfcpath'] + cpcusfcpath = settings['cpcusfcpath'] + start_date = settings['start_date'] + end_date = settings['end_date'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + figpath_sfc_timeseries = settings['figpath_sfc_timeseries'] + + IOP = settings.get('IOP', None) + + #%% 
other settings + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date,'noleap') + cday2 = yyyymmdd2cday(end_date,'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if not os.path.exists(figpath_sfc_timeseries): + os.makedirs(figpath_sfc_timeseries) + + #%% read in obs data + if campaign=='ACEENA': + # cpc + if IOP=='IOP1': + lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.2017062*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201707*') + elif IOP=='IOP2': + lst = glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201801*')+glob.glob(cpcsfcpath+'enaaoscpcfC1.b1.201802*') + lst.sort() + t_cpc=np.empty(0) + cpc=np.empty(0) + for filename in lst: + (time,data,qc,timeunit,cpcunit)=read_cpc(filename) + data=qc_mask_qcflag_cpc(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + # average in time for quicker and clearer plot + time2=np.arange(1800,86400,3600) + data2 = avg_time_1d(np.array(time),np.array(data),time2) + t_cpc=np.hstack((t_cpc, cday+time2/86400)) + cpc=np.hstack((cpc, data2)) + # no cpcu + t_cpcu = np.array(np.nan) + cpcu = np.array(np.nan) + + elif campaign=='HISCALE': + # cpc + if IOP=='IOP1': + lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201604*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201605*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201608*')+glob.glob(cpcsfcpath+'sgpaoscpcC1.b1.201609*') + lst.sort() + t_cpc=np.empty(0) + cpc=np.empty(0) + if len(lst)==0: + t_cpc = np.array(np.nan) + cpc = np.array(np.nan) + else: + for filename in lst: + (time,data,qc,timeunit,cpcunit)=read_cpc(filename) + data=qc_mask_qcflag_cpc(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_cpc=np.hstack((t_cpc, cday+time/86400)) + cpc=np.hstack((cpc, data)) + cpc=qc_remove_neg(cpc) + + # 
cpcu + if IOP=='IOP1': + lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201604*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201605*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201606*') + elif IOP=='IOP2': + lst = glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201608*')+glob.glob(cpcusfcpath+'sgpaoscpcuS01.b1.201609*') + lst.sort() + t_cpcu=np.empty(0) + cpcu=np.empty(0) + if len(lst)==0: + t_cpcu = np.array(np.nan) + cpcu = np.array(np.nan) + else: + for filename in lst: + (time,data,qc,timeunit,cpcuunit)=read_cpc(filename) + data=qc_mask_qcflag_cpc(data,qc) + timestr=timeunit.split(' ') + date=timestr[2] + cday=yyyymmdd2cday(date,'noleap') + t_cpcu=np.hstack((t_cpcu, cday+time/86400)) + cpcu=np.hstack((cpcu, data)) + cpcu=qc_remove_neg(cpcu) + + #%% read in models + ncn_m = [] + nucn_m = [] + nmodels = len(Model_List) + for mm in range(nmodels): + tmp_ncn=np.empty(0) + tmp_nucn=np.empty(0) + timem=np.empty(0) + for cday in range(cday1,cday2+1): + mmdd=cday2mmdd(cday) + date=year0+'-'+mmdd[0:2]+'-'+mmdd[2:4] + + filename_input = E3SM_sfc_path+'SFC_CNsize_'+campaign+'_'+Model_List[mm]+'_'+date+'.nc' + (time,ncn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NCN') + (time,nucn,timemunit,dataunit,long_name)=read_E3SM(filename_input,'NUCN') + + timem = np.hstack((timem,time)) + tmp_ncn = np.hstack((tmp_ncn,ncn*1e-6)) + tmp_nucn = np.hstack((tmp_nucn,nucn*1e-6)) + + ncn_m.append(tmp_ncn) + nucn_m.append(tmp_nucn) + + + #%% make plot + + figname = figpath_sfc_timeseries+'timeseries_CN_'+campaign+'_'+IOP+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(t_cpc,cpc,color='k',linewidth=1,label='CPC(>10nm)') + for mm in range(nmodels): + ax1.plot(timem, ncn_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + # ylim1 = ax1.get_ylim() + + 
ax2.plot(t_cpcu,cpcu,color='k',linewidth=1,label='CPC(>3nm)') + for mm in range(nmodels): + ax2.plot(timem, nucn_m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + # ylim2 = ax2.get_ylim() + + # ax1.set_yticks([10,100,1000,10000,100000]) + # ax2.set_yticks([10,100,1000,10000,100000]) + ax1.set_xlim(cday1,cday2) + ax2.set_xlim(cday1,cday2) + + # # set ylimit consistent in subplots + # ax1.set_ylim([ylim1[0], ylim2[1]]) + # ax2.set_ylim([ylim1[0], ylim2[1]]) + + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.25, .5)) + + ax2.set_xlabel('Calendar Day',fontsize=14) + ax1.set_title('Aerosol Number Concentration (cm$^{-3}$) '+campaign+' '+IOP,fontsize=15) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + diff --git a/src/esmac_diags/plotting/plot_ship_latitude_rainLWP.py b/src/esmac_diags/plotting/plot_ship_latitude_rainLWP.py new file mode 100644 index 0000000..ba89cf4 --- /dev/null +++ b/src/esmac_diags/plotting/plot_ship_latitude_rainLWP.py @@ -0,0 +1,242 @@ +""" +# plot ship-track meteorological variables binned by different latitudes +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +import scipy.stats +from ..subroutines.read_ship import read_marmet +from ..subroutines.read_ARMdata import read_mwr, read_met +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + latbin = settings['latbin'] + shipmetpath = settings['shipmetpath'] + shipmwrpath = settings['shipmwrpath'] + E3SM_ship_path = settings['E3SM_ship_path'] + figpath_ship_statistics = 
settings['figpath_ship_statistics'] + + #%% other settings + + dlat = latbin[1]-latbin[0] + latmin = latbin-dlat/2 + latmax = latbin+dlat/2 + latlen = len(latbin) + + if not os.path.exists(figpath_ship_statistics): + os.makedirs(figpath_ship_statistics) + + + #%% read in observation + + # initialize variables by latitude bins + lwp_o = list() + rain_o = list() + for bb in range(latlen): + lwp_o.append(np.empty(0)) + rain_o.append(np.empty(0)) + + if campaign=='MAGIC': + lst = glob.glob(shipmetpath+'marmet*.txt') + lst.sort() + + for ll in range(len(lst)): + legnum=lst[ll][-6:-4] + + # read in MET + filenameo = shipmetpath+'marmet'+legnum+'.txt' + (shipdata,shipvarlist) = read_marmet(filenameo) + # get variables + lat=np.array([float(a[shipvarlist.index('lat')]) for a in shipdata]) + lon=np.array([float(a[shipvarlist.index('lon')]) for a in shipdata]) + rain=np.array([float(a[shipvarlist.index('org')]) for a in shipdata]) + lat[lat==-999]=np.nan + lon[lon==-999]=np.nan + rain[rain==-999]=np.nan + # rain rate in leg 19 are unrealistic. 
mask all data + if legnum=='19': + rain=rain*np.nan + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat>=latmin[bb], latt_lwp[-1]: + t_lwp[t_lwp<=t_lwp[-1]]=t_lwp[t_lwp<=t_lwp[-1]]+365 + lat1=np.interp(t_lwp,time0,lat) + lon1=np.interp(t_lwp,time0,lon) + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat1>=latmin[bb], lat1=latmin[bb], lat=latmin[bb], lat1=latmin[bb], lat00)) # number of valid values + data=data*1e-6 # change unit from 1/m3 to 1/cm3 + + # average in time for quicker plot + time0=np.arange(timem[0],timem[-1]+0.04,1./24) + data0 = avg_time_2d(timem,data.T,time0) + pdfall_m[mm] = np.column_stack((pdfall_m[mm],data0.T)) + + meandata=np.nanmean(data,1) + data2.append(meandata) + + # mean pdf + ntotal=sum(ntimes) + data3=[data2[ii]*ntimes[ii]/ntotal for ii in range(len(ntimes))] + pdf_model.append(sum(data3)) + + #%% read in observations + + nbins = 99 # for UHSAS at MAGIC + pdfall_o = np.empty((nbins,0)) + + if campaign=='MAGIC': + startdate='2012-09-22' + enddate='2013-09-26' + elif campaign=='MARCUS': + startdate='2017-10-30' + enddate='2018-03-22' + cday1=yyyymmdd2cday(startdate,'noleap') + cday2=yyyymmdd2cday(enddate,'noleap') + if startdate[0:4]!=enddate[0:4]: + cday2=cday2+365 # cover two years + + uhsasall=list() + ntimes = list() + for cc in range(cday1,cday2+1): + if cc<=365: + yyyymmdd=startdate[0:4]+cday2mmdd(cc) + else: + yyyymmdd=enddate[0:4]+cday2mmdd(cc-365) + + if campaign=='MAGIC': + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.'+yyyymmdd+'*') + elif campaign=='MARCUS': + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.'+yyyymmdd+'*') + if len(filenameo)==0: + continue + elif len(filenameo)>1: + raise ValueError('find too many files') + + print(yyyymmdd) + + (time,dmin,dmax,uhsas,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) + + uhsas=np.ma.filled(uhsas) + # average in time for quicker plot + time0=np.arange(1800,86400,3600) + data0 = 
avg_time_2d(time,uhsas,time0) + pdfall_o = np.column_stack((pdfall_o,data0.T)) + + # average for each file to reduce computational time + ntimes.append(sum(uhsas[:,0]>=0)) # number of valid values + meandata=np.nanmean(uhsas,0) + meandata[np.isnan(meandata)]=0 + uhsasall.append(meandata) + + size_u = (dmin+dmax)/2 + + # mean pdf + ntotal=sum(ntimes) + pdf_obs=sum([uhsasall[ii]*ntimes[ii]/ntotal for ii in range(len(ntimes))]) + + #%% change to dN/dlnDp + dlnDp_u=np.empty(nbins) + for bb in range(len(size_u)): + dlnDp_u[bb]=np.log(dmax[bb]/dmin[bb]) + dlnDp=np.empty(3000) + for bb in range(3000): + dlnDp[bb]=np.log((bb+2)/(bb+1)) + pdf_obs=pdf_obs/dlnDp_u + for mm in range(nmodels): + pdf_model[mm]=pdf_model[mm]/dlnDp + + #%% + pct1_o = [np.nanpercentile(pdfall_o[i,:]/dlnDp_u[i],10) for i in range(nbins)] + pct2_o = [np.nanpercentile(pdfall_o[i,:]/dlnDp_u[i],90) for i in range(nbins)] + pct1_m = [[] for mm in range(nmodels)] + pct2_m = [[] for mm in range(nmodels)] + for mm in range(nmodels): + pct1_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:]/dlnDp[i],10) for i in range(3000)] + pct2_m[mm] = [np.nanpercentile(pdfall_m[mm][i,:]/dlnDp[i],90) for i in range(3000)] + + #%% plot + figname = figpath_ship_statistics+'pdf_AerosolSize_'+campaign+'.png' + + print('plotting figures to '+figname) + + #fig = plt.figure() + fig,ax = plt.subplots(figsize=(4,2.5)) # figsize in inches + + ax.plot(size_u,pdf_obs,color='k',label='Obs') + for mm in range(nmodels): + ax.plot(np.arange(1,3001),pdf_model[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + + ax.fill_between(size_u,pct1_o,pct2_o, alpha=0.5, facecolor='gray') + for mm in range(nmodels): + ax.fill_between(np.arange(1,3001),pct1_m[mm],pct2_m[mm], alpha=0.2, facecolor=color_model[mm]) + + ax.legend(loc='upper right', shadow=False, fontsize='medium') + ax.tick_params(color='k',labelsize=12) + ax.set_xscale('log') + ax.set_yscale('log') + ax.set_ylim(0.01,1e4) + ax.set_xlabel('Diameter (nm)',fontsize=13) + 
ax.set_ylabel('#/dlnDp (cm$^{-3}$)',fontsize=13) + ax.set_title(campaign,fontsize=14) + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + diff --git a/src/esmac_diags/plotting/plot_ship_percentile_lat_CCN.py b/src/esmac_diags/plotting/plot_ship_percentile_lat_CCN.py new file mode 100644 index 0000000..ecf9b08 --- /dev/null +++ b/src/esmac_diags/plotting/plot_ship_percentile_lat_CCN.py @@ -0,0 +1,228 @@ +""" +# plot ship-track CCN number concentration binned by different latitudes +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_ARMdata import read_ccn_magic, read_ccn +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.time_format_change import cday2mmdd +from ..subroutines.quality_control import qc_mask_qcflag,qc_ccn_max + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + latbin = settings['latbin'] + shipccnpath = settings['shipccnpath'] + E3SM_ship_path = settings['E3SM_ship_path'] + figpath_ship_statistics = settings['figpath_ship_statistics'] + + #%% other settings + + dlat = latbin[1]-latbin[0] + latmin = latbin-dlat/2 + latmax = latbin+dlat/2 + latlen = len(latbin) + + if not os.path.exists(figpath_ship_statistics): + os.makedirs(figpath_ship_statistics) + + #%% read in model + nmodels=len(Model_List) + ccn3_m = list() + ccn5_m = list() + for mm in range(nmodels): + # initialize variables by latitude bins + ccn3_tmp = list() + ccn5_tmp = list() + for bb in range(latlen): + ccn3_tmp.append(np.empty(0)) + ccn5_tmp.append(np.empty(0)) + + lst = glob.glob(E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg*.nc') + + for ll in range(len(lst)): + filenamem = lst[ll] + (timem,varm,timeunitm,varmunit,varmlongname)=read_E3SM(filenamem,['CCN3','CCN5','lat','lon']) + for ii in range(len(varm)): + varm[ii][varm[ii]<-9000] = np.nan + + 
lat0=varm[2] + lon0=varm[3] + ccn3=varm[0] + ccn5=varm[1] + + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat0>=latmin[bb], lat0timem[-1]: + timem[timem<=timem[-1]]=timem[timem<=timem[-1]]+365 + + # find the days related to the ship leg + day = [int(a) for a in timem] + day = list(set(day)) + day.sort() + + # read in CCN + t_ccn=np.empty(0) + ccn=np.empty(0) + SS=np.empty(0) + for dd in day: + if campaign=='MAGIC': + if int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipccnpath+'magaosccn100M1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + if len(filenameo)==0: + continue # some days may be missing + (time,timeunit,obs,dataunit,SS0)=read_ccn_magic(filenameo[0]) + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipccnpath+'maraosccn1colavgM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + if len(filenameo)==0: + continue # some days may be missing + (time,timeunit,obs,qc,dataunit,SS0)=read_ccn(filenameo[0]) + obs=qc_mask_qcflag(obs,qc) + t_ccn=np.hstack((t_ccn, dd+time/86400)) + ccn=np.hstack((ccn, obs)) + SS=np.hstack((SS, SS0)) + + ccn=qc_ccn_max(ccn,SS) + + # if time expands two years, add 365 days to the second year + if t_ccn[0]>t_ccn[-1]: + t_ccn[t_ccn<=t_ccn[-1]]=t_ccn[t_ccn<=t_ccn[-1]]+365 + # SS=0.1% + idx = np.logical_and(SS>0.05, SS<0.15) + t_ccn1 = t_ccn[idx] + ccn1o = ccn[idx] + SS1 = 0.1 + # SS=0.5% + idx = np.logical_and(SS>0.4, SS<0.6) + t_ccn5 = t_ccn[idx] + ccn5o = ccn[idx] + SS5 = 0.5 + + 
lat1=np.interp(t_ccn1,timem,lat0) + lon1=np.interp(t_ccn1,timem,lon0) + lat5=np.interp(t_ccn5,timem,lat0) + lon5=np.interp(t_ccn5,timem,lon0) + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat1>=latmin[bb], lat1=latmin[bb], lat5=latmin[bb], lat0t_cpc[-1]: + t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 + lat2=np.interp(t_cpc,timem,lat1) + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat2>=latmin[bb], lat21: + raise ValueError('find too many files') + + (time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) + obs=np.ma.filled(obs) + obs=qc_remove_neg(obs) + uhsas=np.hstack((uhsas, np.nansum(obs,1))) + t_uh = np.hstack((t_uh,time/86400+dd)) + + uhsas=qc_cn_max(uhsas,100) + # if no obs available, fill one data with NaN + if len(t_uh)==0: + t_uh=[timem[0],timem[1]] + uhsas=np.full((2),np.nan) + # if time expands two years, add 365 days to the second year + if t_uh[0]>t_uh[-1]: + t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 + lat3=np.interp(t_uh,timem,lat1) + # separate into latitude bins + for bb in range(latlen): + idx = np.logical_and(lat3>=latmin[bb], lat3=latmin[bb], lat0=latmin[bb], lat1=latmin[bb], lat=latmin[bb], lat=latmin[bb], lat0t_ccn[-1]: + t_ccn[t_ccn<=t_ccn[-1]]=t_ccn[t_ccn<=t_ccn[-1]]+365 + + # ccn[np.logical_or(ccn<0,ccn>1500)]=np.nan + # SS=0.1% + idx = np.logical_and(SS>0.05, SS<0.15) + t_ccn1 = t_ccn[idx] + ccn1o = ccn[idx] + SS1 = 0.1 + # SS=0.5% + idx = np.logical_and(SS>0.4, SS<0.6) + t_ccn5 = t_ccn[idx] + ccn5o = ccn[idx] + SS5 = 0.5 + + #%% make plot + + figname = figpath_ship_timeseries+'timeseries_CCN_'+campaign+'_ship'+legnum+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(t_ccn1,ccn1o,color='k',linewidth=1,label='OBS') + for mm in range(nmodels): + ax1.plot(timem, 
ccn1m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + ylim1 = ax1.get_ylim() + + ax2.plot(t_ccn5,ccn5o,color='k',linewidth=1,label='OBS') + for mm in range(nmodels): + ax2.plot(timem, ccn5m[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax2.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + ylim2 = ax2.get_ylim() + + # set ylimit consistent in subplots + # ax1.set_ylim([ylim1[0], ylim2[1]]) + # ax2.set_ylim([ylim1[0], ylim2[1]]) + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) + ax2.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) + + # supersaturation + fig.text(0.08,0.9,'SS='+str(SS1)+'%') + fig.text(0.08,0.4,'SS='+str(SS5)+'%') + + ax2.set_xlabel('Calendar Day in '+year0,fontsize=14) + + ax1.set_title('CCN Number Concentration (cm$^{-3}$)',fontsize=15) + + fig.text(.08, .999,'trip # '+legnum, fontsize=12) + + # mask non-ocean model grid (ps is inconsistent with obs) + ax1.vlines(timem[datamask],ylim1[0],ylim1[1],color='lightgray') + ax2.vlines(timem[datamask],ylim2[0],ylim2[1],color='lightgray') + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_ship_timeseries_CN.py b/src/esmac_diags/plotting/plot_ship_timeseries_CN.py new file mode 100644 index 0000000..5787ce6 --- /dev/null +++ b/src/esmac_diags/plotting/plot_ship_timeseries_CN.py @@ -0,0 +1,196 @@ +""" +# plot timeseries of surface aerosol number concentration along each ship leg +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_ARMdata import read_cpc, read_uhsas +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.time_format_change import cday2mmdd +from ..subroutines.specific_data_treatment import mask_model_ps +from 
..subroutines.quality_control import qc_mask_qcflag,qc_remove_neg,qc_cn_max + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + shipcpcpath = settings['shipcpcpath'] + shipuhsaspath = settings['shipuhsaspath'] + shipmetpath = settings['shipmetpath'] + E3SM_ship_path = settings['E3SM_ship_path'] + figpath_ship_timeseries = settings['figpath_ship_timeseries'] + + #%% other settings + + if not os.path.exists(figpath_ship_timeseries): + os.makedirs(figpath_ship_timeseries) + + lst = glob.glob(E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') + lst.sort() + + for ll in range(len(lst)): + + if campaign=='MAGIC': + legnum=lst[ll][-5:-3] + elif campaign=='MARCUS': + legnum=lst[ll][-4] + + #%% read in model + nmodels=len(Model_List) + datam = list() + databins = list() + for mm in range(nmodels): + filenamem = E3SM_ship_path+'Ship_CNsize_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' + + (timem,NCNall,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCNall') + (timem,data,timeunitm,datamunit,datamlongname)=read_E3SM(filenamem,'NCN') + + datam.append(data*1e-6) # change unit from 1/m3 to 1/cm3 + databins.append(NCNall*1e-6) # change unit from 1/m3 to 1/cm3 + + # mask data where model grid is not at ocean surface (Ps is too different than obs) + filenamem = E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[mm]+'_shipleg'+legnum+'.nc' + (timem,psm,timeunitx,psmunit,psmlongname)=read_E3SM(filenamem,'PS') + datamask = mask_model_ps(timem,0.01*psm,legnum,campaign,shipmetpath) + # for mm in range(nmodels): + # datam[mm][datamask]=np.nan + + year0 = str(int(timeunitm.split()[2][0:4])+1) + + #%% read in observations + # find the days related to the ship leg + day = [int(a) for a in timem] + day = list(set(day)) + day.sort() + + # CPC + t_cpc=np.empty(0) + cpc=np.empty(0) + for dd in day: + + if campaign=='MAGIC': + if 
int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'magaoscpcfM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*') + if len(filenameo)==0: + continue # some days may be missing + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipcpcpath+'maraoscpcf1mM1.b1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + if len(filenameo)==0: + continue # some days may be missing + + + (time,obs,qc,timeunit,dataunit)=read_cpc(filenameo[0]) + obs=qc_mask_qcflag(obs,qc) + t_cpc=np.hstack((t_cpc, dd+time/86400)) + cpc=np.hstack((cpc, obs)) + + cpc=qc_remove_neg(cpc) + cpc=qc_cn_max(cpc,10) + # if time expands two years, add 365 days to the second year + if t_cpc[0]>t_cpc[-1]: + t_cpc[t_cpc<=t_cpc[-1]]=t_cpc[t_cpc<=t_cpc[-1]]+365 + + # UHSAS + t_uh=np.empty(0) + uhsas=np.empty(0) + for dd in day: + + if campaign=='MAGIC': + if int(legnum)<=9: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2012'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd-365,calendar='noleap')+'.*.cdf') + else: + filenameo = glob.glob(shipuhsaspath+'magaosuhsasM1.a1.2013'+cday2mmdd(dd,calendar='noleap')+'.*.cdf') + elif campaign=='MARCUS': + if int(legnum)<=2: + if dd<=365: # year 2012 + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2017'+cday2mmdd(dd,calendar='noleap')+'.*') + else: + filenameo = glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd-365,calendar='noleap')+'.*') + else: + filenameo 
= glob.glob(shipuhsaspath+'maraosuhsasM1.a1.2018'+cday2mmdd(dd,calendar='noleap')+'.*') + + if len(filenameo)==0: + continue # some days may be missing + if len(filenameo)>1: + raise ValueError('find too many files') + + (time,dmin,dmax,obs,timeunit,uhunit,uhlongname)=read_uhsas(filenameo[0]) + obs=np.ma.filled(obs) + obs=qc_remove_neg(obs) + uhsas=np.hstack((uhsas, np.nansum(obs,1))) + t_uh = np.hstack((t_uh,time/86400+dd)) + + uhsas=qc_cn_max(uhsas,100) + # if no obs available, fill one data with NaN + if len(t_uh)==0: + t_uh=[timem[0],timem[1]] + uhsas=np.full((2),np.nan) + + # if time expands two years, add 365 days to the second year + if t_uh[0]>t_uh[-1]: + t_uh[t_uh<=t_uh[-1]]=t_uh[t_uh<=t_uh[-1]]+365 + + #%% Calculate model aerosol number concentration for UHSAS size range + b1 = int(dmin[0]) + b2 = int(dmax[-1]) + datam2=list() + for mm in range(nmodels): + datam2.append(np.nansum(databins[mm][b1-1:b2,:],0)) + # datam2[mm][datamask]=np.nan + + #%% make plot + + figname = figpath_ship_timeseries+'timeseries_CN_'+campaign+'_ship'+legnum+'.png' + print('plotting figures to '+figname) + + fig,(ax1,ax2) = plt.subplots(2,1,figsize=(8,4)) # figsize in inches + plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.5) #pad=0.4, w_pad=0.5, h_pad=1.0 + + ax1.plot(t_cpc,cpc,color='k',linewidth=1,label='CPC') + for mm in range(nmodels): + ax1.plot(timem, datam[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax1.tick_params(color='k',labelsize=12) + ylim1 = ax1.get_ylim() + + ax2.plot(t_uh,uhsas,color='k',linewidth=1,label='UHSAS') + # ax2.plot(t_uh,uhsas,color='k',linewidth=1,label='UHSAS ('+str(b1)+'-'+str(b2)+'nm)') + for mm in range(nmodels): + ax2.plot(timem, datam2[mm],color=color_model[mm],linewidth=1, label=Model_List[mm]) + # ax1.set_yscale('log') + ax2.tick_params(color='k',labelsize=12) + ylim2 = ax2.get_ylim() + + ax1.legend(loc='center right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) + ax2.legend(loc='center 
right', shadow=False, fontsize='large',bbox_to_anchor=(1.2, .5)) + + ax2.set_xlabel('Calendar Day in '+year0,fontsize=14) + + ax1.set_title('Aerosol Number Concentration (cm$^{-3}$)',fontsize=15) + + fig.text(.08, .999,'trip # '+legnum, fontsize=12) + + # mask non-ocean model grid (ps is inconsistent with obs) + ax1.vlines(timem[datamask],ylim1[0],ylim1[1],color='lightgray') + ax2.vlines(timem[datamask],ylim2[0],ylim2[1],color='lightgray') + + + fig.savefig(figname,dpi=fig.dpi,bbox_inches='tight', pad_inches=1) + plt.close() \ No newline at end of file diff --git a/src/esmac_diags/plotting/plot_ship_timeseries_met.py b/src/esmac_diags/plotting/plot_ship_timeseries_met.py new file mode 100644 index 0000000..c4b95c6 --- /dev/null +++ b/src/esmac_diags/plotting/plot_ship_timeseries_met.py @@ -0,0 +1,206 @@ +""" +# plot timeseries of basic meteorological variables along ship track +""" + +import os +import glob +import matplotlib.pyplot as plt +import numpy as np +from ..subroutines.read_ship import read_marmet +from ..subroutines.read_ARMdata import read_met +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd +from ..subroutines.specific_data_treatment import avg_time_1d + +def run_plot(settings): + #%% variables from settings + campaign = settings['campaign'] + Model_List = settings['Model_List'] + color_model = settings['color_model'] + shipmetpath = settings['shipmetpath'] + E3SM_ship_path = settings['E3SM_ship_path'] + figpath_ship_timeseries = settings['figpath_ship_timeseries'] + + #%% other settings + + if not os.path.exists(figpath_ship_timeseries): + os.makedirs(figpath_ship_timeseries) + + + lst = glob.glob(E3SM_ship_path+'Ship_vars_'+campaign+'_'+Model_List[0]+'_shipleg*.nc') + lst.sort() + + for ll in range(len(lst)): + + #%% for MAGIC, read each ship leg + if campaign=='MAGIC': + legnum=lst[ll][-5:-3] + filenameo = shipmetpath+'marmet'+legnum+'.txt' + (shipdata,shipvarlist) = 
read_marmet(filenameo) + year=[a[1] for a in shipdata] + month=[a[2] for a in shipdata] + day=[a[3] for a in shipdata] + hh=[int(a[4]) for a in shipdata] + mm=[int(a[5]) for a in shipdata] + ss=[int(a[6]) for a in shipdata] + yyyymmdd = [year[i]+month[i]+day[i] for i in range(len(year))] # yyyymmdd + # get time in calendar day + time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. + time = np.array([time[i] + yyyymmdd2cday(yyyymmdd[i],'noleap') for i in range(len(time))]) + if time[-1]10nm) and CPCU (>3nm) + NUCN = np.nansum(NCNall[3:,:],0) # >3nm + NCN = np.nansum(NCNall[10:,:],0) # >10nm + + + #%% output extacted file + outputname = 'Aircraft_CNsize_'+campaign+'_'+model+'_'+date+'.nc' + print('output to this file: '+E3SM_aircraft_path+outputname) + + # define filename + f = Dataset(E3SM_aircraft_path+outputname, 'w', format='NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + size=f.createDimension('size',3000) + + # create variable list + time_o = f.createVariable("time","f8",("time",)) + height_o = f.createVariable("height",'f8',("time",)) + size_o = f.createVariable("size",'i8',("size")) + + data_o = f.createVariable('NCNall','f8',("size","time")) + ncn_o = f.createVariable("NCN","f8",("time",)) + nucn_o = f.createVariable("NUCN","f8",("time",)) + + # write data + time_o[:] = time + height_o[:] = height + size_o[:] = np.arange(1,3001) + data_o[:,:]=NCNall + ncn_o[:]=NCN + nucn_o[:]=NUCN + + # attributes + time_o.units = "Seconds since "+date2+' 00:00 UTC' + height_o.units = 'm MSL' + size_o.units = 'nm' + size_o.long_name="0 to 3000nm with 1nm increment" + data_o.units = '#/m3' + data_o.long_name = 'aerosol size distribution' + ncn_o.units = '#/m3' + ncn_o.long_name = 'aerosol number concentration for size >10nm' + nucn_o.units = '#/m3' + nucn_o.long_name = 'aerosol number concentration for size >3nm' + + # global attributes + import time as ttt + f.description = model+" extact for aircraft track for "+campaign + 
f.aircraftfile = filename.split('\\')[-1] + f.create_time = ttt.ctime(ttt.time()) + + f.close() + diff --git a/src/esmac_diags/preprocessing/prep_E3SM_profile_allvars.py b/src/esmac_diags/preprocessing/prep_E3SM_profile_allvars.py new file mode 100644 index 0000000..59024eb --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_E3SM_profile_allvars.py @@ -0,0 +1,145 @@ +""" +# prepare E3SM vertical profiles at ARM sites +# input data is E3SM regional output +# output is variables at the nearest column +""" + +import os +import numpy as np +from ..subroutines.time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd +from ..subroutines.read_netcdf import read_E3SM +from netCDF4 import Dataset + +def find_nearest(xall, yall, x, y): + distance = np.square(xall-x) + np.square(yall-y) + idx = distance.argmin() + return(idx) + +def run_prep(settings): + #%% variables from settings + campaign = settings['campaign'] + lat0 = settings['lat0'] + lon0 = settings['lon0'] + site = settings['site'] + start_date = settings['start_date'] + end_date = settings['end_date'] + Model_List = settings['Model_List'] + E3SM_hourly_path = settings['E3SM_hourly_path'] + E3SM_hourly_filehead = settings['E3SM_hourly_filehead'] + E3SM_profile_path = settings['E3SM_profile_path'] + + #%% other settings + + # output height above ground. data will be interpolated into z_f + z_f = np.hstack((np.arange(0, 500, 50), np.arange(500, 2000, 100), np.arange(2000, 5000, 300), + np.arange(5000, 10000, 500), np.arange(10000, 20001, 1000))) + zlen = len(z_f) + + if not os.path.exists(E3SM_profile_path): + os.makedirs(E3SM_profile_path) + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date, 'noleap') + cday2 = yyyymmdd2cday(end_date, 'noleap') + if start_date[0:4]!=end_date[0:4]: + raise ValueError('currently not support multiple years. 
please set start_date and end_date in the same year') + year0 = start_date[0:4] + + #%% set variables for profiles + variable_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', 'LWC', 'IWC', + 'CLDLIQ', 'CLDICE', 'NUMLIQ', "AREI", "AREL", "ICLDIWP", "ICLDTWP"] + varlen = len(variable_names) + + if site=='SGP': + E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output + elif site=='ENA': + E3SMdomain_range = '330e_to_335e_37n_to_42n' + else: + raise ValueError('data for this site is not specified: ' + site) + + for mm in range(len(Model_List)): + model = Model_List[mm] + + #%% process data + for cday in range(cday1, cday2 + 1): + mmdd = cday2mmdd(cday) + date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] + + # read in E3SM data + variables = list() + var_units = list() + var_longnames = list() + + filename_input = E3SM_hourly_path[mm] + E3SM_hourly_filehead[mm] + '.cam.h3.' + date + '-00000.nc' + + (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) + (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) + (timem, z3, timeunitm, zunit, zname) = read_E3SM(filename_input, 'Z3_' + E3SMdomain_range) + + x_idx = find_nearest(lonm, latm, lon0, lat0) + zm = z3[:, :, x_idx] + + # read in all variables + (timem, var2d, timeunitm, var2dunit, var2dlongname) = \ + read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable_names]) + + tlen = len(timem) + for vv in range(varlen): + var = var2d[vv][:, :, x_idx] + var2 = np.full((tlen, zlen), np.nan) + for tt in range(tlen): + # interpolate height above sea level to height above ground + var2[tt, :] = np.interp(z_f, np.flip(zm[tt, :]-zm[tt, -1]), np.flip(var[tt, :])) + variables.append(var2) + var_units.append(var2dunit[vv]) + var_longnames.append(var2dlongname[vv]) + + cdaym = timeunit2cday(timeunitm, 'noleap') + yearm = timeunitm.split(' ')[2][0:4] + time = timem.data - 
365*(int(year0)-int(yearm)) + cdaym + + + # %% output extacted file + outputname = 'Profile_vars_' + campaign + '_' + model + '.' + date + '.nc' + print('output to this file: ' + E3SM_profile_path + outputname) + + # define filename + f = Dataset(E3SM_profile_path + outputname, 'w', format = 'NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + z = f.createDimension('height', zlen) + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + height_o = f.createVariable("height", "f8", ("height", )) + lat_o = f.createVariable("lat", "f8", ()) + lon_o = f.createVariable("lon", "f8", ()) + var_o = list() + for vv in range(varlen): + var_o.append (f.createVariable(variable_names[vv], 'f8', ("time", "height"))) + + # write data + time_o[:] = time + height_o[:] = z_f + lat_o[:] = latm[x_idx] + lon_o[:] = lonm[x_idx] + for vv in range(varlen): + var_o[vv][:] = np.array(variables[vv]) + + # attributes + time_o.units = "days since " + str(int(year0)-1) + "-12-31 00:00:00 UTC" + lat_o.units = "latitude" + lon_o.units = "longitude" + height_o.units = "gpm above ground" + for vv in range(varlen): + var_o[vv].units = var_units[vv] + var_o[vv].long_name = var_longnames[vv] + + # global attributes + import time as ttt + f.description = model + " extact vertical variables for " + campaign + f.modeldata = filename_input + f.create_time = ttt.ctime(ttt.time()) + + f.close() diff --git a/src/esmac_diags/preprocessing/prep_E3SM_sfc_allvars.py b/src/esmac_diags/preprocessing/prep_E3SM_sfc_allvars.py new file mode 100644 index 0000000..4f8bdb8 --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_E3SM_sfc_allvars.py @@ -0,0 +1,146 @@ +""" +# prepare E3SM surface aerosol properties at ARM sites +# input data is E3SM regional output +# output is surface variables at the nearest grid +""" + +import os +import numpy as np +from ..subroutines.time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd +from 
..subroutines.read_netcdf import read_E3SM +from netCDF4 import Dataset + +def find_nearest(xall, yall, x, y): + distance = np.square(xall - x) + np.square(yall - y) + idx = distance.argmin() + return(idx) + +def run_prep(settings): + #%% variables from settings + campaign = settings['campaign'] + lat0 = settings['lat0'] + lon0 = settings['lon0'] + site = settings['site'] + start_date = settings['start_date'] + end_date = settings['end_date'] + Model_List = settings['Model_List'] + E3SM_hourly_path = settings['E3SM_hourly_path'] + E3SM_hourly_filehead = settings['E3SM_hourly_filehead'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + + #%% other settings + + if not os.path.exists(E3SM_sfc_path): + os.makedirs(E3SM_sfc_path) + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date, 'noleap') + cday2 = yyyymmdd2cday(end_date, 'noleap') + if start_date[0:4] != end_date[0:4]: + raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') + year0 = start_date[0:4] + + if site == 'SGP': + E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output + elif site == 'ENA': + E3SMdomain_range = '330e_to_335e_37n_to_42n' + else: + raise ValueError('data for this site is not specified: ' + site) + + + #%% set variables + for mm in range(len(Model_List)): + model = Model_List[mm] + variable1d_names = ['PS', 'PBLH', 'FLNT', 'FSNT', 'FLNS', 'FSNS', "LHFLX", "SHFLX", + 'TREFHT','PRECT','PRECL', "TGCLDLWP", "TGCLDIWP"] + variable2d_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', + 'CLDLIQ', 'CLDICE', 'NUMLIQ', 'NUMICE', 'NUMRAI', 'NUMSNO', 'RAINQM', 'SNOWQM', + 'CCN1', 'CCN3', 'CCN5', "AREI", "AREL", "ICLDIWP", "ICLDTWP", + 'bc_a1', 'bc_a3', 'bc_a4', 'dst_a1', 'dst_a3', 'mom_a1', 'mom_a2', 'mom_a3', 'mom_a4', + 'ncl_a1', 'ncl_a2', 'ncl_a3', 'pom_a1', 'pom_a3', 'pom_a4', 'so4_a1', 'so4_a2', 'so4_a3', + 'soa_a1', 'soa_a2', 'soa_a3', 'num_a1', 'num_a2', 'num_a3', 'num_a4', + 
'num_c1', 'num_c2', 'num_c3', 'num_c4', "dgnd_a01", "dgnd_a02", "dgnd_a03", "dgnd_a04", + "dgnw_a01", "dgnw_a02", "dgnw_a03", "dgnw_a04", 'EXTINCT', 'ABSORB'] + if model == 'NucSoaCond': # with so4 and soa in nucleation mode + variable2d_names = variable2d_names + ['so4_a5', 'soa_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] + elif model == 'Nuc': # only with so4 in nucleation mode + variable2d_names = variable2d_names + ['so4_a5','num_a5','num_c5', "dgnd_a05", "dgnw_a05"] + var1dlen = len(variable1d_names) + var2dlen = len(variable2d_names) + varlen = var1dlen + var2dlen + variable_names = variable1d_names + variable2d_names + + #%% process data for each day + for cday in range(cday1, cday2 + 1): + mmdd = cday2mmdd(cday) + date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] + + filename_input = E3SM_hourly_path[mm] + E3SM_hourly_filehead[mm] + '.cam.h3.' + date + '-00000.nc' + + # read in E3SM data + variables = list() + var_units = list() + var_longnames = list() + + (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) + (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) + x_idx = find_nearest(lonm, latm, lon0, lat0) + + + (timem, var1d, timeunitm, var1dunit, var1dlongname) = \ + read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable1d_names]) + (timem, var2d, timeunitm, var2dunit, var2dlongname) = \ + read_E3SM(filename_input, [a + '_' + E3SMdomain_range for a in variable2d_names]) + for vv in range(var1dlen): + variables.append(var1d[vv][:, x_idx]) + for vv in range(var2dlen): + variables.append(var2d[vv][:, -1, x_idx]) # choose the lowest level + var_units = var1dunit + var2dunit + var_longnames = var1dlongname + var2dlongname + + cdaym = timeunit2cday(timeunitm, 'noleap') + yearm = timeunitm.split(' ')[2][0:4] + time = timem.data - 365*(int(year0) - int(yearm)) + cdaym + + + # %% output extacted file + outputname = 'SFC_vars_' + campaign + '_' + 
model + '_' + date + '.nc' + print('output to this file: ' + E3SM_sfc_path + outputname) + + # define filename + f = Dataset(E3SM_sfc_path + outputname, 'w', format = 'NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + lat_o = f.createVariable("lat", "f8", ()) + lon_o = f.createVariable("lon", "f8", ()) + var_o = list() + for vv in range(varlen): + var_o.append(f.createVariable(variable_names[vv], 'f8', ("time", ))) + + # write data + time_o[:] = time + lat_o[:] = latm[x_idx] + lon_o[:] = lonm[x_idx] + for vv in range(varlen): + var_o[vv][:] = np.array(variables[vv]) + + # attributes + time_o.units = "days since " + str(int(year0) - 1) + "-12-31 00:00:00 UTC" + lat_o.units = "latitude" + lon_o.units = "longitude" + for vv in range(varlen): + var_o[vv].units = var_units[vv] + var_o[vv].long_name = var_longnames[vv] + + # global attributes + import time as ttt + f.description = model + " extact surface variables for " + campaign + f.modeldata = filename_input + f.create_time = ttt.ctime(ttt.time()) + + f.close() + diff --git a/src/esmac_diags/preprocessing/prep_E3SM_sfc_bins.py b/src/esmac_diags/preprocessing/prep_E3SM_sfc_bins.py new file mode 100644 index 0000000..b2c78bb --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_E3SM_sfc_bins.py @@ -0,0 +1,159 @@ +""" +# prepare E3SM surface aerosol size distribution at ARM sites +# input data is E3SM regional output +# output is surface aerosol distribution at the nearest grid +""" + +import os +import numpy as np +from ..subroutines.time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.CN_mode_to_size import calc_CNsize_cutoff_0_3000nm +from netCDF4 import Dataset + +def find_nearest(xall, yall, x, y): + distance = np.square(xall-x) + np.square(yall-y) + idx = distance.argmin() + return(idx) + +def run_prep(settings): + 
#%% variables from settings + campaign = settings['campaign'] + lat0 = settings['lat0'] + lon0 = settings['lon0'] + site = settings['site'] + start_date = settings['start_date'] + end_date = settings['end_date'] + Model_List = settings['Model_List'] + E3SM_hourly_path = settings['E3SM_hourly_path'] + E3SM_hourly_filehead = settings['E3SM_hourly_filehead'] + E3SM_sfc_path = settings['E3SM_sfc_path'] + + #%% other settings + + if not os.path.exists(E3SM_sfc_path): + os.makedirs(E3SM_sfc_path) + + # change start date into calendar day + cday1 = yyyymmdd2cday(start_date, 'noleap') + cday2 = yyyymmdd2cday(end_date, 'noleap') + if start_date[0:4] != end_date[0:4]: + raise ValueError('currently not support multiple years. please set start_date and end_date in the same year') + + year0 = start_date[0:4] + + if site == 'SGP': + E3SMdomain_range = '260e_to_265e_34n_to_39n' # domain range in E3SM regional output + elif site == 'ENA': + E3SMdomain_range = '330e_to_335e_37n_to_42n' + else: + raise ValueError('data for this site is not specified: ' + site) + + #%% process data for each day + for mm in range(len(Model_List)): + model = Model_List[mm] + + for cday in range(cday1, cday2 + 1): + mmdd = cday2mmdd(cday) + date = year0 + '-' + mmdd[0:2] + '-' + mmdd[2:4] + + filename_input = E3SM_hourly_path[mm] + E3SM_hourly_filehead[mm] + '.cam.h3.' 
+ date + '-00000.nc' + + (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) + (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) + x_idx = find_nearest(lonm, latm, lon0, lat0) + + cdaym = timeunit2cday(timeunitm, 'noleap') + yearm = timeunitm.split(' ')[2][0:4] + time = timem.data - 365*(int(year0)-int(yearm)) + cdaym + + # do not use read_E3SM because hyam and hybm don't have units + f = Dataset(filename_input, 'r') + P0 = f.variables['P0'][:] + hyam = f.variables['hyam'][:] + hybm = f.variables['hybm'][:] + T = f.variables['T_' + E3SMdomain_range][:] + PS = f.variables['PS_' + E3SMdomain_range][:] + num_a1 = f.variables['num_a1_' + E3SMdomain_range][:] + num_a2 = f.variables['num_a2_' + E3SMdomain_range][:] + num_a3 = f.variables['num_a3_' + E3SMdomain_range][:] + num_a4 = f.variables['num_a4_' + E3SMdomain_range][:] + dn1 = f.variables['dgnd_a01_' + E3SMdomain_range][:] + dn2 = f.variables['dgnd_a02_' + E3SMdomain_range][:] + dn3 = f.variables['dgnd_a03_' + E3SMdomain_range][:] + dn4 = f.variables['dgnd_a04_' + E3SMdomain_range][:] + if model[0:3] == 'Nuc': # with nucleation mode + num_a5 = f.variables['num_a5_' + E3SMdomain_range][:] + dn5 = f.variables['dgnd_a05_' + E3SMdomain_range][:] + f.close() + + Pres = np.nan*T + zlen = T.shape[1] + for kk in range(zlen): + Pres[:, kk, :] = hyam[kk]*P0 + hybm[kk]*PS + + numall = [num_a1[:, -1, x_idx], num_a2[:, -1, x_idx], num_a3[:, -1, x_idx], num_a4[:, -1, x_idx]] + dnall = [dn1[:, -1, x_idx], dn2[:, -1, x_idx], dn3[:, -1, x_idx], dn4[:, -1, x_idx]] + if model[0:3] == 'Nuc': # with nucleation mode + numall.append(num_a5[:, -1, x_idx]) + dnall.append(dn5[:, -1, x_idx]) + + + NCNall = calc_CNsize_cutoff_0_3000nm(dnall, numall, T[:, -1, x_idx], Pres[:, -1, x_idx]) + + # calculate total CN concentration for CPC (>10nm) and CPCU (>3nm) + NUCN = np.nansum(NCNall[3:, :], 0) # >3nm + NCN = np.nansum(NCNall[10:, :], 0) # >10nm + + 
+ + #%% output extacted file + outputname = 'SFC_CNsize_' + campaign + '_' + model + '_' + date + '.nc' + print('output to this file: ' + E3SM_sfc_path + outputname) + + # define filename + f = Dataset(E3SM_sfc_path + outputname, 'w', format='NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + s = f.createDimension('size', 3000) # unlimited + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + size_o = f.createVariable("size", "f8", ("size", )) + lat_o = f.createVariable("lat", "f8", ()) + lon_o = f.createVariable("lon", "f8", ()) + + data_o = f.createVariable('NCNall', 'f8', ("size", "time")) + ncn_o = f.createVariable("NCN", "f8", ("time", )) + nucn_o = f.createVariable("NUCN", "f8", ("time", )) + + # write data + time_o[:] = time + lat_o[:] = latm[x_idx] + lon_o[:] = lonm[x_idx] + size_o[:] = np.arange(1, 3001) + data_o[:, :] = NCNall + ncn_o[:] = NCN + nucn_o[:] = NUCN + + # attributes + time_o.units = "days since " + str(int(year0)-1) + "-12-31 00:00:00 UTC" + lat_o.units = "latitude" + lon_o.units = "longitude" + size_o.units = 'nm' + size_o.long_name = "0 to 3000nm with 1nm increment" + data_o.units = '#/m3' + data_o.long_name = 'aerosol size distribution' + ncn_o.units = '#/m3' + ncn_o.long_name = 'aerosol number concentration for size >10nm' + nucn_o.units = '#/m3' + nucn_o.long_name = 'aerosol number concentration for size >3nm' + + # global attributes + import time as ttt + f.description = model + " extact surface aerosol size distribution for " + campaign + f.modeldata = filename_input + f.create_time = ttt.ctime(ttt.time()) + + f.close() diff --git a/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_allvars.py b/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_allvars.py new file mode 100644 index 0000000..d80af6a --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_allvars.py @@ -0,0 +1,242 @@ +""" +# prepare E3SM surface variables at ARM ship-based field campaigns +# 
input data is E3SM regional output +# output is surface variables at the nearest grid of the ship track +""" + +import os +import glob +import numpy as np +from ..subroutines.time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.read_ship import read_marmet +from ..subroutines.read_ARMdata import read_met +from netCDF4 import Dataset + +def find_nearest(xall, yall, x, y): + distance = np.square(xall-x) + np.square(yall-y) + idx = distance.argmin() + return(idx) + +def run_prep(settings): + #%% variables from settings + campaign = settings['campaign'] + shipmetpath = settings['shipmetpath'] + Model_List = settings['Model_List'] + E3SM_hourly_path = settings['E3SM_hourly_path'] + E3SM_hourly_filehead = settings['E3SM_hourly_filehead'] + E3SM_ship_path = settings['E3SM_ship_path'] + + #%% other settings + + if not os.path.exists(E3SM_ship_path): + os.makedirs(E3SM_ship_path) + + #%% get all ship data + if campaign=='MAGIC': + lst = glob.glob(shipmetpath+'marmet*.txt') + E3SMdomain_range='202e_to_243e_20n_to_35n' # domain range in E3SM regional output + elif campaign=='MARCUS': + lst = [1, 2, 3, 4] # there are 4 ship trips (legs) for MARCUS + E3SMdomain_range='60e_to_160e_42s_to_70s' + else: + raise ValueError('data for this field campaign is not specified: ' + campaign) + + lst.sort() + print('total number of ship leg files:'+str(len(lst))) + + + for filename in lst: + + #%% read in ship data + + if campaign=='MAGIC': + # for each ship leg + legnum=filename[-6:-4] + + (shipdata, shipvarlist) = read_marmet(filename) + year=[a[1] for a in shipdata] + month=[a[2] for a in shipdata] + day=[a[3] for a in shipdata] + hh=[int(a[4]) for a in shipdata] + mm=[int(a[5]) for a in shipdata] + ss=[int(a[6]) for a in shipdata] + lat=np.array([float(a[7]) for a in shipdata]) + lon=np.array([float(a[8]) for a in shipdata]) + + # ymd = [year[i]+'-'+month[i]+'-'+day[i] for i in
range(len(year))] # yyyy-mm-dd + yyyymmdd = [year[i]+month[i]+day[i] for i in range(len(year))] # yyyymmdd + ymd=list(set(yyyymmdd)) # unique date + ymd.sort() + + + time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. + for i in range(len(time)): + cday0 = yyyymmdd2cday(yyyymmdd[i], 'noleap') + if year[i]==year[0]: + time[i]=time[i]+cday0 + else: + time[i]=time[i]+cday0+365 # next year + + elif campaign=='MARCUS': + legnum=str(filename) + if legnum=='1': + startdate='2017-10-30' + enddate='2017-12-02' + elif legnum=='2': + startdate='2017-12-13' + enddate='2018-01-11' + elif legnum=='3': + startdate='2018-01-16' + enddate='2018-03-04' + elif legnum=='4': + startdate='2018-03-09' + enddate='2018-03-22' + + cday1=yyyymmdd2cday(startdate, 'noleap') + cday2=yyyymmdd2cday(enddate, 'noleap') + if startdate[0:4]!=enddate[0:4]: + cday2=cday2+365 # cover two years + + time=np.empty(0) + lon=np.empty(0) + lat=np.empty(0) + ymd=[] + for cc in range(cday1, cday2+1): + if cc<=365: + yyyymmdd=startdate[0:4]+cday2mmdd(cc) + else: + yyyymmdd=enddate[0:4]+cday2mmdd(cc-365) + + lst0 = glob.glob(shipmetpath+'maraadmetX1.b1.'+yyyymmdd+'*') + (time0, lon0, timeunit, lonunit, lon_long_name)=read_met(lst0[0], 'lon') + (time0, lat0, timeunit, lonunit, lon_long_name)=read_met(lst0[0], 'lat') + ymd0 = timeunit.split()[2] + ymd.append(ymd0[0:4]+ymd0[5:7]+ymd0[8:10]) + + time = np.hstack((time, time0/86400. 
+ cc)) + lat = np.hstack((lat, lat0)) + lon = np.hstack((lon, lon0)) + + print('date for shipleg '+legnum+': '+ymd[0]+'-'+ymd[-1]) + + #%% set variables to be read + for mm in range(len(Model_List)): + model=Model_List[mm] + variable1d_names = ['PS', 'PBLH', 'FLNT', 'FSNT', 'FLNS', 'FSNS', "LHFLX", "SHFLX", + 'TREFHT', 'PRECT', 'PRECL', "TGCLDLWP", "TGCLDIWP"] + variable2d_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', + 'CLDLIQ', 'CLDICE', 'NUMLIQ', 'NUMICE', 'NUMRAI', 'NUMSNO', 'RAINQM', 'SNOWQM', + 'CCN1', 'CCN3', 'CCN5', "AREI", "AREL", "ICLDIWP", "ICLDTWP", + 'bc_a1', 'bc_a3', 'bc_a4', 'dst_a1', 'dst_a3', 'mom_a1', 'mom_a2', 'mom_a3', 'mom_a4', + 'ncl_a1', 'ncl_a2', 'ncl_a3', 'pom_a1', 'pom_a3', 'pom_a4', 'so4_a1', 'so4_a2', 'so4_a3', + 'soa_a1', 'soa_a2', 'soa_a3', 'num_a1', 'num_a2', 'num_a3', 'num_a4', + 'num_c1', 'num_c2', 'num_c3', 'num_c4', "dgnd_a01", "dgnd_a02", "dgnd_a03", "dgnd_a04", + "dgnw_a01", "dgnw_a02", "dgnw_a03", "dgnw_a04", 'EXTINCT', 'ABSORB'] + if model=='NucSoaCond': # with so4 and soa in nucleation mode + variable2d_names=variable2d_names+['so4_a5', 'soa_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] + elif model=='Nuc': # only with so4 in nucleation mode + variable2d_names=variable2d_names+['so4_a5', 'num_a5', 'num_c5', "dgnd_a05", "dgnw_a05"] + var1dlen = len(variable1d_names) + var2dlen = len(variable2d_names) + variable_names = variable1d_names+variable2d_names + varlen = var1dlen+var2dlen + + #%% read in E3SM data + variables = list() + var_units = list() + var_longnames = list() + + # read all days in the ship leg + for dd in range(len(ymd)): + ymd2 = ymd[dd][0:4]+'-'+ymd[dd][4:6]+'-'+ymd[dd][6:8] + print('read this date: '+ymd2) + filename_input = E3SM_hourly_path[mm]+E3SM_hourly_filehead[mm]+'.cam.h3.'+ymd2+'-00000.nc' + + (timem, lonm, timeunitm, lonmunit, lonmname)=read_E3SM(filename_input, 'lon_'+E3SMdomain_range) + (timem, latm, timeunitm, latmunit, latmname)=read_E3SM(filename_input, 'lat_'+E3SMdomain_range) 
+ # (timem, psm, timeunitm, psmunit, psmname)=read_E3SM(filename_input, 'PS_'+E3SMdomain_range) + + cdaym = timeunit2cday(timeunitm, 'noleap') + yearm = timeunitm.split(' ')[2][0:4] + timem2 = timem.data-365*(int(ymd[0][0:4])-int(yearm)) + cdaym + + # ship measurement times during the model day + timeo = time[np.logical_and(time>=timem2[0], time=timem2[0], time=timem2[0], time= timem2[0], time < timem2[0] + 1)] + lono = lon[np.logical_and(time >= timem2[0], time < timem2[0] + 1)] + lato = lat[np.logical_and(time >= timem2[0], time < timem2[0] + 1)] + + + # allocation variables and attributes + if dd == 0: + for vv in range(varlen): + variables.append([]) + + # extract the data at the time and location of ship + for tt in range(len(timeo)): + t_idx = np.abs(timem2-timeo[tt]).argmin() + if lono[tt]<-900. or lato[tt]<-900: + for vv in range(varlen): + variables[vv].append(np.nan) + else: + x_idx = find_nearest(lonm, latm, lono[tt], lato[tt]) + for vv in range(varlen): + variables[vv].append(varall[vv][t_idx, -1, x_idx]) # choose the lowest level + + numall = [np.array(a) for a in variables[2::2]] + dnall = [np.array(a) for a in variables[3::2]] + + NCNall = calc_CNsize_cutoff_0_3000nm(dnall, numall, np.array(variables[0]), np.array(variables[1])) + + # calculate total CN concentration for CPC (>10nm) and CPCU (>3nm) + NUCN = np.nansum(NCNall[3:, :], 0) # >3nm + NCN = np.nansum(NCNall[10:, :], 0) # >10nm + + + # %% output extacted file + outputname = 'Ship_CNsize_' + campaign + '_' + model + '_shipleg' + legnum + '.nc' + print('output to this file: ' + E3SM_ship_path + outputname) + + # define filename + f = Dataset(E3SM_ship_path + outputname, 'w', format='NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + size = f.createDimension('size', 3000) + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + size_o = f.createVariable("size", 'i8', ("size")) + lat_o = f.createVariable("lat", "f8", ("time", )) + lon_o 
= f.createVariable("lon", "f8", ("time", )) + + data_o = f.createVariable('NCNall', 'f8', ("size", "time")) + ncn_o = f.createVariable("NCN", "f8", ("time", )) + nucn_o = f.createVariable("NUCN", "f8", ("time", )) + + # write data + time_o[:] = time + size_o[:] = np.arange(1, 3001) + lat[lat<-900] = -9999. + lon[lon<-900] = -9999. + lat_o[:] = lat + lon_o[:] = lon + NCNall[np.isnan(NCNall)] = -9999. + NCN[np.isnan(NCN)] = -9999. + NUCN[np.isnan(NUCN)] = -9999. + data_o[:, :] = NCNall + ncn_o[:] = NCN + nucn_o[:] = NUCN + + # attributes + time_o.units = "days since " + str(int(ymd[0][0:4])-1) + "-12-31 00:00:00 UTC" + lat_o.units = "degree north" + lon_o.units = "degree east" + time_o.long_name = "Calendar Day" + lat_o.long_name = "latitude" + lon_o.long_name = "longitude" + size_o.units = 'nm' + size_o.long_name = "0 to 3000nm with 1nm increment" + data_o.units = '#/m3' + data_o.long_name = 'aerosol size distribution' + ncn_o.units = '#/m3' + ncn_o.long_name = 'aerosol number concentration for size >10nm' + nucn_o.units = '#/m3' + nucn_o.long_name = 'aerosol number concentration for size >3nm' + + # global attributes + import time as ttt + f.description = model + " calculated aerosol size distribution along ship tracks for " + campaign + f.shiptrackdata = filename + f.modeldata = E3SM_hourly_path[mm] + E3SM_hourly_filehead[mm] + '.cam.h3.*.nc' + f.datanotes = 'variables are set as missing if GPS location is missing' + f.create_time = ttt.ctime(ttt.time()) + + f.close() \ No newline at end of file diff --git a/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_profiles.py b/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_profiles.py new file mode 100644 index 0000000..3733257 --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_E3SM_shiptrack_profiles.py @@ -0,0 +1,237 @@ +""" +# prepare E3SM vertical profiles at ARM ship-based field campaigns +# input data is E3SM regional output +# output is vertical profiles at the nearest grid of the ship track +""" + +import 
os +import glob +import numpy as np +from ..subroutines.time_format_change import timeunit2cday, yyyymmdd2cday, cday2mmdd +from ..subroutines.read_netcdf import read_E3SM +from ..subroutines.read_ship import read_marmet +from ..subroutines.read_ARMdata import read_met +from netCDF4 import Dataset + +def find_nearest(xall, yall, x, y): + distance = np.square(xall-x) + np.square(yall-y) + idx = distance.argmin() + return(idx) + +def run_prep(settings): + #%% variables from settings + campaign = settings['campaign'] + shipmetpath = settings['shipmetpath'] + Model_List = settings['Model_List'] + E3SM_hourly_path = settings['E3SM_hourly_path'] + E3SM_hourly_filehead = settings['E3SM_hourly_filehead'] + E3SM_ship_path = settings['E3SM_ship_path'] + + #%% other settings + + if not os.path.exists(E3SM_ship_path): + os.makedirs(E3SM_ship_path) + + # output height above ground. data will be interpolated into z_f + z_f = np.hstack((np.arange(0,500,50), np.arange(500,2000,100), np.arange(2000,5000,300), + np.arange(5000,10000,500), np.arange(10000,20001,1000))) + zlen = len(z_f) + + #%% set variables to be read + variable_names = ['T', 'U', 'V', 'Q', 'RELHUM', 'RHW', 'RHI', 'CLOUD', 'LWC', 'IWC', + 'CLDLIQ', 'CLDICE', 'NUMLIQ', "AREI", "AREL", "ICLDIWP", "ICLDTWP"] + varlen = len(variable_names) + + #%% get all ship data + if campaign == 'MAGIC': + lst = glob.glob(shipmetpath + 'marmet*.txt') + E3SMdomain_range = '202e_to_243e_20n_to_35n' # domain range in E3SM regional output + elif campaign == 'MARCUS': + lst = [1, 2, 3, 4] # there are 4 ship trips (legs) for MARCUS + E3SMdomain_range = '60e_to_160e_42s_to_70s' + else: + raise ValueError('data for this field campaign is not specified: ' + campaign) + lst.sort() + print('total number of ship leg files:' + str(len(lst))) + + + for filename in lst: + + + #%% read in ship data + + if campaign == 'MAGIC': + # for each ship leg + legnum = filename[-6:-4] + + (shipdata, shipvarlist) = read_marmet(filename) + year = [a[1] for a in 
shipdata] + month = [a[2] for a in shipdata] + day = [a[3] for a in shipdata] + hh = [int(a[4]) for a in shipdata] + mm = [int(a[5]) for a in shipdata] + ss = [int(a[6]) for a in shipdata] + lat = np.array([float(a[7]) for a in shipdata]) + lon = np.array([float(a[8]) for a in shipdata]) + + # ymd = [year[i] + '-' + month[i] + '-' + day[i] for i in range(len(year))] # yyyy-mm-dd + yyyymmdd = [year[i] + month[i] + day[i] for i in range(len(year))] # yyyymmdd + ymd = list(set(yyyymmdd)) # unique date + ymd.sort() + + + time = np.array(hh)/24. + np.array(mm)/1440. + np.array(ss)/86400. + for i in range(len(time)): + cday0 = yyyymmdd2cday(yyyymmdd[i], 'noleap') + if year[i] == year[0]: + time[i] = time[i] + cday0 + else: + time[i] = time[i] + cday0 + 365 # next year + + elif campaign == 'MARCUS': + legnum = str(filename) + if legnum == '1': + startdate = '2017-10-30' + enddate = '2017-12-02' + elif legnum == '2': + startdate = '2017-12-13' + enddate = '2018-01-11' + elif legnum == '3': + startdate = '2018-01-16' + enddate = '2018-03-04' + elif legnum == '4': + startdate = '2018-03-09' + enddate = '2018-03-22' + + cday1 = yyyymmdd2cday(startdate, 'noleap') + cday2 = yyyymmdd2cday(enddate, 'noleap') + if startdate[0:4] != enddate[0:4]: + cday2 = cday2 + 365 # cover two years + + time = np.empty(0) + lon = np.empty(0) + lat = np.empty(0) + ymd = [] + for cc in range(cday1, cday2 + 1): + if cc <= 365: + yyyymmdd = startdate[0:4] + cday2mmdd(cc) + else: + yyyymmdd = enddate[0:4] + cday2mmdd(cc-365) + + lst0 = glob.glob(shipmetpath + 'maraadmetX1.b1.' + yyyymmdd + '*') + (time0, lon0, timeunit, lonunit, lon_long_name) = read_met(lst0[0], 'lon') + (time0, lat0, timeunit, lonunit, lon_long_name) = read_met(lst0[0], 'lat') + ymd0 = timeunit.split()[2] + ymd.append(ymd0[0:4] + ymd0[5:7] + ymd0[8:10]) + + time = np.hstack((time, time0/86400. 
+ cc)) + lat = np.hstack((lat, lat0)) + lon = np.hstack((lon, lon0)) + + print('date for shipleg ' + legnum + ': ' + ymd[0] + '-' + ymd[-1]) + + #%% read in E3SM data + for mm in range(len(Model_List)): + model = Model_List[mm] + variables = list() + var_units = list() + var_longnames = list() + + # read all days in the ship leg + for dd in range(len(ymd)): + ymd2 = ymd[dd][0:4] + '-' + ymd[dd][4:6] + '-' + ymd[dd][6:8] + print('read this date: ' + ymd2) + filename_input = E3SM_hourly_path[mm] + E3SM_hourly_filehead[mm] + '.cam.h3.' + ymd2 + '-00000.nc' + + (timem, lonm, timeunitm, lonmunit, lonmname) = read_E3SM(filename_input, 'lon_' + E3SMdomain_range) + (timem, latm, timeunitm, latmunit, latmname) = read_E3SM(filename_input, 'lat_' + E3SMdomain_range) + (timem, z3, timeunitm, zunit, zname) = read_E3SM(filename_input, 'Z3_' + E3SMdomain_range) + # (timem, psm, timeunitm, psmunit, psmname) = read_E3SM(filename_input, 'PS_' + E3SMdomain_range) + + cdaym = timeunit2cday(timeunitm, 'noleap') + yearm = timeunitm.split(' ')[2][0:4] + timem2 = timem.data-365*(int(ymd[0][0:4])-int(yearm)) + cdaym + + # ship measurement times during the model day + timeo = time[np.logical_and(time>=timem2[0], time=timem2[0], time=timem2[0], time1e4] = 1e8 + # data2[np.logical_or(data2<0, data2>1e4)] = np.nan + time_fims = data0[0, :] + # change data from #/dlnDp to number + data2 = data0[1:-3, :]*dlnDp_f + + # TD mode or AMB mode. remove TD mode + TD_AMB = data0[-1, :] + data2[:, TD_AMB != 0] = -9999. + + fims = np.empty([30, len(time)]) + for ii in range(30): + fims[ii, :] = np.interp(time, time_fims, data2[ii, :]) + idx = np.logical_or(time>time_fims[-1], time1e6)] = np.nan + # pcasp[:, flag != 0] = np.nan + if not all(time_pcasp == time): + raise ValueError('PCASP time is inconsistent with FIMS') + elif len(filename_p) == 0: + time_pcasp = time + d_pcasp = [(dmin_p[x] + dmax_p[x])/2 for x in range(len(dmin_p))] + pcasp = np.full([len(d_pcasp), len(time)], -9999.) 
+ pcasp_total = np.full(len(time), -9999.) + else: + raise ValueError('find more than one file: ' + filename_p) + + # !! PCASP data is for standard T and p (Conc = Conc_orig*[(1013.25/Pamb)*(Tamb/293.15)]), change to ambient T/p + pcasp2 = np.array(pcasp) + for tt in range(len(time)): + pcasp[:, tt] = pcasp[:, tt]/((1013.25/p_amb[tt])*((T_amb[tt] + 273.15)/293.15)) + + # CVI + filename_c = glob.glob(cvipath + 'enaaafinletcviF1.c1.' + date[0:8] + '*.nc') + filename_c.sort() + # read in data + if len(filename_c) == 1: + (time_c, lon_c, lat_c, alt_c, timeunit_c, cvimode, cvi_inlet, enhance_factor, dilution_factor) = read_cvi(filename_c[0]) + if date == '20180216a': + time_c = np.insert(time_c, 1403, (time_c[1402] + time_c[1403])/2) + cvi_inlet=np.insert(cvi_inlet, 1403, cvi_inlet[1403]) + cvimode=np.insert(cvimode, 1403, cvimode[1403]) + enhance_factor = np.insert(enhance_factor, 1403, enhance_factor[1403]) + dilution_factor = np.insert(dilution_factor, 1403, dilution_factor[1403]) + enhance_factor[enhance_factor<-9000] = np.nan + dilution_factor[dilution_factor<-9000] = np.nan + if not all(time_c == time): + raise ValueError('CVI time is inconsistent with FIMS') + elif len(filename_c) == 0: + time_c = time + cvi_inlet = np.nan*np.empty([len(time)]) + cvimode = np.nan*np.empty([len(time)]) + enhance_factor = np.nan*np.empty([len(time)]) + dilution_factor = np.nan*np.empty([len(time)]) + else: + raise ValueError('find more than one file: ' + filename_c) + + cvi_inlet[cvi_inlet == -9] = 1 # if cvi_inlet is unfunctional, assume it is isokinetic and use fims as good data + + + # read OPC + filename_o = glob.glob(opcpath + 'OPCISO_G1_' + date[0:8] + '*.ict') + if len(filename_o) == 1: + (opc, dmin_o, dmax_o, d_opc, opclist) = read_opc(filename_o[0]) + time_o = opc[0, :] + opc = opc[1:, :] + opc[opc<0] = np.nan + else: + raise ValueError('can not find OPC data or find multiple files: ' + filename_o) + if date == '20180216a': + time_o = np.hstack((time_o[0:1403], 47873., 
time_o[1403:])) + opc = np.hstack((opc[:, 0:1403], (opc[:, 1402:1403] + opc[:, 1403:1404])/2, opc[:, 1403:])) + if any(time_o != time): + raise ValueError('OPC time is inconsistent with FIMS') + if sum(np.isnan(opc[0, :]))<0.1*len(time_o): + for ii in range(len(d_opc)): # fill missing timesteps + opc2 = opc[ii, :] + opc[ii, np.isnan(opc2)] = np.interp(time[np.isnan(opc2)], time[~np.isnan(opc2)], opc[ii, ~np.isnan(opc2)]) + else: + print('this date does not fill NaN OPC values') + + #%% now merge fims and pcasp + timelen = len(time) + nbin_merge = 67 + nbin_fims = len(d_fims) + nbin_pcasp = len(d_pcasp) + nbin_opc = len(d_opc) + # low and high range of each bin + dia_merge_l = np.full(nbin_merge, np.nan) + dia_merge_h = np.full(nbin_merge, np.nan) + # from bins 1-30, use FIMS bin + for n in range(nbin_fims): + dia_merge_l[n] = dmin_f[n] + dia_merge_h[n] = dmax_f[n] + # for the next bin, use upper range (0.64) of FIMS as low bound and 0.8 of PCASP as high bound + idx = dmax_p.index(0.8) + dia_merge_l[nbin_fims] = dmax_f[-1] + dia_merge_h[nbin_fims] = dmax_p[idx] + # next bin uses 0.8 as low bound and high bound of 2nd bin (0.9) of OPC + dia_merge_l[31] = 0.8 + dia_merge_h[31] = 0.9 + # next few bins are merged two OPC bins + for n in range(1, 6): + dia_merge_l[31 + n] = dmin_o[n*2] + dia_merge_h[31 + n] = dmax_o[n*2 + 1] + # other bins follows OPC bins + for n in range(12, nbin_opc): + dia_merge_l[25 + n] = dmin_o[n] + dia_merge_h[25 + n] = dmax_o[n] + + d_merge = (dia_merge_h + dia_merge_l)/2 + + # merged concentration + conc_merge = np.full([timelen, nbin_merge], -9999.) + fims[np.isnan(fims)] = -9999. # do not treat missing as NaN. treat -9999 + for k in range(timelen): + # mask all data with cloud flag on + if cldflag[k] != 0: + continue + # use fims data up to d_fims[24] + for n in range(24 + 1): + if cvi_inlet[k] == 0: # in Jerome's code it is 0. 
looks like it should be 1 (CVI in cloud) + fims[n, k] = -9999 + conc_merge[k, n] = fims[n, k] + # overlapping bins + idx = dmin_p.index(0.3) # start merging size. choose the index of pcasp for merging + if fims[25, k] >=0: + if cvi_inlet[k] == 1: + ffac = 0.8 + pfac = 0.2 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 25] = (fims[25, k]*ffac + pcasp[idx, k]*0.3*pfac) + if fims[26, k] >=0: + if cvi_inlet[k] == 1: + ffac = 0.7 + pfac = 0.3 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 26] = (fims[26, k]*ffac + (pcasp[idx, k]*0.3 + pcasp[idx + 1, k]*0.2)*pfac) + if fims[27, k] >=0: + if cvi_inlet[k] == 1: + ffac = 0.5 + pfac = 0.5 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 27] = (fims[27, k]*ffac + (pcasp[idx + 1, k]*0.65)*pfac) + if fims[28, k] >=0: + if cvi_inlet[k] == 1: + ffac = 0.3 + pfac = 0.7 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 28] = (fims[28, k]*ffac + (pcasp[idx + 1, k]*0.15 + pcasp[idx + 2, k]*0.5)*pfac) + if fims[29, k] >=0: + if cvi_inlet[k] == 1: + ffac = 0.2 + pfac = 0.8 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 29] = (fims[29, k]*ffac + (pcasp[idx + 2, k]*0.4 + pcasp[idx + 3, k]*0.2)*pfac) + conc_merge[k, 30] = pcasp[idx + 3, k]*0.8 + if not all(pcasp[idx:idx + 4, k] >=0): + conc_merge[k, 25:30] = fims[25:30, k] + conc_merge[k, 30] = (conc_merge[k, 29] + opc[1, k]*1.4)/2.0 + # next merge OPC and PCASP, remove PCASP if the values is 10x larger 
than OPC + pcasp2 = pcasp[18, k]*0.5 + opc2 = opc[1, k]*1.4 # the first bin of OPC contains all small-size particles. not using opc[0, k] + if np.isnan(opc2): + conc_merge[k, 31] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 31] = opc2 + else: + conc_merge[k, 31] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[18, k]*0.5 + pcasp[19, k]*0.2 + opc2 = opc[2, k] + opc[3, k] + if np.isnan(opc2): + conc_merge[k, 32] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 32] = opc2 + else: + conc_merge[k, 32] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[19, k]*0.8 + opc2 = opc[4, k] + opc[5, k] + if np.isnan(opc2): + conc_merge[k, 33] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 33] = opc2 + else: + conc_merge[k, 33] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[20, k]*0.9 + opc2 = opc[6, k] + opc[7, k] + if np.isnan(opc2): + conc_merge[k, 34] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 34] = opc2 + else: + conc_merge[k, 34] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[20, k]*0.1 + pcasp[21, k] + opc2 = opc[8, k] + opc[9, k] + if np.isnan(opc2): + conc_merge[k, 35] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 35] = opc2 + else: + conc_merge[k, 35] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[22, k] + pcasp[23, k]*0.2 + opc2 = opc[10, k] + opc[11, k] + if np.isnan(opc2): + conc_merge[k, 36] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 36] = opc2 + else: + conc_merge[k, 36] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[23, k]*0.7 + opc2 = opc[12, k] + if np.isnan(opc2): + conc_merge[k, 37] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 37] = opc2 + else: + conc_merge[k, 37] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[23, k]*0.1 + pcasp[24, k]*0.7 + opc2 = opc[13, k] + if np.isnan(opc2): + conc_merge[k, 38] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 38] = opc2 
+ else: + conc_merge[k, 38] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[24, k]*0.3 + pcasp[25, k]*0.4 + opc2 = opc[14, k] + if np.isnan(opc2): + conc_merge[k, 39] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 39] = opc2 + else: + conc_merge[k, 39] = (opc2 + pcasp2)/2.0 # assume equal weight + pcasp2 = pcasp[25, k]*0.6 + pcasp[26, k]*0.3 + opc2 = opc[15, k] + if np.isnan(opc2): + conc_merge[k, 40] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 40] = opc2 + else: + conc_merge[k, 40] = opc2*0.6 + pcasp2*0.4 # gradually reduce the weight of PCASP + pcasp2 = pcasp[26, k]*0.7 + pcasp[27, k]*0.2 + opc2 = opc[16, k] + if np.isnan(opc2): + conc_merge[k, 41] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 41] = opc2 + else: + conc_merge[k, 41] = opc2*0.7 + pcasp2*0.3 # gradually reduce the weight of PCASP + pcasp2 = pcasp[27, k]*0.8 + pcasp[28, k]*0.2 + opc2 = opc[17, k] + if np.isnan(opc2): + conc_merge[k, 42] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 42] = opc2 + else: + conc_merge[k, 42] = opc2*0.8 + pcasp2*0.2 # gradually reduce the weight of PCASP + pcasp2 = pcasp[28, k]*0.8 + pcasp[29, k]*0.3 + opc2 = opc[18, k] + if np.isnan(opc2): + conc_merge[k, 43] = pcasp2 + elif pcasp2>10*opc2 or pcasp2<0: + conc_merge[k, 43] = opc2 + else: + conc_merge[k, 43] = opc2*0.9 + pcasp2*0.1 # gradually reduce the weight of PCASP + # using OPC for other bins + for n in range(44, nbin_merge): + conc_merge[k, n] = opc[n-25, k] + + + #%% output data + if not os.path.exists(merged_size_path): + os.mkdir(merged_size_path) + outfile = merged_size_path + 'merged_bin_fims_pcasp_opc_ACEENA_' + date + '.nc' + # define filename + f = Dataset(outfile, 'w', format = 'NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + s = f.createDimension('size', nbin_merge) # unlimited + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + size_o = f.createVariable("size", "f8", 
("size", )) + sizeh_o = f.createVariable("size_high", "f8", ("size", )) + sizel_o = f.createVariable("size_low", "f8", ("size", )) + lon_o = f.createVariable("lon", 'f8', ("time", )) + lat_o = f.createVariable("lat", 'f8', ("time", )) + height_o = f.createVariable("height", 'f8', ("time", )) + cflag_o = f.createVariable('cld_flag', 'i8', ("time", )) + legnum_o = f.createVariable('leg_number', 'i8', ("time", )) + cvi_o = f.createVariable('CVI_inlet', 'i8', ("time", )) + cvim_o = f.createVariable('CVI_mode', 'i8', ("time", )) + df_o = f.createVariable('CVI_Dilution_Factor', 'f8', ("time", )) + ef_o = f.createVariable('CVI_Enhancement_Factor', 'f8', ("time", )) + merge_o = f.createVariable('size_distribution_merged', 'f8', ("time", "size")) + fims_o = f.createVariable('totalnum_fims', 'f8', ("time", )) + pcasp_o = f.createVariable('totalnum_pcasp', 'f8', ("time", )) + + # write data + time_o[:] = time + size_o[:] = d_merge + sizeh_o[:] = dia_merge_h + sizel_o[:] = dia_merge_l + lon_o[:] = lon + lat_o[:] = lat + height_o[:] = height + cflag_o[:] = cldflag + legnum_o[:] = legnum + cvi_o[:] = cvi_inlet + cvim_o[:] = np.array(cvimode) + dilution_factor[np.isnan(dilution_factor)] = -9999. + df_o[:] = dilution_factor + enhance_factor[np.isnan(enhance_factor)] = -9999. + ef_o[:] = enhance_factor + conc_merge[np.isnan(conc_merge)] = -9999. + conc_merge[conc_merge<0] = -9999. + merge_o[:, :] = conc_merge + fims_total[np.isnan(fims_total)] = -9999. + fims_total[fims_total<0] = -9999. + fims_o[:] = fims_total + pcasp_total[np.isnan(pcasp_total)] = -9999. + pcasp_total[pcasp_total<0] = -9999. 
+ pcasp_o[:] = pcasp_total + + # attributes + time_o.units = "seconds since " + date[0:4] + '-' + date[4:6] + '-' + date[6:8] + " 00:00:00" + size_o.units = 'um' + size_o.long_name = 'center of size bin' + sizeh_o.units = 'um' + sizeh_o.long_name = 'upper bound of size bin' + sizel_o.units = 'um' + sizel_o.long_name = 'lower bound of size bin' + lon_o.units = 'degree east' + lon_o.long_name = 'Longitude' + lat_o.units = 'degree north' + lat_o.long_name = 'Latitude' + height_o.units = 'm MSL' + height_o.long_name = 'height' + cflag_o.units = 'N/A' + cflag_o.long_name = 'cloud flag' + cflag_o.description = '1-cloud; 0-no cloud' + legnum_o.units = 'N/A' + legnum_o.long_name = 'leg number' + cvi_o.units = 'N/A' + cvi_o.long_name = 'CVI inlet status' + cvi_o.description = '0-CVI inlet on; 1-Isokinetic inlet on' + cvim_o.units = 'N/A' + cvim_o.long_name = 'CVI mode flag' + cvim_o.description = '0: CVI mode; 1: under-kinetic; -1: transition' + df_o.units = 'N/A' + df_o.long_name = 'CVI Dilution Factor' + df_o.description = 'Dilution Factor after under-kinetic mode. Some measurements such as AMS, need to divide by this number' + ef_o.units = 'N/A' + ef_o.long_name = 'CVI Enhancement Factor' + ef_o.description = 'Enhancement Factor after CVI mode. 
Some measurements such as AMS, need to divide by this number' + merge_o.units = '#/cm3' + merge_o.long_name = 'merged size distribution' + fims_o.units = '#/cm3' + fims_o.long_name = 'total aerosol concentration from FIMS' + pcasp_o.units = '#/cm3' + pcasp_o.long_name = 'total aerosol concentration from PCASP' + + # global attributes + import time as ttt + f.description = "Merged size distribution from FIMS, PCASP and OPC" + f.create_time = ttt.ctime(ttt.time()) + + f.close() + + diff --git a/src/esmac_diags/preprocessing/prep_obs_mergesize_HISCALE.py b/src/esmac_diags/preprocessing/prep_obs_mergesize_HISCALE.py new file mode 100644 index 0000000..2cccdb1 --- /dev/null +++ b/src/esmac_diags/preprocessing/prep_obs_mergesize_HISCALE.py @@ -0,0 +1,393 @@ +""" +# merge size distribution from FIMS and PCASP for Hi-Scale +# revised from size_bin_merge.pro by Jerome Fast +# Shuaiqi Tang +# 2020.10.1 +""" + +import os +import glob +import re +import numpy as np +from ..subroutines.read_aircraft import read_fims, read_fims_bin, read_iwg1, read_pcasp, read_cvi_hiscale as read_cvi +from ..subroutines.time_format_change import hhmmss2sec +from netCDF4 import Dataset + +def run_prep(settings): + #%% variables from settings + iwgpath = settings['iwgpath'] + fimspath = settings['fimspath'] + pcasppath = settings['pcasppath'] + cvipath = settings['cvipath'] + merged_size_path = settings['merged_size_path'] + + #%% other settings + + if not os.path.exists(merged_size_path): + os.makedirs(merged_size_path) + + + # %% find all data + # lst = glob.glob(iwgpath + 'aaf.iwg1001s.g1.hiscale.20160830*.a2.txt') + lst = glob.glob(iwgpath + '*.a2.txt') + lst.sort() + + # read in fims bin + (d_fims, dmin_f, dmax_f) = read_fims_bin(fimspath + 'HISCALE_FIMS_bins_R1.dat') + # change unit to um + d_fims = [x/1000 for x in d_fims] + dmin_f = [x/1000 for x in dmin_f] + dmax_f = [x/1000 for x in dmax_f] + dlnDp_f = np.empty(len(d_fims)) + for bb in range(len(d_fims)): + dlnDp_f[bb] = 
np.log(dmax_f[bb]/dmin_f[bb]) + dlnDp_f = np.mean(dlnDp_f) + + for filename in lst[:]: + + # get date + fname = re.split('hiscale.|.a2', filename) + date = fname[-2] + print(date) + if date[-1] == 'a': + flightidx = 1 + else: + flightidx = 2 + + #%% read in data + # IWG + (iwg, iwgvars) = read_iwg1(filename) + timelen = len(iwg) + # get lat, lon, height, time + lon = np.empty(timelen) + lat = np.empty(timelen) + height = np.empty(timelen) + time = np.empty(timelen) + cldflag = np.empty(timelen) + legnum = np.empty(timelen) + T_amb = np.empty(timelen) + p_amb = np.empty(timelen) + for t in range(timelen): + lat[t] = float(iwg[t][2]) + lon[t] = float(iwg[t][3]) + height[t] = float(iwg[t][4]) + T_amb[t] = float(iwg[t][20]) + p_amb[t] = float(iwg[t][23]) + cldflag[t] = int(iwg[t][35]) + legnum[t] = int(iwg[t][-1]) + timestr = iwg[t][1].split(' ') + time[t] = hhmmss2sec(timestr[1]) + datestr = timestr[0] + + # FIMS + filename_f = glob.glob(fimspath + 'FIMS_G1_' + date[0:8] + '*' + str(flightidx) + '_HISCALE_001s.ict') + # read in data + if len(filename_f) == 1: + (data0, fimslist) = read_fims(filename_f[0]) + # remove some unrealistic data + data2 = data0[1:-2, :] + data2[np.isnan(data2)] = 1e8 + data2[:, data2[0, :] > 1e4] = 1e8 + data2[np.logical_or(data2 < 0, data2 > 1e4)] = np.nan + data0[1:-2, :] = data2 + time_fims = data0[0, :] + # change data from #/dlnDp to number + data2 = data0[1:-2, :]*dlnDp_f + fims = np.empty([30, len(time)]) + for ii in range(30): + fims[ii, :] = np.interp(time, time_fims, data2[ii, :]) + idx = np.logical_or(time > time_fims[-1], time < time_fims[0]) + fims[:, idx] = np.nan + elif len(filename_f) == 0: + time_fims = time + fims = np.nan*np.empty([len(d_fims), len(time)]) + else: + raise ValueError('find more than one file: ' + filename_f) + fims_total = np.nansum(fims, 0) + fims_total[fims_total <= 0] = np.nan + + # PCASP + filename_p = glob.glob(pcasppath + 'pcasp_g1_' + date[0:8] + '*' + str(flightidx) + '_hiscale001s.ict.txt') + if 
date[4:6] == '04' or date[4:6] == '05': + binlen = 27 + dmax_p = [130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, 400, 500, \ + 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000] + dmin_p = [120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, 400, 500, \ + 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800] + elif date[4:6] == '08' or date[4:6] == '09': + binlen = 30 + dmax_p = [100, 110, 120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, \ + 400, 500, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800, 3000] + dmin_p = [90, 100, 110, 120, 130, 140, 150, 160, 170, 180, 200, 220, 240, 260, 280, 300, \ + 400, 500, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600, 2800] + dmin_p = [x/1000 for x in dmin_p] + dmax_p = [x/1000 for x in dmax_p] + # read in data + if len(filename_p) == 1: + (data0, pcasplist) = read_pcasp(filename_p[0]) + time_pcasp = data0[0, :] + d_pcasp = [float(i) for i in pcasplist[1:-5]] + pcasp = data0[1:-5, :] + flag = data0[-2, :] + pcasp_total = data0[-5, :] + # remove some questionable data + # pcasp[np.isnan(pcasp)] = -9999 + # pcasp[np.logical_or(pcasp <= 0, pcasp > 1e6)] = np.nan + pcasp[:, flag != 0] = np.nan + pcasp[:, cldflag == 1] = np.nan + if not all(time_pcasp == time): + raise ValueError('PCASP time is inconsistent with FIMS') + elif len(filename_p) == 0: + time_pcasp = time + d_pcasp = [(dmin_p[x] + dmax_p[x])/2 for x in range(len(dmin_p))] + pcasp = np.nan*np.empty([len(d_pcasp), len(time)]) + else: + raise ValueError('find more than one file: ' + filename_p) + # !! 
PCASP data is for standard T and p (Conc = Conc_orig*[(1013.25/Pamb)*(Tamb/293.15)]), change to ambient T/p + for tt in range(len(time)): + pcasp[:, tt] = pcasp[:, tt]/((1013.25/p_amb[tt])*((T_amb[tt] + 273.15)/293.15)) + + + # CVI + filename_c = glob.glob(cvipath + 'CVI_G1_' + date[0:8] + '*R4_HISCALE_001s.ict.txt') + filename_c.sort() + # read in data + if len(filename_c) == 1 or len(filename_c) == 2: + (cvi, cvilist) = read_cvi(filename_c[flightidx-1]) + time_cvi = cvi[0, :] + cvi_inlet = cvi[-1, :] + enhance_factor = cvi[2, :] + enhance_factor[enhance_factor < -9000] = np.nan + dilution_factor = cvi[3, :] + dilution_factor[dilution_factor < -9000] = np.nan + cvi_mode = cvi[4, :] + cvi_qc = cvi[5, :] + if not all(time_cvi == time): + raise ValueError('CVI time is inconsistent with FIMS') + elif len(filename_c) == 0: + time_cvi = time + cvi_inlet = np.nan*np.empty([len(time)]) + cvi_mode = np.nan*np.empty([len(time)]) + dilution_factor = np.nan*np.empty([len(time)]) + enhance_factor = np.nan*np.empty([len(time)]) + cvi_qc = np.nan*np.empty([len(time)]) + else: + raise ValueError('find more than one file: ' + filename_c) + cvi_mode[cvi_qc != 0] = -9999 + + #%% now merge fims and pcasp + timelen = len(time) + nbin_merge = 44 + nbin_fims = len(d_fims) + nbin_pcasp = len(d_pcasp) + # low and high range of each bin + dia_merge_l = np.empty(nbin_merge) + dia_merge_h = np.empty(nbin_merge) + for n in range(nbin_fims): + dia_merge_l[n] = dmin_f[n] + dia_merge_h[n] = dmax_f[n] + idx = dmax_p.index(0.5) + # use upper range (0.425) of FIMS as low bound and 0.5 of PCASP as high bound + dia_merge_l[nbin_fims] = dmax_f[-1] + dia_merge_h[nbin_fims] = dmax_p[idx] + for n in range(idx + 1, nbin_pcasp): + dia_merge_l[nbin_fims + n-idx] = dmin_p[n] + dia_merge_h[nbin_fims + n-idx] = dmax_p[n] + d_merge = (dia_merge_h + dia_merge_l)/2 + + # merged concentration + conc_merge = np.empty([timelen, nbin_merge]) + fims[np.isnan(fims)] = -9999. # do not treat missing as NaN. 
treat -9999 + for k in range(timelen): + # use fims data up to d_fims[23] (~0.19 um) + for n in range(23 + 1): + if cvi_inlet[k] == 0: # in Jerome's code it is 0. looks like it should be 1 (CVI in cloud) + fims[n, k] = -9999 + conc_merge[k, n] = fims[n, k] + # overlapping bins + idx = dmin_p.index(0.2) # start merging size. corresponding to 10 in IOP2 + if fims[24, k] > 0: + if cvi_inlet[k] == 1: + ffac = 0.95 + pfac = 0.05 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 24] = (fims[24, k]*ffac + (pcasp[idx, k]*1.0 + pcasp[idx + 1, k]*0.25)*pfac) + if fims[25, k] > 0: + if cvi_inlet[k] == 1: + ffac = 0.8 + pfac = 0.2 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 25] = (fims[25, k]*ffac + (pcasp[idx + 1, k]*0.75 + pcasp[idx + 2, k]*0.8)*pfac) + if fims[26, k] > 0: + if cvi_inlet[k] == 1: + ffac = 0.65 + pfac = 0.35 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 26] = (fims[26, k]*ffac + (pcasp[idx + 2, k]*0.2 + pcasp[idx + 3, k]*1.0 + pcasp[idx + 4, k]*0.5)*pfac) + if fims[27, k] > 0: + if cvi_inlet[k] == 1: + ffac = 0.35 + pfac = 0.65 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 27] = (fims[27, k]*ffac + (pcasp[idx + 4, k]*0.5 + pcasp[idx + 5, k]*0.25)*pfac) + if fims[28, k] > 0: + if cvi_inlet[k] == 1: + ffac = 0.2 + pfac = 0.8 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 28] = (fims[28, k]*ffac + (pcasp[idx + 5, k]*0.5)*pfac) + if fims[29, k] > 0: + if cvi_inlet[k] == 1: 
+ ffac = 0.05 + pfac = 0.95 + elif cvi_inlet[k] == 0: + ffac = 0.0 + pfac = 1.0 + else: + raise ValueError('cvi_inlet value is neither 0 nor 1') + else: + ffac = 0.0 + pfac = 1.0 + conc_merge[k, 29] = (fims[29, k]*ffac + (pcasp[idx + 5, k]*0.25 + pcasp[idx + 6, k]*0.25)*pfac) + conc_merge[k, 30] = pcasp[idx + 6, k]*0.75 + # using PCASP for upper bins + nn = 31 + for n in range(idx + 7, nbin_pcasp): + conc_merge[k, nn] = pcasp[n, k] + nn = nn + 1 + + #%% output data + if not os.path.exists(merged_size_path): + os.mkdir(merged_size_path) + outfile = merged_size_path + 'merged_bin_fims_pcasp_HISCALE_' + date + '.nc' + # define filename + f = Dataset(outfile, 'w', format = 'NETCDF4') + + # define dimensions + t = f.createDimension('time', None) # unlimited + s = f.createDimension('size', nbin_merge) # unlimited + + # create variable list + time_o = f.createVariable("time", "f8", ("time", )) + size_o = f.createVariable("size", "f8", ("size", )) + sizeh_o = f.createVariable("size_high", "f8", ("size", )) + sizel_o = f.createVariable("size_low", "f8", ("size", )) + lon_o = f.createVariable("lon", 'f8', ("time", )) + lat_o = f.createVariable("lat", 'f8', ("time", )) + height_o = f.createVariable("height", 'f8', ("time", )) + cflag_o = f.createVariable('cld_flag', 'i8', ("time", )) + legnum_o = f.createVariable('leg_number', 'i8', ("time", )) + cvi_o = f.createVariable('CVI_inlet', 'i8', ("time", )) + cvim_o = f.createVariable('CVI_mode', 'i8', ("time", )) + df_o = f.createVariable('CVI_Dilution_Factor', 'f8', ("time", )) + ef_o = f.createVariable('CVI_Enhancement_Factor', 'f8', ("time", )) + merge_o = f.createVariable('size_distribution_merged', 'f8', ("time", "size")) + fims_o = f.createVariable('totalnum_fims', 'f8', ("time", )) + pcasp_o = f.createVariable('totalnum_pcasp', 'f8', ("time", )) + + # write data + time_o[:] = time + size_o[:] = d_merge + sizeh_o[:] = dia_merge_h + sizel_o[:] = dia_merge_l + lon_o[:] = lon + lat_o[:] = lat + height_o[:] = height + cflag_o[:] 
= cldflag + legnum_o[:] = legnum + cvi_o[:] = cvi_inlet + cvim_o[:] = np.array(cvi_mode) + dilution_factor[np.isnan(dilution_factor)] = -9999. + df_o[:] = dilution_factor + enhance_factor[np.isnan(enhance_factor)] = -9999. + ef_o[:] = enhance_factor + conc_merge[np.isnan(conc_merge)] = -9999. + conc_merge[conc_merge < 0] = -9999. + merge_o[:, :] = conc_merge + fims_total[np.isnan(fims_total)] = -9999. + fims_total[fims_total < 0] = -9999. + fims_o[:] = fims_total + pcasp_total[np.isnan(pcasp_total)] = -9999. + pcasp_total[pcasp_total < 0] = -9999. + pcasp_o[:] = pcasp_total + + # attributes + time_o.units = "seconds since " + date[0:4] + '-' + date[4:6] + '-' + date[6:8] + " 00:00:00" + size_o.units = 'um' + size_o.long_name = 'center of size bin' + sizeh_o.units = 'um' + sizeh_o.long_name = 'upper bound of size bin' + sizel_o.units = 'um' + sizel_o.long_name = 'lower bound of size bin' + lon_o.units = 'degree east' + lon_o.long_name = 'Longitude' + lat_o.units = 'degree north' + lat_o.long_name = 'Latitude' + height_o.units = 'm MSL' + height_o.long_name = 'height' + cflag_o.units = 'N/A' + cflag_o.long_name = 'cloud flag' + cflag_o.description = '1-cloud; 0-no cloud' + legnum_o.units = 'N/A' + legnum_o.long_name = 'leg number' + cvi_o.units = 'N/A' + cvi_o.long_name = 'CVI inlet status' + cvi_o.description = '0-CVI inlet on; 1-Isokinetic inlet on' + cvim_o.units = 'N/A' + cvim_o.long_name = 'CVI mode flag' + cvim_o.description = '0: CVI mode; 1: under-kinetic; -1: transition' + df_o.units = 'N/A' + df_o.long_name = 'CVI Dilution Factor' + df_o.description = 'Dilution Factor after under-kinetic mode. Some measurements such as AMS, need to divide by this number' + ef_o.units = 'N/A' + ef_o.long_name = 'CVI Enhancement Factor' + ef_o.description = 'Enhancement Factor after CVI mode. 
Some measurements such as AMS, need to divide by this number' + merge_o.units = '#/cm3' + merge_o.long_name = 'merged size distribution' + fims_o.units = '#/cm3' + fims_o.long_name = 'total aerosol concentration from FIMS' + pcasp_o.units = '#/cm3' + pcasp_o.long_name = 'total aerosol concentration from PCASP' + + # global attributes + import time as ttt + f.description = "Merged size distribution from FIMS and PCASP" + f.create_time = ttt.ctime(ttt.time()) + + f.close() + diff --git a/python/subroutines/CN_mode_to_size.py b/src/esmac_diags/subroutines/CN_mode_to_size.py similarity index 100% rename from python/subroutines/CN_mode_to_size.py rename to src/esmac_diags/subroutines/CN_mode_to_size.py diff --git a/src/esmac_diags/subroutines/__init.py__ b/src/esmac_diags/subroutines/__init.py__ new file mode 100644 index 0000000..e69de29 diff --git a/python/subroutines/quality_control.py b/src/esmac_diags/subroutines/quality_control.py similarity index 100% rename from python/subroutines/quality_control.py rename to src/esmac_diags/subroutines/quality_control.py diff --git a/python/subroutines/read_ARMdata.py b/src/esmac_diags/subroutines/read_ARMdata.py similarity index 100% rename from python/subroutines/read_ARMdata.py rename to src/esmac_diags/subroutines/read_ARMdata.py diff --git a/python/subroutines/read_aircraft.py b/src/esmac_diags/subroutines/read_aircraft.py similarity index 95% rename from python/subroutines/read_aircraft.py rename to src/esmac_diags/subroutines/read_aircraft.py index da1cdd0..170437d 100644 --- a/python/subroutines/read_aircraft.py +++ b/src/esmac_diags/subroutines/read_aircraft.py @@ -11,7 +11,8 @@ def read_ams(filename): Parameters ---------- - filename : input filename + filename : str + input filename Returns ------- diff --git a/python/subroutines/read_netcdf.py b/src/esmac_diags/subroutines/read_netcdf.py similarity index 100% rename from python/subroutines/read_netcdf.py rename to src/esmac_diags/subroutines/read_netcdf.py diff 
--git a/python/subroutines/read_ship.py b/src/esmac_diags/subroutines/read_ship.py similarity index 100% rename from python/subroutines/read_ship.py rename to src/esmac_diags/subroutines/read_ship.py diff --git a/python/subroutines/read_surface.py b/src/esmac_diags/subroutines/read_surface.py similarity index 100% rename from python/subroutines/read_surface.py rename to src/esmac_diags/subroutines/read_surface.py diff --git a/python/subroutines/specific_data_treatment.py b/src/esmac_diags/subroutines/specific_data_treatment.py similarity index 91% rename from python/subroutines/specific_data_treatment.py rename to src/esmac_diags/subroutines/specific_data_treatment.py index 1aad79f..2eaa7e3 100644 --- a/python/subroutines/specific_data_treatment.py +++ b/src/esmac_diags/subroutines/specific_data_treatment.py @@ -3,6 +3,7 @@ """ import numpy as np +from ..subroutines.quality_control import qc_remove_neg #%% def avg_time_1d(time0, data0, time): @@ -23,7 +24,6 @@ def avg_time_1d(time0, data0, time): data : output data """ - from quality_control import qc_remove_neg data0 = qc_remove_neg(data0) if data0.shape[0] != len(time0): raise ValueError("Arrays must have the same size") @@ -53,7 +53,6 @@ def avg_time_2d(time0, data0, time): data : output data """ - from quality_control import qc_remove_neg data0 = qc_remove_neg(data0) if data0.shape[0] != len(time0): raise ValueError("the first dimension of input data must have the same size with time") @@ -119,14 +118,10 @@ def mask_model_ps(timem, psm, legnum, campaign, shipmetpath): datamask : mask flag of large Ps difference """ - - import sys - sys.path.insert(1, '../subroutines/') - import glob - from read_ship import read_marmet - from read_ARMdata import read_met - from time_format_change import yyyymmdd2cday, cday2mmdd + from ..subroutines.read_ship import read_marmet + from ..subroutines.read_ARMdata import read_met + from ..subroutines.time_format_change import yyyymmdd2cday, cday2mmdd if campaign == 'MAGIC': 
filenameo = shipmetpath + 'marmet' + legnum + '.txt' diff --git a/python/subroutines/time_format_change.py b/src/esmac_diags/subroutines/time_format_change.py similarity index 100% rename from python/subroutines/time_format_change.py rename to src/esmac_diags/subroutines/time_format_change.py diff --git a/tests/test_package_imports.py b/tests/test_package_imports.py new file mode 100644 index 0000000..92981a5 --- /dev/null +++ b/tests/test_package_imports.py @@ -0,0 +1,7 @@ +import pytest + +def test_package_imports(): + """ Test whether the package imports""" + import esmac_diags + +