diff --git a/.github/workflows/lint_check.yml b/.github/workflows/lint_check.yml
new file mode 100644
index 0000000..b509f84
--- /dev/null
+++ b/.github/workflows/lint_check.yml
@@ -0,0 +1,47 @@
+# Lint gate for the example scripts: runs flake8, isort, black and pylint over
+# ./examples on every push and on pull requests that target main.
+name: lint-check
+
+on:
+  push:
+  pull_request:
+    branches:
+      - "main"
+
+jobs:
+  # Linters run as sequential steps; the first failing step fails the job.
+  lint-check:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      # Every linter below is pinned to an exact release, so pin the interpreter
+      # too instead of relying on whatever Python ubuntu-latest ships.
+      # NOTE(review): 3.10 is an assumed project baseline - adjust if it differs.
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: lint-flake8
+        run: |
+          pip install flake8==3.8.4
+          FLAKE_DISABLE_LIST="F403,F405,W504,W503,E203"
+          flake8 --max-line-length=120 --ignore="$FLAKE_DISABLE_LIST" ./examples/*
+
+      - name: lint-isort
+        run: |
+          pip install isort==5.12.0
+          isort --check --profile=black ./examples/*
+
+      - name: lint-black
+        run: |
+          pip install black==22.8.0
+          # The exclude value is a regex; keep it quoted so the shell passes it through as one word.
+          BLACK_EXCLUDE_SETTINGS='\.venv/|\.local/|\.cache/|\.git/'
+          black --line-length=120 --check --exclude "$BLACK_EXCLUDE_SETTINGS" ./examples/*
+
+      - name: lint-pylint
+        run: |
+          pip install pylint==2.17.2
+          PYLINT_DISABLE_LIST="C0114,C0415,W0212,W0235,W0238,W0621,C0103,R1735,C2801,E0402,C0412,W0719,R1728,W1514,W0718,W0105,W0707,C0209,W0703,W1203"
+          pylint --rcfile .pylintrc --disable="$PYLINT_DISABLE_LIST" ./examples/*
diff --git a/.pylintrc b/.pylintrc
new file mode 100644
index 0000000..73cb3ae
--- /dev/null
+++ b/.pylintrc
@@ -0,0 +1,428 @@
+# This Pylint rcfile contains a best-effort configuration to uphold the
+# best-practices and style described in the Google Python style guide:
+#   https://google.github.io/styleguide/pyguide.html
+#
+# Its canonical open-source location is:
+#   https://google.github.io/styleguide/pylintrc
+
+[MASTER]
+
+# Files or directories to be skipped. They should be base names, not paths.
+ignore=third_party,storage
+
+# Files or directories matching the regex patterns are skipped. The regex
+# matches against base names, not paths.
+ignore-patterns=
+
+# Pickle collected data for later comparisons.
+persistent=no
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+# Use multiple processes to speed up Pylint.
+jobs=4
+
+# Allow loading of arbitrary C extensions. Extensions are imported into the
+# active Python interpreter and may run arbitrary code.
+unsafe-load-any-extension=no
+
+
+[MESSAGES CONTROL]
+
+# Only show warnings with the listed confidence levels. Leave empty to show
+# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED
+confidence=
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once). See also the "--disable" option for examples.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifiers separated by comma (,) or put this
+# option multiple times (only on the command line, not in the configuration
+# file where it should appear only once). You can also use "--disable=all" to
+# disable everything first and then reenable specific checks. For example, if
+# you want to run only the similarities checker, you can use "--disable=all
+# --enable=similarities". If you want to run only the classes checker, but have
+# no Warning level messages displayed, use "--disable=all --enable=classes
+# --disable=W"
+disable=abstract-method,
+        apply-builtin,
+        arguments-differ,
+        attribute-defined-outside-init,
+        backtick,
+        bad-option-value,
+        basestring-builtin,
+        buffer-builtin,
+        c-extension-no-member,
+        consider-using-enumerate,
+        cmp-builtin,
+        cmp-method,
+        coerce-builtin,
+        coerce-method,
+        delslice-method,
+        div-method,
+        duplicate-code,
+        eq-without-hash,
+        execfile-builtin,
+        file-builtin,
+        filter-builtin-not-iterating,
+        fixme,
+        getslice-method,
+        global-statement,
+        hex-method,
+        idiv-method,
+        implicit-str-concat,
+        import-error,
+        import-self,
+        import-star-module-level,
+        inconsistent-return-statements,
+        input-builtin,
+        intern-builtin,
+        invalid-str-codec,
+        locally-disabled,
+        long-builtin,
+        long-suffix,
+        map-builtin-not-iterating,
+        misplaced-comparison-constant,
+        missing-function-docstring,
+        metaclass-assignment,
+        next-method-called,
+        next-method-defined,
+        no-absolute-import,
+        no-else-break,
+        no-else-continue,
+        no-else-raise,
+        no-else-return,
+        no-init,  # added
+        no-member,
+        no-name-in-module,
+        no-self-use,
+        nonzero-method,
+        oct-method,
+        old-division,
+        old-ne-operator,
+        old-octal-literal,
+        old-raise-syntax,
+        parameter-unpacking,
+        print-statement,
+        raising-string,
+        range-builtin-not-iterating,
+        raw_input-builtin,
+        rdiv-method,
+        reduce-builtin,
+        relative-import,
+        reload-builtin,
+        round-builtin,
+        setslice-method,
+        signature-differs,
+        standarderror-builtin,
+        suppressed-message,
+        sys-max-int,
+        too-few-public-methods,
+        too-many-ancestors,
+        too-many-arguments,
+        too-many-boolean-expressions,
+        too-many-branches,
+        too-many-instance-attributes,
+        too-many-locals,
+        too-many-nested-blocks,
+        too-many-public-methods,
+        too-many-return-statements,
+        too-many-statements,
+        trailing-newlines,
+        unichr-builtin,
+        unicode-builtin,
+        unnecessary-pass,
+        unpacking-in-except,
+        useless-else-on-loop,
+        useless-object-inheritance,
+        useless-suppression,
+        using-cmp-argument,
+        wrong-import-order,
+        xrange-builtin,
+        zip-builtin-not-iterating,
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html. You can also give a reporter class, eg
+# mypackage.mymodule.MyReporterClass.
+output-format=colorized
+
+# Tells whether to display a full report or only the messages
+reports=no
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors warning, statement which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Template used to display messages. This is a python new-style format string
+# used to format the message information. See doc for all details
+#msg-template=
+
+
+[BASIC]
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=main,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=
+
+# Colon-delimited sets of names that determine each other's naming style when
+# the name regexes allow several styles.
+name-group=
+
+# Include a hint for the correct naming format with invalid-name
+include-naming-hint=no
+
+# List of decorators that produce properties, such as abc.abstractproperty. Add
+# to this list to register other decorators that produce valid properties.
+property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl
+
+# Regular expression matching correct function names
+function-rgx=^(?:(?P<exempt>setUp|tearDown|setUpModule|tearDownModule)|(?P<camel_case>_?[A-Z][a-zA-Z0-9]*)|(?P<snake_case>_?[a-z][a-z0-9_]*))$
+
+# Regular expression matching correct variable names
+variable-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct constant names
+const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct attribute names
+attr-rgx=^_{0,2}[a-z][a-z0-9_]*$
+
+# Regular expression matching correct argument names
+argument-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class attribute names
+class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$
+
+# Regular expression matching correct inline iteration names
+inlinevar-rgx=^[a-z][a-z0-9_]*$
+
+# Regular expression matching correct class names
+class-rgx=^_?[A-Z][a-zA-Z0-9]*$
+
+# Regular expression matching correct module names
+module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$
+
+# Regular expression matching correct method names
+method-rgx=(?x)^(?:(?P<exempt>_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P<camel_case>_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P<snake_case>_{0,2}[a-z][a-z0-9_]*))$
+
+# Regular expression which should only match function or class names that do
+# not require a docstring.
+no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$
+
+# Minimum line length for functions/classes that require docstrings, shorter
+# ones are exempt.
+docstring-min-length=10
+
+
+[TYPECHECK]
+
+# List of decorators that produce context managers, such as
+# contextlib.contextmanager. Add to this list to register other decorators that
+# produce valid context managers.
+contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of module names for which member attributes should not be checked
+# (useful for modules/projects where namespaces are manipulated during runtime
+# and thus existing member attributes cannot be deduced by static analysis. It
+# supports qualified module names, as well as Unix pattern matching.
+ignored-modules=
+
+# List of class names for which member attributes should not be checked (useful
+# for classes with dynamically set attributes). This supports the use of
+# qualified names.
+ignored-classes=optparse.Values,thread._local,_thread._local
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E1101 when accessed. Python regular
+# expressions are accepted.
+generated-members=
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=120
+
+# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt
+# lines made too long by directives to pytype.
+
+# Regexp for a line that is allowed to be longer than the limit.
+ignore-long-lines=(?x)(
+  ^\s*(\#\ )?<?https?://\S+>?$|
+  ^\s*(from\s+\S+\s+)?import\s+.+$)
+
+# Allow the body of an if to be on the same line as the test if there is no
+# else.
+single-line-if-stmt=yes
+
+# Maximum number of lines in a module
+max-module-lines=99999
+
+# String used as indentation unit.  The internal Google style guide mandates 2
+# spaces.  Google's externally-published style guide says 4, consistent with
+# PEP 8.  Here, we use 4 spaces, matching PEP 8 and the externally-published
+# guide.
+indent-string='    '
+
+# Number of spaces of indent required inside a hanging or continued line.
+indent-after-paren=4
+
+# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
+expected-line-ending-format=
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=TODO
+
+
+[STRING]
+
+# This flag controls whether inconsistent-quotes generates a warning when the
+# character used as a quote delimiter is used inconsistently within a module.
+check-quote-consistency=yes
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the name of dummy variables (i.e. expectedly
+# not used).
+dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_)
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid defining new builtins when possible.
+additional-builtins=
+
+# List of strings which can identify a callback function by name. A callback
+# name must start or end with one of those strings.
+callbacks=cb_,_cb
+
+# List of qualified module names which can have objects that can redefine
+# builtins.
+redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools
+
+
+[LOGGING]
+
+# Logging modules to check that the string format arguments are in logging
+# function parameter format
+logging-modules=logging,absl.logging,tensorflow.io.logging
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+# Ignore imports when computing similarities.
+ignore-imports=no
+
+
+[SPELLING]
+
+# Spelling dictionary name. Available dictionaries: none. To make it work,
+# install the python-enchant package.
+spelling-dict=
+
+# List of comma separated words that should not be checked.
+spelling-ignore-words=
+
+# A path to a file that contains private dictionary; one word per line.
+spelling-private-dict-file=
+
+# Tells whether to store unknown words to indicated private dictionary in
+# --spelling-private-dict-file option instead of raising a message.
+spelling-store-unknown-words=no
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,
+                   TERMIOS,
+                   Bastion,
+                   rexec,
+                   sets
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+# Force import order to recognize a module as part of the standard
+# compatibility libraries.
+known-standard-library=
+
+# Force import order to recognize a module as part of a third party library.
+known-third-party=enchant, absl
+
+# Analyse import fallback blocks. This can be used to support both Python 2 and
+# 3 compatible code, which means that the block might have code that exists
+# only in one or another interpreter, leading to false positives when analysed.
+analyse-fallback-blocks=no
+
+
+[CLASSES]
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,
+                      __new__,
+                      setUp
+
+# List of member names, which should be excluded from the protected access
+# warning.
+exclude-protected=_asdict,
+                  _fields,
+                  _replace,
+                  _source,
+                  _make
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls,
+                            class_
+
+# List of valid names for the first argument in a metaclass class method.
+valid-metaclass-classmethod-first-arg=mcs
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=builtins.BaseException,
+                       builtins.Exception
diff --git a/examples/DiscoResearch/__init__.py b/examples/DiscoResearch/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/DiscoResearch/mixtral_7b_8expert/config.py b/examples/DiscoResearch/mixtral_7b_8expert/config.py
index 544a817..3b9dcc1 100644
--- a/examples/DiscoResearch/mixtral_7b_8expert/config.py
+++ b/examples/DiscoResearch/mixtral_7b_8expert/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/DiscoResearch/mixtral_7b_8expert/train.py b/examples/DiscoResearch/mixtral_7b_8expert/train.py
index 8411f19..b6e1c77 100644
--- a/examples/DiscoResearch/mixtral_7b_8expert/train.py
+++ b/examples/DiscoResearch/mixtral_7b_8expert/train.py
@@ -8,13 +8,17 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.DiscoResearch.mixtral_7b_8expert.modeling_moe_mistral import MixtralForCausalLM
-from huggingface_model.DiscoResearch.mixtral_7b_8expert.configuration_moe_mistral import MixtralConfig
+from huggingface_model.DiscoResearch.mixtral_7b_8expert.configuration_moe_mistral import (
+    MixtralConfig,
+)
+from huggingface_model.DiscoResearch.mixtral_7b_8expert.modeling_moe_mistral import (
+    MixtralForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/Qwen/Qwen2_7B/config.py b/examples/Qwen/Qwen2_7B/config.py
index 04d3770..b691090 100644
--- a/examples/Qwen/Qwen2_7B/config.py
+++ b/examples/Qwen/Qwen2_7B/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/Qwen/Qwen2_7B/train.py b/examples/Qwen/Qwen2_7B/train.py
index 2645e00..20e4c61 100644
--- a/examples/Qwen/Qwen2_7B/train.py
+++ b/examples/Qwen/Qwen2_7B/train.py
@@ -8,13 +8,13 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.Qwen.Qwen2_7B.modeling_qwen2 import Qwen2ForCausalLM
 from huggingface_model.Qwen.Qwen2_7B.configuration_qwen2 import Qwen2Config
+from huggingface_model.Qwen.Qwen2_7B.modeling_qwen2 import Qwen2ForCausalLM
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/Qwen/__init__.py b/examples/Qwen/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/baichuan_inc/Baichuan2_7B_Base/config.py b/examples/baichuan_inc/Baichuan2_7B_Base/config.py
index 5dd57ba..437c253 100644
--- a/examples/baichuan_inc/Baichuan2_7B_Base/config.py
+++ b/examples/baichuan_inc/Baichuan2_7B_Base/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/baichuan_inc/Baichuan2_7B_Base/train.py b/examples/baichuan_inc/Baichuan2_7B_Base/train.py
index fde8ed7..22ccda0 100644
--- a/examples/baichuan_inc/Baichuan2_7B_Base/train.py
+++ b/examples/baichuan_inc/Baichuan2_7B_Base/train.py
@@ -8,13 +8,17 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.baichuan_inc.Baichuan2_7B_Base.modeling_baichuan import BaichuanForCausalLM
-from huggingface_model.baichuan_inc.Baichuan2_7B_Base.configuration_baichuan import BaichuanConfig
+from huggingface_model.baichuan_inc.Baichuan2_7B_Base.configuration_baichuan import (
+    BaichuanConfig,
+)
+from huggingface_model.baichuan_inc.Baichuan2_7B_Base.modeling_baichuan import (
+    BaichuanForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/baichuan_inc/__init__.py b/examples/baichuan_inc/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/deepseek_ai/DeepSeek_V2/config.py b/examples/deepseek_ai/DeepSeek_V2/config.py
index 9639603..8ac137a 100644
--- a/examples/deepseek_ai/DeepSeek_V2/config.py
+++ b/examples/deepseek_ai/DeepSeek_V2/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/deepseek_ai/DeepSeek_V2/train.py b/examples/deepseek_ai/DeepSeek_V2/train.py
index 32a5d4b..26f7b01 100644
--- a/examples/deepseek_ai/DeepSeek_V2/train.py
+++ b/examples/deepseek_ai/DeepSeek_V2/train.py
@@ -8,13 +8,17 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.deepseek_ai.DeepSeek_V2.modeling_deepseek import DeepseekV2ForCausalLM
-from huggingface_model.deepseek_ai.DeepSeek_V2.configuration_deepseek import DeepseekV2Config
+from huggingface_model.deepseek_ai.DeepSeek_V2.configuration_deepseek import (
+    DeepseekV2Config,
+)
+from huggingface_model.deepseek_ai.DeepSeek_V2.modeling_deepseek import (
+    DeepseekV2ForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/deepseek_ai/__init__.py b/examples/deepseek_ai/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/internlm/__init__.py b/examples/internlm/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/internlm/internlm2_7b/config.py b/examples/internlm/internlm2_7b/config.py
index 0c65e6e..3043e84 100644
--- a/examples/internlm/internlm2_7b/config.py
+++ b/examples/internlm/internlm2_7b/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/internlm/internlm2_7b/train.py b/examples/internlm/internlm2_7b/train.py
index a7f9540..7478716 100644
--- a/examples/internlm/internlm2_7b/train.py
+++ b/examples/internlm/internlm2_7b/train.py
@@ -8,13 +8,17 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.internlm.internlm2_7b.modeling_internlm2 import InternLM2ForCausalLM
-from huggingface_model.internlm.internlm2_7b.configuration_internlm2 import InternLM2Config
+from huggingface_model.internlm.internlm2_7b.configuration_internlm2 import (
+    InternLM2Config,
+)
+from huggingface_model.internlm.internlm2_7b.modeling_internlm2 import (
+    InternLM2ForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/internlm/internlm_7b/config.py b/examples/internlm/internlm_7b/config.py
index f3e2c9a..e85b8de 100644
--- a/examples/internlm/internlm_7b/config.py
+++ b/examples/internlm/internlm_7b/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/internlm/internlm_7b/train.py b/examples/internlm/internlm_7b/train.py
index 575042b..9611d79 100644
--- a/examples/internlm/internlm_7b/train.py
+++ b/examples/internlm/internlm_7b/train.py
@@ -8,13 +8,13 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.internlm.internlm_7b.modeling_internlm import InternLMForCausalLM
 from huggingface_model.internlm.internlm_7b.configuration_internlm import InternLMConfig
+from huggingface_model.internlm.internlm_7b.modeling_internlm import InternLMForCausalLM
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/microsoft/Phi_3_mini_4k_instruct/config.py b/examples/microsoft/Phi_3_mini_4k_instruct/config.py
index 87eebf8..23f94d9 100644
--- a/examples/microsoft/Phi_3_mini_4k_instruct/config.py
+++ b/examples/microsoft/Phi_3_mini_4k_instruct/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/microsoft/Phi_3_mini_4k_instruct/train.py b/examples/microsoft/Phi_3_mini_4k_instruct/train.py
index 08e7648..ddc2965 100644
--- a/examples/microsoft/Phi_3_mini_4k_instruct/train.py
+++ b/examples/microsoft/Phi_3_mini_4k_instruct/train.py
@@ -8,13 +8,17 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.microsoft.Phi_3_mini_4k_instruct.modeling_phi3 import Phi3ForCausalLM
-from huggingface_model.microsoft.Phi_3_mini_4k_instruct.configuration_phi3 import Phi3Config
+from huggingface_model.microsoft.Phi_3_mini_4k_instruct.configuration_phi3 import (
+    Phi3Config,
+)
+from huggingface_model.microsoft.Phi_3_mini_4k_instruct.modeling_phi3 import (
+    Phi3ForCausalLM,
+)
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)
diff --git a/examples/microsoft/__init__.py b/examples/microsoft/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/yi/__init__.py b/examples/yi/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/yi/yi_6B/config.py b/examples/yi/yi_6B/config.py
index 9f2f151..d4bd9b4 100644
--- a/examples/yi/yi_6B/config.py
+++ b/examples/yi/yi_6B/config.py
@@ -22,7 +22,8 @@
     # 'load_ckpt_info' setting guide:
     # 1. the 'path' indicate ckpt path,
     # 2. the 'content‘ means what states will be loaded, support: "model", "sampler", "optimizer", "scheduler", "all"
-    # 3. the ’ckpt_type‘ means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama", "hf_model".
+    # 3. the 'ckpt_type' means the type of checkpoint to be loaded, support: "internevo", "llama", "hf_llama",
+    #    and "hf_model".
     load_ckpt_info=dict(path=MODEL_ONLY_FOLDER, content=("model",), ckpt_type="hf_model"),
     # 'auto_resume' is designed to automatically load the latest checkpoint from 'save_ckpt_folder' when encountering
     # training interruptions/hangs caused by hardware failures, using a scheduling system (such as k8s/slurm)
diff --git a/examples/yi/yi_6B/train.py b/examples/yi/yi_6B/train.py
index 40ce4da..88d7c6d 100644
--- a/examples/yi/yi_6B/train.py
+++ b/examples/yi/yi_6B/train.py
@@ -8,13 +8,13 @@
     build_valid_loader_with_data_type,
 )
 from internlm.initialize import initialize_distributed_env
+from internlm.model.registry import hf_config_initializer, model_initializer
 from internlm.monitor import internevo_monitor
 from internlm.train import initialize_model
 from internlm.utils.common import parse_args
-from internlm.model.registry import model_initializer, hf_config_initializer
 
-from huggingface_model.yi.yi_6B.modeling_yi import YiForCausalLM
 from huggingface_model.yi.yi_6B.configuration_yi import YiConfig
+from huggingface_model.yi.yi_6B.modeling_yi import YiForCausalLM
 
 
 @internevo_monitor(feishu_alert=True, clean_run=True)