Commit b098a48
ci: make CI happy: lint the code, delete unused imports
Signed-off-by: yihong0618 <[email protected]>
yihong0618 committed May 24, 2023
1 parent 562d5a9 commit b098a48
Showing 75 changed files with 1,108 additions and 822 deletions.
17 changes: 13 additions & 4 deletions .github/workflows/pylint.yml
@@ -1,6 +1,15 @@
 name: Pylint

-on: [push]
+on:
+  push:
+    branches: [ make_ci_happy ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.event.number || github.run_id }}
+  cancel-in-progress: true

 jobs:
   build:
@@ -17,7 +26,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pylint
-      - name: Analysing the code with pylint
+          pip install -U black isort
+      - name: check the code lint
         run: |
-          pylint $(git ls-files '*.py')
+          black . --check
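
To reproduce the new CI gate locally, the same commands the workflow runs can be used (note the hunk installs isort alongside black, but only the black check runs in this step):

    pip install -U black isort
    black . --check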
48 changes: 29 additions & 19 deletions examples/app.py
@@ -2,52 +2,63 @@
 # -*- coding:utf-8 -*-

 import gradio as gr
-from langchain.agents import (
-    load_tools,
-    initialize_agent,
-    AgentType
-)
-from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
-from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
+from langchain.agents import AgentType, initialize_agent, load_tools
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import Document, GPTSimpleVectorIndex
+from llama_index import (
+    Document,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    ServiceContext,
+)
+
+from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM


 def agent_demo():
     llm = VicunaRequestLLM()

-    tools = load_tools(['python_repl'], llm=llm)
-    agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
-    agent.run(
-        "Write a SQL script that Query 'select count(1)!'"
-    )
+    tools = load_tools(["python_repl"], llm=llm)
+    agent = initialize_agent(
+        tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
+    )
+    agent.run("Write a SQL script that Query 'select count(1)!'")

+
 def knowledged_qa_demo(text_list):
     llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
     hfemb = VicunaEmbeddingLLM()
     embed_model = LangchainEmbedding(hfemb)
     documents = [Document(t) for t in text_list]

-    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
-    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, embed_model=embed_model
+    )
+    index = GPTSimpleVectorIndex.from_documents(
+        documents, service_context=service_context
+    )
     return index

+
 def get_answer(q):
-    base_knowledge = """ """
+    base_knowledge = """ """
     text_list = [base_knowledge]
     index = knowledged_qa_demo(text_list)
     response = index.query(q)
     return response.response

+
 def get_similar(q):
     from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
+
     docsearch = knownledge_tovec_st("./datasets/plan.md")
     docs = docsearch.similarity_search_with_score(q, k=1)

     for doc in docs:
-        dc, s = doc
+        dc, s = doc
         print(s)
-        yield dc.page_content
+        yield dc.page_content

+
 if __name__ == "__main__":
     # agent_demo()
@@ -58,8 +69,7 @@ def get_similar(q):
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button()
-
         text_button.click(get_similar, inputs=text_input, outputs=text_output)

     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")

25 changes: 10 additions & 15 deletions examples/embdserver.py
@@ -1,30 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-

-import requests
 import json
-import time
-import uuid
 import os
 import sys
+from urllib.parse import urljoin

 import gradio as gr
+import requests

 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(ROOT_PATH)


-from pilot.configs.config import Config
-from pilot.conversation import conv_qa_prompt_template, conv_templates
-from langchain.prompts import PromptTemplate
+from pilot.configs.config import Config
+from pilot.conversation import conv_qa_prompt_template, conv_templates

 llmstream_stream_path = "generate_stream"

 CFG = Config()

-def generate(query):
+
+def generate(query):
     template_name = "conv_one_shot"
     state = conv_templates[template_name].copy()

@@ -47,7 +46,7 @@ def generate(query):
         "prompt": prompt,
         "temperature": 1.0,
         "max_new_tokens": 1024,
-        "stop": "###"
+        "stop": "###",
     }

     response = requests.post(
@@ -57,19 +56,18 @@ def generate(query):
     skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3

     for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
-
         if chunk:
             data = json.loads(chunk.decode())
             if data["error_code"] == 0:
-
                 if "vicuna" in CFG.LLM_MODEL:
                     output = data["text"][skip_echo_len:].strip()
                 else:
                     output = data["text"].strip()

                 state.messages[-1][-1] = output + "▌"
-                yield(output)
+                yield (output)
+

 if __name__ == "__main__":
     print(CFG.LLM_MODEL)
     with gr.Blocks() as demo:
@@ -78,10 +76,7 @@ def generate(query):
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button("提交")
-

         text_button.click(generate, inputs=text_input, outputs=text_output)

-    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
-
-
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
8 changes: 4 additions & 4 deletions examples/gpt_index.py
@@ -1,19 +1,19 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-import os
 import logging
 import sys

-from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
+from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader

 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

 # read the document of data dir
 documents = SimpleDirectoryReader("data").load_data()
-# split the document to chunk, max token size=500, convert chunk to vector
+# split the document to chunk, max token size=500, convert chunk to vector
+
 index = GPTSimpleVectorIndex(documents)

 # save index
-index.save_to_disk("index.json")
+index.save_to_disk("index.json")
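
As a usage note, a minimal sketch of reading the persisted index back, assuming the same llama_index 0.x API this script already uses (the query string is a hypothetical placeholder):

    from llama_index import GPTSimpleVectorIndex

    # reload the vector index saved by gpt_index.py and query it
    index = GPTSimpleVectorIndex.load_from_disk("index.json")
    response = index.query("What does the document cover?")
    print(response)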
6 changes: 4 additions & 2 deletions examples/gradio_test.py
@@ -3,17 +3,19 @@

 import gradio as gr

+
 def change_tab():
     return gr.Tabs.update(selected=1)


 with gr.Blocks() as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("Train", id=0):
             t = gr.Textbox()
         with gr.TabItem("Inference", id=1):
             i = gr.Image()

     btn = gr.Button()
     btn.click(change_tab, None, tabs)

-demo.launch()
+demo.launch()
13 changes: 9 additions & 4 deletions examples/knowledge_embedding/csv_embedding_test.py
@@ -1,5 +1,3 @@
-
-
 from pilot.source_embedding.csv_embedding import CSVEmbedding

 # path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
@@ -8,6 +6,13 @@
 vector_store_path = "your_path/"


-pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = CSVEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
11 changes: 9 additions & 2 deletions examples/knowledge_embedding/pdf_embedding_test.py
@@ -6,6 +6,13 @@
 vector_store_path = "your_path/"


-pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
+pdf_embedding = PDFEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "ob-pdf",
+        "vector_store_path": vector_store_path,
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
11 changes: 9 additions & 2 deletions examples/knowledge_embedding/url_embedding_test.py
@@ -5,6 +5,13 @@
 vector_store_path = "your_path"


-pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = URLEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
33 changes: 25 additions & 8 deletions examples/t5_example.py
@@ -1,19 +1,28 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import LLMPredictor
 import torch
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.llms.base import LLM
+from llama_index import (
+    GPTListIndex,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    PromptHelper,
+    SimpleDirectoryReader,
+)
 from transformers import pipeline


 class FlanLLM(LLM):
     model_name = "google/flan-t5-large"
-    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
-        "torch_dtype": torch.bfloat16
-    })
+    pipeline = pipeline(
+        "text2text-generation",
+        model=model_name,
+        device=0,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+    )

     def _call(self, prompt, stop=None):
         return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
@@ -24,6 +33,7 @@ def _identifying_params(self):
     def _llm_type(self):
         return "custome"

+
 llm_predictor = LLMPredictor(llm=FlanLLM())
 hfemb = HuggingFaceEmbeddings()
 embed_model = LangchainEmbedding(hfemb)
@@ -214,9 +224,10 @@ def _llm_type(self):
 回答: nlj也是左表的表是驱动表,这个要了解下计划执行方面的基本原理,取左表的一行数据,再遍历右表,一旦满足连接条件,就可以返回数据
 anti/semi只是因为not exists/exist的语义只是返回左表数据,改成anti join是一种计划优化,连接的方式比子查询更优
-"""
+"""

 from llama_index import Document
+
 text_list = [text1]
 documents = [Document(t) for t in text_list]
@@ -226,12 +237,18 @@ def _llm_type(self):
 max_chunk_overlap = 20
 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

-index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+index = GPTListIndex(
+    documents,
+    embed_model=embed_model,
+    llm_predictor=llm_predictor,
+    prompt_helper=prompt_helper,
+)
 index.save_to_disk("index.json")


 if __name__ == "__main__":
     import logging
+
     logging.getLogger().setLevel(logging.CRITICAL)
     for d in documents:
         print(d)
7 changes: 2 additions & 5 deletions pilot/__init__.py
@@ -1,6 +1,3 @@
-from pilot.source_embedding import (SourceEmbedding, register)
+from pilot.source_embedding import SourceEmbedding, register

-__all__ = [
-    "SourceEmbedding",
-    "register"
-]
+__all__ = ["SourceEmbedding", "register"]