Commit b098a48
ci: make CI happy: lint the code, delete unused imports
Signed-off-by: yihong0618 <[email protected]>
yihong0618 committed May 24, 2023
1 parent 562d5a9 commit b098a48
Showing 75 changed files with 1,108 additions and 822 deletions.
17 changes: 13 additions & 4 deletions .github/workflows/pylint.yml
@@ -1,6 +1,15 @@
 name: Pylint

-on: [push]
+on:
+  push:
+    branches: [ make_ci_happy ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+concurrency:
+  group: ${{ github.event.number || github.run_id }}
+  cancel-in-progress: true

 jobs:
   build:
@@ -17,7 +26,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          pip install pylint
-      - name: Analysing the code with pylint
+          pip install -U black isort
+      - name: check the code lint
         run: |
-          pylint $(git ls-files '*.py')
+          black . --check
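
To reproduce the new CI gate locally, the same commands the workflow runs can be used (note the hunk installs isort alongside black, but only the black check runs in this step):

    pip install -U black isort
    black . --check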
48 changes: 29 additions & 19 deletions examples/app.py
@@ -2,52 +2,63 @@
 # -*- coding:utf-8 -*-

 import gradio as gr
-from langchain.agents import (
-    load_tools,
-    initialize_agent,
-    AgentType
-)
-from pilot.model.vicuna_llm import VicunaRequestLLM, VicunaEmbeddingLLM
-from llama_index import LLMPredictor, LangchainEmbedding, ServiceContext
+from langchain.agents import AgentType, initialize_agent, load_tools
 from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import Document, GPTSimpleVectorIndex
+from llama_index import (
+    Document,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    ServiceContext,
+)
+
+from pilot.model.vicuna_llm import VicunaEmbeddingLLM, VicunaRequestLLM


 def agent_demo():
     llm = VicunaRequestLLM()

-    tools = load_tools(['python_repl'], llm=llm)
-    agent = initialize_agent(tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
-    agent.run(
-        "Write a SQL script that Query 'select count(1)!'"
-    )
+    tools = load_tools(["python_repl"], llm=llm)
+    agent = initialize_agent(
+        tools, llm, agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION, verbose=True
+    )
+    agent.run("Write a SQL script that Query 'select count(1)!'")

+
 def knowledged_qa_demo(text_list):
     llm_predictor = LLMPredictor(llm=VicunaRequestLLM())
     hfemb = VicunaEmbeddingLLM()
     embed_model = LangchainEmbedding(hfemb)
     documents = [Document(t) for t in text_list]

-    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model)
-    index = GPTSimpleVectorIndex.from_documents(documents, service_context=service_context)
+    service_context = ServiceContext.from_defaults(
+        llm_predictor=llm_predictor, embed_model=embed_model
+    )
+    index = GPTSimpleVectorIndex.from_documents(
+        documents, service_context=service_context
+    )
     return index

+
 def get_answer(q):
-    base_knowledge = """ """
+    base_knowledge = """ """
     text_list = [base_knowledge]
     index = knowledged_qa_demo(text_list)
     response = index.query(q)
     return response.response

+
 def get_similar(q):
     from pilot.vector_store.extract_tovec import knownledge_tovec, knownledge_tovec_st
+
     docsearch = knownledge_tovec_st("./datasets/plan.md")
     docs = docsearch.similarity_search_with_score(q, k=1)

     for doc in docs:
-        dc, s = doc
+        dc, s = doc
         print(s)
-        yield dc.page_content
+        yield dc.page_content

+
 if __name__ == "__main__":
     # agent_demo()
@@ -58,8 +69,7 @@ def get_similar(q):
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button()
-
         text_button.click(get_similar, inputs=text_input, outputs=text_output)

     demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")

25 changes: 10 additions & 15 deletions examples/embdserver.py
@@ -1,30 +1,29 @@
 #!/usr/bin/env python3
 # -*- coding:utf-8 -*-

-import requests
 import json
-import time
-import uuid
 import os
 import sys
+from urllib.parse import urljoin

 import gradio as gr
+import requests

 ROOT_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.append(ROOT_PATH)


-from pilot.configs.config import Config
-from pilot.conversation import conv_qa_prompt_template, conv_templates
-from langchain.prompts import PromptTemplate
+from pilot.configs.config import Config
+from pilot.conversation import conv_qa_prompt_template, conv_templates

 llmstream_stream_path = "generate_stream"

 CFG = Config()

-def generate(query):
+
+def generate(query):
     template_name = "conv_one_shot"
     state = conv_templates[template_name].copy()

@@ -47,7 +46,7 @@ def generate(query):
         "prompt": prompt,
         "temperature": 1.0,
         "max_new_tokens": 1024,
-        "stop": "###"
+        "stop": "###",
     }

     response = requests.post(
@@ -57,19 +56,18 @@ def generate(query):
     skip_echo_len = len(params["prompt"]) + 1 - params["prompt"].count("</s>") * 3

     for chunk in response.iter_lines(decode_unicode=False, delimiter=b"\0"):
-
         if chunk:
             data = json.loads(chunk.decode())
             if data["error_code"] == 0:
-
                 if "vicuna" in CFG.LLM_MODEL:
                     output = data["text"][skip_echo_len:].strip()
                 else:
                     output = data["text"].strip()

                 state.messages[-1][-1] = output + "▌"
-                yield(output)
+                yield (output)
+

 if __name__ == "__main__":
     print(CFG.LLM_MODEL)
     with gr.Blocks() as demo:
@@ -78,10 +76,7 @@ def generate(query):
         text_input = gr.TextArea()
         text_output = gr.TextArea()
         text_button = gr.Button("提交")
-

         text_button.click(generate, inputs=text_input, outputs=text_output)

-    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
-
-
+    demo.queue(concurrency_count=3).launch(server_name="0.0.0.0")
8 changes: 4 additions & 4 deletions examples/gpt_index.py
@@ -1,19 +1,19 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-import os
 import logging
 import sys

-from llama_index import SimpleDirectoryReader, GPTSimpleVectorIndex
+from llama_index import GPTSimpleVectorIndex, SimpleDirectoryReader

 logging.basicConfig(stream=sys.stdout, level=logging.INFO)
 logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

 # read the document of data dir
 documents = SimpleDirectoryReader("data").load_data()
-# split the document to chunk, max token size=500, convert chunk to vector
+# split the document to chunk, max token size=500, convert chunk to vector
+
 index = GPTSimpleVectorIndex(documents)

 # save index
-index.save_to_disk("index.json")
+index.save_to_disk("index.json")
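
As a usage note, a minimal sketch of reading the persisted index back, assuming the same llama_index 0.x API this script already uses (the query string is a hypothetical placeholder):

    from llama_index import GPTSimpleVectorIndex

    # reload the vector index saved by gpt_index.py and query it
    index = GPTSimpleVectorIndex.load_from_disk("index.json")
    response = index.query("What does the document cover?")
    print(response)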
6 changes: 4 additions & 2 deletions examples/gradio_test.py
@@ -3,17 +3,19 @@

 import gradio as gr

+
 def change_tab():
     return gr.Tabs.update(selected=1)


 with gr.Blocks() as demo:
     with gr.Tabs() as tabs:
         with gr.TabItem("Train", id=0):
             t = gr.Textbox()
         with gr.TabItem("Inference", id=1):
             i = gr.Image()

     btn = gr.Button()
     btn.click(change_tab, None, tabs)

-demo.launch()
+demo.launch()
13 changes: 9 additions & 4 deletions examples/knowledge_embedding/csv_embedding_test.py
@@ -1,5 +1,3 @@
-
-
 from pilot.source_embedding.csv_embedding import CSVEmbedding

 # path = "/Users/chenketing/Downloads/share_ireserve双写数据异常2.xlsx"
@@ -8,6 +6,13 @@
 vector_store_path = "your_path/"


-pdf_embedding = CSVEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = CSVEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
11 changes: 9 additions & 2 deletions examples/knowledge_embedding/pdf_embedding_test.py
@@ -6,6 +6,13 @@
 vector_store_path = "your_path/"


-pdf_embedding = PDFEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "ob-pdf", "vector_store_path": vector_store_path})
+pdf_embedding = PDFEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "ob-pdf",
+        "vector_store_path": vector_store_path,
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
11 changes: 9 additions & 2 deletions examples/knowledge_embedding/url_embedding_test.py
@@ -5,6 +5,13 @@
 vector_store_path = "your_path"


-pdf_embedding = URLEmbedding(file_path=path, model_name=model_name, vector_store_config={"vector_store_name": "url", "vector_store_path": "vector_store_path"})
+pdf_embedding = URLEmbedding(
+    file_path=path,
+    model_name=model_name,
+    vector_store_config={
+        "vector_store_name": "url",
+        "vector_store_path": "vector_store_path",
+    },
+)
 pdf_embedding.source_embedding()
-print("success")
+print("success")
33 changes: 25 additions & 8 deletions examples/t5_example.py
@@ -1,19 +1,28 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-

-from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
-from langchain.embeddings.huggingface import HuggingFaceEmbeddings
-from llama_index import LLMPredictor
 import torch
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 from langchain.llms.base import LLM
+from llama_index import (
+    GPTListIndex,
+    GPTSimpleVectorIndex,
+    LangchainEmbedding,
+    LLMPredictor,
+    PromptHelper,
+    SimpleDirectoryReader,
+)
 from transformers import pipeline


 class FlanLLM(LLM):
     model_name = "google/flan-t5-large"
-    pipeline = pipeline("text2text-generation", model=model_name, device=0, model_kwargs={
-        "torch_dtype": torch.bfloat16
-    })
+    pipeline = pipeline(
+        "text2text-generation",
+        model=model_name,
+        device=0,
+        model_kwargs={"torch_dtype": torch.bfloat16},
+    )

     def _call(self, prompt, stop=None):
         return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
@@ -24,6 +33,7 @@ def _identifying_params(self):
     def _llm_type(self):
         return "custome"

+
 llm_predictor = LLMPredictor(llm=FlanLLM())
 hfemb = HuggingFaceEmbeddings()
 embed_model = LangchainEmbedding(hfemb)
@@ -214,9 +224,10 @@ def _llm_type(self):
 回答: nlj也是左表的表是驱动表,这个要了解下计划执行方面的基本原理,取左表的一行数据,再遍历右表,一旦满足连接条件,就可以返回数据
 anti/semi只是因为not exists/exist的语义只是返回左表数据,改成anti join是一种计划优化,连接的方式比子查询更优
-"""
+"""

 from llama_index import Document
+
 text_list = [text1]
 documents = [Document(t) for t in text_list]
@@ -226,12 +237,18 @@ def _llm_type(self):
 max_chunk_overlap = 20
 prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

-index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+index = GPTListIndex(
+    documents,
+    embed_model=embed_model,
+    llm_predictor=llm_predictor,
+    prompt_helper=prompt_helper,
+)
 index.save_to_disk("index.json")


 if __name__ == "__main__":
     import logging
+
     logging.getLogger().setLevel(logging.CRITICAL)
     for d in documents:
         print(d)
7 changes: 2 additions & 5 deletions pilot/__init__.py
@@ -1,6 +1,3 @@
-from pilot.source_embedding import (SourceEmbedding, register)
+from pilot.source_embedding import SourceEmbedding, register

-__all__ = [
-    "SourceEmbedding",
-    "register"
-]
+__all__ = ["SourceEmbedding", "register"]