Commit
1 parent 13a61cb · commit f15e838
Showing 4 changed files with 101 additions and 0 deletions.
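The first new file (its filename is not shown in this view) is a Chainlit chat app that streams answers from a quantized Llama 2 chat model through a LangChain LLMChain: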
@@ -0,0 +1,55 @@
import chainlit as cl
from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import CTransformers
from langchain_core.prompts import PromptTemplate


class StreamHandler(BaseCallbackHandler):
    """Streams LLM tokens into the current Chainlit message as they arrive."""

    def __init__(self):
        self.msg = cl.Message(content="")

    async def on_llm_new_token(self, token: str, **kwargs):
        await self.msg.stream_token(token)

    async def on_llm_end(self, response: str, **kwargs):
        await self.msg.send()
        self.msg = cl.Message(content="")


# Load quantized Llama 2 via ctransformers (GGUF weights from the Hugging Face Hub)
llm = CTransformers(
    model="TheBloke/Llama-2-7B-Chat-GGUF",
    model_file="llama-2-7b-chat.Q2_K.gguf",
    model_type="llama2",
    max_new_tokens=20,
)

# Llama 2 chat prompt format: system block with conversation context, then the user instruction
template = """
[INST] <<SYS>>
You are a helpful, respectful and honest assistant.
Always provide a concise answer and use the following Context:
{context}
<</SYS>>
User:
{instruction}[/INST]"""

prompt = PromptTemplate(template=template, input_variables=["context", "instruction"])


@cl.on_chat_start
def on_chat_start():
    # One chain per session; the memory feeds earlier turns into {context}
    memory = ConversationBufferMemory(memory_key="context")
    llm_chain = LLMChain(prompt=prompt, llm=llm, verbose=False, memory=memory)
    cl.user_session.set("llm_chain", llm_chain)


@cl.on_message
async def on_message(message: cl.Message):
    llm_chain = cl.user_session.get("llm_chain")

    await llm_chain.ainvoke(
        message.content,
        config={"callbacks": [cl.AsyncLangchainCallbackHandler(), StreamHandler()]},
    )
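Assuming the file is saved as app.py (the diff does not show the filename), the app would normally be started with Chainlit's CLI, e.g. chainlit run app.py, which serves the chat UI, runs on_chat_start for each new session, and routes every user message through on_message so the StreamHandler can stream tokens into the reply.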
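The second new file (again, the filename is not shown) is a standalone script that exercises the same chain-plus-memory pattern against a quantized Orca Mini 3B model: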
@@ -0,0 +1,25 @@
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import CTransformers
from langchain_core.prompts import PromptTemplate

# Load quantized Orca Mini 3B via ctransformers (GGUF weights from the Hugging Face Hub)
llm = CTransformers(
    model="zoltanctoth/orca_mini_3B-GGUF",
    model_file="orca-mini-3b.q4_0.gguf",
    model_type="llama2",
    max_new_tokens=20,
)

# Orca-style prompt: system instructions with conversation context, then the user question
prompt_template = """### System:\nYou are an AI assistant that gives helpful answers.
You answer the question in a short and concise way. Take this context into account when answering the question: {context}\n
\n\n### User:\n{instruction}\n\n### Response:\n"""

prompt = PromptTemplate(template=prompt_template, input_variables=["instruction"])

# memory_key="context" matches the {context} placeholder, so previous exchanges
# are injected into the prompt automatically
memory = ConversationBufferMemory(memory_key="context")

chain = LLMChain(llm=llm, prompt=prompt, verbose=True, memory=memory)

print(chain.invoke({"instruction": "Which city is the capital of India?"}))
print(
    chain.invoke({"instruction": "Which city has the same functionality in the US?"})
)
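Because the chain's ConversationBufferMemory uses memory_key="context", the first question and its answer are fed into the {context} slot on the second call, so the follow-up about "the same functionality in the US" can be resolved against the earlier exchange about India's capital. Each print shows the dict returned by invoke; with LLMChain the generated completion normally appears under the "text" key alongside the inputs.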