Merge branch 'fireworks_integration' into support

ScrapeGraphAI · Jul 4, 2024 · 720f187 · 720f187
2 parents 2ab7ddc + 4b56604
commit 720f187
Show file tree

Hide file tree

Showing 32 changed files with 1,799 additions and 182 deletions.
diff --git a/examples/fireworks/.env.example b/examples/fireworks/.env.example
@@ -0,0 +1 @@
+FIREWORKS_APIKEY="your fireworks api key"
diff --git a/examples/fireworks/csv_scraper_fireworks.py b/examples/fireworks/csv_scraper_fireworks.py
@@ -0,0 +1,63 @@
+"""
+Basic example of scraping pipeline using CSVScraperGraph from CSV documents
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+load_dotenv()
+
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the CSVScraperGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperGraph(
+    prompt="List me all the last names",
+    source=str(text),  # Pass the content of the file, not the file object
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/fireworks/csv_scraper_graph_multi_fireworks.py b/examples/fireworks/csv_scraper_graph_multi_fireworks.py
@@ -0,0 +1,63 @@
+"""
+Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
+"""
+
+import os
+from dotenv import load_dotenv
+import pandas as pd
+from scrapegraphai.graphs import CSVScraperMultiGraph
+from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
+
+load_dotenv()
+# ************************************************
+# Read the CSV file
+# ************************************************
+
+FILE_NAME = "inputs/username.csv"
+curr_dir = os.path.dirname(os.path.realpath(__file__))
+file_path = os.path.join(curr_dir, FILE_NAME)
+
+text = pd.read_csv(file_path)
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Create the CSVScraperMultiGraph instance and run it
+# ************************************************
+
+csv_scraper_graph = CSVScraperMultiGraph(
+    prompt="List me all the last names",
+    source=[str(text), str(text)],
+    config=graph_config
+)
+
+result = csv_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = csv_scraper_graph.get_execution_info()
+print(prettify_exec_info(graph_exec_info))
+
+# Save to json or csv
+convert_to_csv(result, "result")
+convert_to_json(result, "result")
diff --git a/examples/fireworks/custom_graph_fireworks.py b/examples/fireworks/custom_graph_fireworks.py
@@ -0,0 +1,118 @@
+"""
+Example of custom graph using existing nodes
+"""
+
+import os
+from dotenv import load_dotenv
+
+from langchain_openai import OpenAIEmbeddings
+from scrapegraphai.models import OpenAI
+from scrapegraphai.graphs import BaseGraph
+from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "headless": False,
+}
+
+# ************************************************
+# Define the graph nodes
+# ************************************************
+
+llm_model = OpenAI(graph_config["llm"])
+embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)
+
+# define the nodes for the graph
+robot_node = RobotsNode(
+    input="url",
+    output=["is_scrapable"],
+    node_config={
+        "llm_model": llm_model,
+        "force_scraping": True,
+        "verbose": True,
+        }
+)
+
+fetch_node = FetchNode(
+    input="url | local_dir",
+    output=["doc", "link_urls", "img_urls"],
+    node_config={
+        "verbose": True,
+        "headless": True,
+    }
+)
+parse_node = ParseNode(
+    input="doc",
+    output=["parsed_doc"],
+    node_config={
+        "chunk_size": 4096,
+        "verbose": True,
+    }
+)
+rag_node = RAGNode(
+    input="user_prompt & (parsed_doc | doc)",
+    output=["relevant_chunks"],
+    node_config={
+        "llm_model": llm_model,
+        "embedder_model": embedder,
+        "verbose": True,
+    }
+)
+generate_answer_node = GenerateAnswerNode(
+    input="user_prompt & (relevant_chunks | parsed_doc | doc)",
+    output=["answer"],
+    node_config={
+        "llm_model": llm_model,
+        "verbose": True,
+    }
+)
+
+# ************************************************
+# Create the graph by defining the connections
+# ************************************************
+
+graph = BaseGraph(
+    nodes=[
+        robot_node,
+        fetch_node,
+        parse_node,
+        rag_node,
+        generate_answer_node,
+    ],
+    edges=[
+        (robot_node, fetch_node),
+        (fetch_node, parse_node),
+        (parse_node, rag_node),
+        (rag_node, generate_answer_node)
+    ],
+    entry_point=robot_node
+)
+
+# ************************************************
+# Execute the graph
+# ************************************************
+
+result, execution_info = graph.execute({
+    "user_prompt": "Describe the content",
+    "url": "https://example.com/"
+})
+
+# get the answer from the result
+result = result.get("answer", "No answer found.")
+print(result)
diff --git a/examples/fireworks/deep_scraper_fireworks.py b/examples/fireworks/deep_scraper_fireworks.py
@@ -0,0 +1,52 @@
+""" 
+Basic example of scraping pipeline using SmartScraper
+"""
+
+import os
+from dotenv import load_dotenv
+from scrapegraphai.graphs import DeepScraperGraph
+from scrapegraphai.utils import prettify_exec_info
+
+load_dotenv()
+
+# ************************************************
+# Define the configuration for the graph
+# ************************************************
+
+fireworks_api_key = os.getenv("FIREWORKS_APIKEY")
+
+graph_config = {
+    "llm": {
+        "api_key": fireworks_api_key,
+        "model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
+    },
+     "embeddings": {
+        "model": "ollama/nomic-embed-text",
+        "temperature": 0,
+        # "base_url": "http://localhost:11434",  # set ollama URL arbitrarily
+    },
+    "verbose": True,
+    "max_depth": 1
+}
+
+# ************************************************
+# Create the SmartScraperGraph instance and run it
+# ************************************************
+
+deep_scraper_graph = DeepScraperGraph(
+    prompt="List me all the job titles and detailed job description.",
+    # also accepts a string with the already downloaded HTML code
+    source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
+    config=graph_config
+)
+
+result = deep_scraper_graph.run()
+print(result)
+
+# ************************************************
+# Get graph execution info
+# ************************************************
+
+graph_exec_info = deep_scraper_graph.get_execution_info()
+print(deep_scraper_graph.get_state("relevant_links"))
+print(prettify_exec_info(graph_exec_info))