Skip to content

Commit

Permalink
Merge branch 'fireworks_integration' into support
Browse files Browse the repository at this point in the history
  • Loading branch information
f-aguzzi committed Jul 4, 2024
2 parents 2ab7ddc + 4b56604 commit 720f187
Show file tree
Hide file tree
Showing 32 changed files with 1,799 additions and 182 deletions.
1 change: 1 addition & 0 deletions examples/fireworks/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FIREWORKS_APIKEY="your fireworks api key"
63 changes: 63 additions & 0 deletions examples/fireworks/csv_scraper_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Basic example of scraping pipeline using CSVScraperGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info
load_dotenv()

# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
"llm": {
"api_key": fireworks_api_key,
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"temperature": 0,
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False,
}

# ************************************************
# Create the CSVScraperGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperGraph(
prompt="List me all the last names",
source=str(text), # Pass the content of the file, not the file object
config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
63 changes: 63 additions & 0 deletions examples/fireworks/csv_scraper_graph_multi_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents
"""

import os
from dotenv import load_dotenv
import pandas as pd
from scrapegraphai.graphs import CSVScraperMultiGraph
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info

load_dotenv()
# ************************************************
# Read the CSV file
# ************************************************

FILE_NAME = "inputs/username.csv"
curr_dir = os.path.dirname(os.path.realpath(__file__))
file_path = os.path.join(curr_dir, FILE_NAME)

text = pd.read_csv(file_path)

# ************************************************
# Define the configuration for the graph
# ************************************************
fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
"llm": {
"api_key": fireworks_api_key,
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"temperature": 0,
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False,
}

# ************************************************
# Create the CSVScraperMultiGraph instance and run it
# ************************************************

csv_scraper_graph = CSVScraperMultiGraph(
prompt="List me all the last names",
source=[str(text), str(text)],
config=graph_config
)

result = csv_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = csv_scraper_graph.get_execution_info()
print(prettify_exec_info(graph_exec_info))

# Save to json or csv
convert_to_csv(result, "result")
convert_to_json(result, "result")
118 changes: 118 additions & 0 deletions examples/fireworks/custom_graph_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
"""
Example of custom graph using existing nodes
"""

import os
from dotenv import load_dotenv

from langchain_openai import OpenAIEmbeddings
from scrapegraphai.models import OpenAI
from scrapegraphai.graphs import BaseGraph
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode
load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
"llm": {
"api_key": fireworks_api_key,
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"temperature": 0,
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"headless": False,
}

# ************************************************
# Define the graph nodes
# ************************************************

llm_model = OpenAI(graph_config["llm"])
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key)

# define the nodes for the graph
robot_node = RobotsNode(
input="url",
output=["is_scrapable"],
node_config={
"llm_model": llm_model,
"force_scraping": True,
"verbose": True,
}
)

fetch_node = FetchNode(
input="url | local_dir",
output=["doc", "link_urls", "img_urls"],
node_config={
"verbose": True,
"headless": True,
}
)
parse_node = ParseNode(
input="doc",
output=["parsed_doc"],
node_config={
"chunk_size": 4096,
"verbose": True,
}
)
rag_node = RAGNode(
input="user_prompt & (parsed_doc | doc)",
output=["relevant_chunks"],
node_config={
"llm_model": llm_model,
"embedder_model": embedder,
"verbose": True,
}
)
generate_answer_node = GenerateAnswerNode(
input="user_prompt & (relevant_chunks | parsed_doc | doc)",
output=["answer"],
node_config={
"llm_model": llm_model,
"verbose": True,
}
)

# ************************************************
# Create the graph by defining the connections
# ************************************************

graph = BaseGraph(
nodes=[
robot_node,
fetch_node,
parse_node,
rag_node,
generate_answer_node,
],
edges=[
(robot_node, fetch_node),
(fetch_node, parse_node),
(parse_node, rag_node),
(rag_node, generate_answer_node)
],
entry_point=robot_node
)

# ************************************************
# Execute the graph
# ************************************************

result, execution_info = graph.execute({
"user_prompt": "Describe the content",
"url": "https://example.com/"
})

# get the answer from the result
result = result.get("answer", "No answer found.")
print(result)
52 changes: 52 additions & 0 deletions examples/fireworks/deep_scraper_fireworks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""
Basic example of scraping pipeline using SmartScraper
"""

import os
from dotenv import load_dotenv
from scrapegraphai.graphs import DeepScraperGraph
from scrapegraphai.utils import prettify_exec_info

load_dotenv()

# ************************************************
# Define the configuration for the graph
# ************************************************

fireworks_api_key = os.getenv("FIREWORKS_APIKEY")

graph_config = {
"llm": {
"api_key": fireworks_api_key,
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct"
},
"embeddings": {
"model": "ollama/nomic-embed-text",
"temperature": 0,
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily
},
"verbose": True,
"max_depth": 1
}

# ************************************************
# Create the SmartScraperGraph instance and run it
# ************************************************

deep_scraper_graph = DeepScraperGraph(
prompt="List me all the job titles and detailed job description.",
# also accepts a string with the already downloaded HTML code
source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India",
config=graph_config
)

result = deep_scraper_graph.run()
print(result)

# ************************************************
# Get graph execution info
# ************************************************

graph_exec_info = deep_scraper_graph.get_execution_info()
print(deep_scraper_graph.get_state("relevant_links"))
print(prettify_exec_info(graph_exec_info))
Loading

0 comments on commit 720f187

Please sign in to comment.