-
Notifications
You must be signed in to change notification settings - Fork 990
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'fireworks_integration' into support
- Loading branch information
Showing
32 changed files
with
1,799 additions
and
182 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
FIREWORKS_APIKEY="your fireworks api key"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
""" | ||
Basic example of scraping pipeline using CSVScraperGraph from CSV documents | ||
""" | ||
|
||
import os | ||
from dotenv import load_dotenv | ||
import pandas as pd | ||
from scrapegraphai.graphs import CSVScraperGraph | ||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info | ||
load_dotenv() | ||
|
||
# ************************************************ | ||
# Read the CSV file | ||
# ************************************************ | ||
|
||
FILE_NAME = "inputs/username.csv" | ||
curr_dir = os.path.dirname(os.path.realpath(__file__)) | ||
file_path = os.path.join(curr_dir, FILE_NAME) | ||
|
||
text = pd.read_csv(file_path) | ||
|
||
# ************************************************ | ||
# Define the configuration for the graph | ||
# ************************************************ | ||
fireworks_api_key = os.getenv("FIREWORKS_APIKEY") | ||
|
||
graph_config = { | ||
"llm": { | ||
"api_key": fireworks_api_key, | ||
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" | ||
}, | ||
"embeddings": { | ||
"model": "ollama/nomic-embed-text", | ||
"temperature": 0, | ||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily | ||
}, | ||
"verbose": True, | ||
"headless": False, | ||
} | ||
|
||
# ************************************************ | ||
# Create the CSVScraperGraph instance and run it | ||
# ************************************************ | ||
|
||
csv_scraper_graph = CSVScraperGraph( | ||
prompt="List me all the last names", | ||
source=str(text), # Pass the content of the file, not the file object | ||
config=graph_config | ||
) | ||
|
||
result = csv_scraper_graph.run() | ||
print(result) | ||
|
||
# ************************************************ | ||
# Get graph execution info | ||
# ************************************************ | ||
|
||
graph_exec_info = csv_scraper_graph.get_execution_info() | ||
print(prettify_exec_info(graph_exec_info)) | ||
|
||
# Save to json or csv | ||
convert_to_csv(result, "result") | ||
convert_to_json(result, "result") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
""" | ||
Basic example of scraping pipeline using CSVScraperMultiGraph from CSV documents | ||
""" | ||
|
||
import os | ||
from dotenv import load_dotenv | ||
import pandas as pd | ||
from scrapegraphai.graphs import CSVScraperMultiGraph | ||
from scrapegraphai.utils import convert_to_csv, convert_to_json, prettify_exec_info | ||
|
||
load_dotenv() | ||
# ************************************************ | ||
# Read the CSV file | ||
# ************************************************ | ||
|
||
FILE_NAME = "inputs/username.csv" | ||
curr_dir = os.path.dirname(os.path.realpath(__file__)) | ||
file_path = os.path.join(curr_dir, FILE_NAME) | ||
|
||
text = pd.read_csv(file_path) | ||
|
||
# ************************************************ | ||
# Define the configuration for the graph | ||
# ************************************************ | ||
fireworks_api_key = os.getenv("FIREWORKS_APIKEY") | ||
|
||
graph_config = { | ||
"llm": { | ||
"api_key": fireworks_api_key, | ||
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" | ||
}, | ||
"embeddings": { | ||
"model": "ollama/nomic-embed-text", | ||
"temperature": 0, | ||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily | ||
}, | ||
"verbose": True, | ||
"headless": False, | ||
} | ||
|
||
# ************************************************ | ||
# Create the CSVScraperMultiGraph instance and run it | ||
# ************************************************ | ||
|
||
csv_scraper_graph = CSVScraperMultiGraph( | ||
prompt="List me all the last names", | ||
source=[str(text), str(text)], | ||
config=graph_config | ||
) | ||
|
||
result = csv_scraper_graph.run() | ||
print(result) | ||
|
||
# ************************************************ | ||
# Get graph execution info | ||
# ************************************************ | ||
|
||
graph_exec_info = csv_scraper_graph.get_execution_info() | ||
print(prettify_exec_info(graph_exec_info)) | ||
|
||
# Save to json or csv | ||
convert_to_csv(result, "result") | ||
convert_to_json(result, "result") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
""" | ||
Example of custom graph using existing nodes | ||
""" | ||
|
||
import os | ||
from dotenv import load_dotenv | ||
|
||
from langchain_openai import OpenAIEmbeddings | ||
from scrapegraphai.models import OpenAI | ||
from scrapegraphai.graphs import BaseGraph | ||
from scrapegraphai.nodes import FetchNode, ParseNode, RAGNode, GenerateAnswerNode, RobotsNode | ||
load_dotenv() | ||
|
||
# ************************************************ | ||
# Define the configuration for the graph | ||
# ************************************************ | ||
|
||
fireworks_api_key = os.getenv("FIREWORKS_APIKEY") | ||
|
||
graph_config = { | ||
"llm": { | ||
"api_key": fireworks_api_key, | ||
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" | ||
}, | ||
"embeddings": { | ||
"model": "ollama/nomic-embed-text", | ||
"temperature": 0, | ||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily | ||
}, | ||
"verbose": True, | ||
"headless": False, | ||
} | ||
|
||
# ************************************************ | ||
# Define the graph nodes | ||
# ************************************************ | ||
|
||
llm_model = OpenAI(graph_config["llm"]) | ||
embedder = OpenAIEmbeddings(api_key=llm_model.openai_api_key) | ||
|
||
# define the nodes for the graph | ||
robot_node = RobotsNode( | ||
input="url", | ||
output=["is_scrapable"], | ||
node_config={ | ||
"llm_model": llm_model, | ||
"force_scraping": True, | ||
"verbose": True, | ||
} | ||
) | ||
|
||
fetch_node = FetchNode( | ||
input="url | local_dir", | ||
output=["doc", "link_urls", "img_urls"], | ||
node_config={ | ||
"verbose": True, | ||
"headless": True, | ||
} | ||
) | ||
parse_node = ParseNode( | ||
input="doc", | ||
output=["parsed_doc"], | ||
node_config={ | ||
"chunk_size": 4096, | ||
"verbose": True, | ||
} | ||
) | ||
rag_node = RAGNode( | ||
input="user_prompt & (parsed_doc | doc)", | ||
output=["relevant_chunks"], | ||
node_config={ | ||
"llm_model": llm_model, | ||
"embedder_model": embedder, | ||
"verbose": True, | ||
} | ||
) | ||
generate_answer_node = GenerateAnswerNode( | ||
input="user_prompt & (relevant_chunks | parsed_doc | doc)", | ||
output=["answer"], | ||
node_config={ | ||
"llm_model": llm_model, | ||
"verbose": True, | ||
} | ||
) | ||
|
||
# ************************************************ | ||
# Create the graph by defining the connections | ||
# ************************************************ | ||
|
||
graph = BaseGraph( | ||
nodes=[ | ||
robot_node, | ||
fetch_node, | ||
parse_node, | ||
rag_node, | ||
generate_answer_node, | ||
], | ||
edges=[ | ||
(robot_node, fetch_node), | ||
(fetch_node, parse_node), | ||
(parse_node, rag_node), | ||
(rag_node, generate_answer_node) | ||
], | ||
entry_point=robot_node | ||
) | ||
|
||
# ************************************************ | ||
# Execute the graph | ||
# ************************************************ | ||
|
||
result, execution_info = graph.execute({ | ||
"user_prompt": "Describe the content", | ||
"url": "https://example.com/" | ||
}) | ||
|
||
# get the answer from the result | ||
result = result.get("answer", "No answer found.") | ||
print(result) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
""" | ||
Basic example of scraping pipeline using SmartScraper | ||
""" | ||
|
||
import os | ||
from dotenv import load_dotenv | ||
from scrapegraphai.graphs import DeepScraperGraph | ||
from scrapegraphai.utils import prettify_exec_info | ||
|
||
load_dotenv() | ||
|
||
# ************************************************ | ||
# Define the configuration for the graph | ||
# ************************************************ | ||
|
||
fireworks_api_key = os.getenv("FIREWORKS_APIKEY") | ||
|
||
graph_config = { | ||
"llm": { | ||
"api_key": fireworks_api_key, | ||
"model": "fireworks/accounts/fireworks/models/mixtral-8x7b-instruct" | ||
}, | ||
"embeddings": { | ||
"model": "ollama/nomic-embed-text", | ||
"temperature": 0, | ||
# "base_url": "http://localhost:11434", # set ollama URL arbitrarily | ||
}, | ||
"verbose": True, | ||
"max_depth": 1 | ||
} | ||
|
||
# ************************************************ | ||
# Create the SmartScraperGraph instance and run it | ||
# ************************************************ | ||
|
||
deep_scraper_graph = DeepScraperGraph( | ||
prompt="List me all the job titles and detailed job description.", | ||
# also accepts a string with the already downloaded HTML code | ||
source="https://www.google.com/about/careers/applications/jobs/results/?location=Bangalore%20India", | ||
config=graph_config | ||
) | ||
|
||
result = deep_scraper_graph.run() | ||
print(result) | ||
|
||
# ************************************************ | ||
# Get graph execution info | ||
# ************************************************ | ||
|
||
graph_exec_info = deep_scraper_graph.get_execution_info() | ||
print(deep_scraper_graph.get_state("relevant_links")) | ||
print(prettify_exec_info(graph_exec_info)) |
Oops, something went wrong.