Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix contents options, add subpages and extras #47

Merged
merged 3 commits into from
Oct 28, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
add examples and fix contents functions
Feel-ix-343 committed Oct 28, 2024
commit 5c41f75c42bdc15dd5832bbaafb865346c0e1a6c
45 changes: 28 additions & 17 deletions exa_py/api.py
Original file line number Diff line number Diff line change
@@ -148,7 +148,7 @@ def to_snake_case(data: dict) -> dict:

CONTENTS_ENDPOINT_OPTIONS_TYPES = {
"subpages": [int], # Number of subpages to get contents for; these will appear as additional content results
"subpage_target": [str, list] # Specific subpage(s) to get contents for
"extras": [dict], # Additional options for
}

# FOR BETA OPTIONS
@@ -263,6 +263,7 @@ def __init__(self, **kwargs):
self.author = kwargs.get('author')
self.image = kwargs.get('image')
self.subpages = kwargs.get('subpages')
self.extras = kwargs.get("extras")

def __str__(self):
return (
@@ -273,6 +274,8 @@ def __str__(self):
f"Published Date: {self.published_date}\n"
f"Author: {self.author}\n"
f"Image: {self.image}\n"
f"Extras {self.extras}\n"
f"Subpages: {self.subpages}\n"
)


@@ -542,7 +545,7 @@ def __init__(
self,
api_key: Optional[str],
base_url: str = "https://api.exa.ai",
user_agent: str = "exa-py 1.4.1-beta",
user_agent: str = "exa-py 1.5.0",
):
"""Initialize the Exa client with the provided API key and optional base URL and user agent.

@@ -636,6 +639,7 @@ def search_and_contents(
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithText]:
...
@@ -658,6 +662,7 @@ def search_and_contents(
use_autoprompt: Optional[bool] = None,
type: Optional[str] = None,
category: Optional[str] = None,
subpages: Optional[int] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
@@ -684,6 +689,7 @@ def search_and_contents(
type: Optional[str] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -710,6 +716,7 @@ def search_and_contents(
type: Optional[str] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -735,6 +742,7 @@ def search_and_contents(
type: Optional[str] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -760,6 +768,7 @@ def search_and_contents(
use_autoprompt: Optional[bool] = None,
type: Optional[str] = None,
category: Optional[str] = None,
subpages: Optional[int] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
@@ -787,6 +796,7 @@ def search_and_contents(
type: Optional[str] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -815,6 +825,7 @@ def search_and_contents(
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
subpages: Optional[int] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndHighlightsAndSummary]:
@@ -826,12 +837,12 @@ def search_and_contents(self, query: str, **kwargs):
for k, v in {"query": query, **kwargs}.items()
if k != "self" and v is not None
}
if "text" not in options and "highlights" not in options and "summary" not in options:
if "text" not in options and "highlights" not in options and "summary" not in options and "subpages" not in options and "extras" not in options:
options["text"] = True
validate_search_options(
options, {**SEARCH_OPTIONS_TYPES, **CONTENTS_OPTIONS_TYPES}
options, {**SEARCH_OPTIONS_TYPES, **CONTENTS_OPTIONS_TYPES, **CONTENTS_ENDPOINT_OPTIONS_TYPES}
)
options = nest_fields(options, ["text", "highlights", "summary", "subpages", "subpageTarget", "livecrawl", "livecrawl_timeout", "extras"], "contents")
options = nest_fields(options, ["text", "highlights", "summary", "subpages", "livecrawl", "livecrawl_timeout", "extras"], "contents")
options = to_camel_case(options)
data = self.request("/search", options)
return SearchResponse(
@@ -849,7 +860,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithText]:
...
@@ -864,7 +874,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithText]:
...
@@ -879,7 +888,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithHighlights]:
...
@@ -895,7 +903,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndHighlights]:
...
@@ -910,7 +917,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithSummary]:
...
@@ -926,7 +932,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndSummary]:
...
@@ -942,7 +947,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithHighlightsAndSummary]:
...
@@ -959,7 +963,6 @@ def get_contents(
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
subpages: Optional[int] = None,
subpage_target: Optional[Union[str, List[str]]] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndHighlightsAndSummary]:
...
@@ -970,7 +973,7 @@ def get_contents(self, ids: Union[str, List[str], List[_Result]], **kwargs):
for k, v in {"ids": ids, **kwargs}.items()
if k != "self" and v is not None
}
if "text" not in options and "highlights" not in options and "summary" not in options:
if "text" not in options and "highlights" not in options and "summary" not in options and "extras" not in options and "subpages" not in options:
options["text"] = True
validate_search_options(options, {**CONTENTS_OPTIONS_TYPES, **CONTENTS_ENDPOINT_OPTIONS_TYPES})
options = to_camel_case(options)
@@ -1026,6 +1029,7 @@ def find_similar_and_contents(
exclude_source_domain: Optional[bool] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -1050,6 +1054,7 @@ def find_similar_and_contents(
exclude_source_domain: Optional[bool] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -1073,6 +1078,7 @@ def find_similar_and_contents(
exclude_text: Optional[List[str]] = None,
exclude_source_domain: Optional[bool] = None,
category: Optional[str] = None,
subpages: Optional[int] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
@@ -1099,6 +1105,7 @@ def find_similar_and_contents(
exclude_source_domain: Optional[bool] = None,
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
subpages: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
@@ -1124,6 +1131,7 @@ def find_similar_and_contents(
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
subpages: Optional[int] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithSummary]:
@@ -1149,6 +1157,7 @@ def find_similar_and_contents(
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
subpages: Optional[int] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndSummary]:
@@ -1174,6 +1183,7 @@ def find_similar_and_contents(
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
subpages: Optional[int] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithHighlightsAndSummary]:
@@ -1200,6 +1210,7 @@ def find_similar_and_contents(
category: Optional[str] = None,
livecrawl_timeout: Optional[int] = None,
livecrawl: Optional[LIVECRAWL_OPTIONS] = None,
subpages: Optional[int] = None,
filter_empty_results: Optional[bool] = None,
extras: Optional[ExtrasOptions] = None,
) -> SearchResponse[ResultWithTextAndHighlightsAndSummary]:
@@ -1211,13 +1222,13 @@ def find_similar_and_contents(self, url: str, **kwargs):
for k, v in {"url": url, **kwargs}.items()
if k != "self" and v is not None
}
if "text" not in options and "highlights" not in options:
if "text" not in options and "highlights" not in options and "summary" not in options and "extras" not in options and "subpages" not in options:
options["text"] = True
validate_search_options(
options, {**FIND_SIMILAR_OPTIONS_TYPES, **CONTENTS_OPTIONS_TYPES}
options, {**FIND_SIMILAR_OPTIONS_TYPES, **CONTENTS_OPTIONS_TYPES, **CONTENTS_ENDPOINT_OPTIONS_TYPES}
)
options = to_camel_case(options)
options = nest_fields(options, ["text", "highlights", "summary", "subpages", "subpageTarget", "livecrawl", "livecrawl_timeout", "extras"], "contents")
options = nest_fields(options, ["text", "highlights", "summary", "subpages", "livecrawl", "livecrawl_timeout", "extras"], "contents")
data = self.request("/findSimilar", options)
return SearchResponse(
[Result(**to_snake_case(result)) for result in data["results"]],
16 changes: 16 additions & 0 deletions examples/extras_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
"""Example: fetch page contents with the ``extras`` option via the Exa API."""
import os

from exa_py import Exa

# Build a client authenticated from the environment.
exa = Exa(os.getenv("EXA_API_KEY"))

# Ask for the contents of a page, requesting up to 5 extracted links
# in the `extras` section of each result.
response = exa.get_contents(
    ids=["firecrawl.dev"],
    extras={"links": 5},
)

print(response)

21 changes: 21 additions & 0 deletions examples/livecrawl_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Example: force a livecrawl and verify it differs from the cached result."""
import os
from exa_py import Exa

EXA_API_KEY = os.environ.get('EXA_API_KEY')

if not EXA_API_KEY:
    raise ValueError("EXA_API_KEY environment variable not set!")

exa = Exa(EXA_API_KEY)

# livecrawl="always" forces a fresh crawl instead of serving cached contents.
response = exa.search_and_contents(
    "the canonical url for the homepage of tesla",
    num_results=1,
    livecrawl="always",
)
print(response)

# Same query without livecrawl: served from the content cache when available.
norm_response = exa.search_and_contents(
    "the canonical url for the homepage of tesla",
    num_results=1,
)
print(norm_response)

# Explicit check instead of a bare `assert`, which `python -O` strips —
# the example's verification would otherwise silently disappear.
if response.results[0].text == norm_response.results[0].text:
    raise RuntimeError(
        "Expected livecrawled text to differ from the cached text"
    )
35 changes: 24 additions & 11 deletions examples/subpages_example.py
Original file line number Diff line number Diff line change
@@ -6,18 +6,31 @@

response = exa.get_contents(
ids=["firecrawl.dev"],
subpages=4,
# subpage_target= // specific subpage targets if you have any
subpages=2,
livecrawl="always"
)

print(response)


print("SEARCH AND CONTENTS SUBPAGES")

response = exa.search_and_contents(
"canonical url of tesla motors",
subpages=2,
num_results=1,
)

print(response)

print("FIND SIMILAR AND CONTENTS SUBPAGES")

response = exa.find_similar_and_contents(
"tesla.com",
subpages=2,
text=True,
# livecrawl="always"
num_results=1,
)

# Print the results
for result in response.results:
print("=" * 80)
print(f"Main URL: {result.url}")
print(f"Title: {result.title}")
print("-" * 40)
print("Text snippet:")
print(f"{result.text[:500]}...") # Print first 500 characters of text
print("\n")
print(response)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -2,7 +2,7 @@

setup(
name="exa_py",
version="1.4.1-beta",
version="1.5.0",
description="Python SDK for Exa API.",
long_description_content_type="text/markdown",
long_description=open("README.md").read(),