Skip to content

Commit

Permalink
feat(clients): add exponential backoff
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Dec 5, 2024
1 parent 12fd0f9 commit 0327151
Show file tree
Hide file tree
Showing 15 changed files with 375 additions and 192 deletions.
3 changes: 2 additions & 1 deletion book/src/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@
- Cron Jobs
- Subscriptions
- AI Scraping and Event Driven Actions
- Blacklisting and Budgeting Depth
- Blacklisting and Budgeting Depth
- Exponential Backoff
2 changes: 1 addition & 1 deletion book/src/rust/getting-started.md
Original file line number Diff line number Diff line change
Expand Up @@ -285,4 +285,4 @@ spider.crawl_url(url, Some(crawler_params), true, "application/jsonl", None::<fn

## Error Handling

The SDK handles errors returned by the Spider Cloud API and raises appropriate exceptions. If an error occurs during a request, it will be propagated to the caller with a descriptive error message.
The SDK handles errors returned by the Spider Cloud API and raises appropriate exceptions. If an error occurs during a request, it will be propagated to the caller with a descriptive error message. By default, requests use exponential backoff to retry as needed.
28 changes: 26 additions & 2 deletions cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions cli/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "spider-cloud-cli"
version = "0.1.23"
version = "0.1.24"
edition = "2021"
authors = [ "j-mendez <[email protected]>"]
description = "The Spider Cloud CLI for web crawling and scraping"
Expand All @@ -11,10 +11,10 @@ categories = ["web-programming"]
include = ["src/*", "../../LICENSE", "README.md"]

[dependencies]
clap = { version = "4.5.13", features = ["derive"]}
clap = { version = "4", features = ["derive"]}
reqwest = { version = "0.12", features = ["json", "stream"] }
tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
spider-client = { path = "../rust", version = "0.1" }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
keyring = { version = "3", features = ["apple-native", "windows-native", "sync-secret-service"] }
5 changes: 4 additions & 1 deletion cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,10 @@ async fn main() {
params.limit = Some(limit);
}
println!("Labeling data from URL: {}", url);
match spider.label(&url, Some(params), false, "application/json").await {
match spider
.label(&url, Some(params), false, "application/json")
.await
{
Ok(data) => println!("{}", json!(data)),
Err(e) => eprintln!("Error labeling data: {:?}", e),
}
Expand Down
52 changes: 30 additions & 22 deletions javascript/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 8 additions & 5 deletions javascript/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@spider-cloud/spider-client",
"version": "0.1.23",
"version": "0.1.24",
"description": "Isomorphic Javascript SDK for Spider Cloud services",
"scripts": {
"test": "node --import tsx --test __tests__/*test.ts",
Expand All @@ -23,9 +23,12 @@
"author": "Jeff Mendez<[email protected]>",
"license": "MIT",
"devDependencies": {
"@types/node": "22.7.5",
"dotenv": "^16.4.5",
"tsx": "^4.19.1",
"typescript": "5.6.3"
"@types/node": "22.10.1",
"dotenv": "^16.4.7",
"tsx": "^4.19.2",
"typescript": "5.7.2"
},
"dependencies": {
"exponential-backoff": "^3.1.1"
}
}
36 changes: 23 additions & 13 deletions javascript/src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
} from "./config";
import { version } from "../package.json";
import { streamReader } from "./utils/stream-reader";
import { backOff } from "exponential-backoff";

/**
* Generic params for core request.
Expand Down Expand Up @@ -56,12 +57,15 @@ export class Spider {
jsonl?: boolean
) {
const headers = jsonl ? this.prepareHeadersJsonL : this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "POST",
headers: headers,
body: JSON.stringify(data),
}),
{
method: "POST",
headers: headers,
body: JSON.stringify(data),
numOfAttempts: 5,
}
);

Expand All @@ -82,11 +86,14 @@ export class Spider {
*/
private async _apiGet(endpoint: string) {
const headers = this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "GET",
headers: headers,
}),
{
method: "GET",
headers: headers,
numOfAttempts: 5,
}
);

Expand All @@ -104,11 +111,14 @@ export class Spider {
*/
private async _apiDelete(endpoint: string) {
const headers = this.prepareHeaders;
const response = await fetch(
`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`,
const response = await backOff(
() =>
fetch(`${APISchema["url"]}/${ApiVersion.V1}/${endpoint}`, {
method: "DELETE",
headers,
}),
{
method: "DELETE",
headers,
numOfAttempts: 5,
}
);

Expand Down
3 changes: 2 additions & 1 deletion python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ pytest-asyncio
python-dotenv
aiohttp
python-dotenv
ijson
ijson
tenacity
4 changes: 2 additions & 2 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ def read_file(fname):

setup(
name="spider-client",
version="0.1.23",
version="0.1.24",
url="https://github.com/spider-rs/spider-clients/tree/main/python",
author="Spider",
author_email="[email protected]",
description="Python SDK for Spider Cloud API",
packages=find_packages(),
install_requires=["requests", "ijson"],
install_requires=["requests", "ijson", "tenacity", "aiohttp"],
long_description=read_file("README.md"),
long_description_content_type="text/markdown",
classifiers=[
Expand Down
11 changes: 6 additions & 5 deletions python/spider/async_spider.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
import os
import json
import logging
import os, tenacity, json, aiohttp, logging
from typing import Optional, Dict, Any, AsyncIterator, Callable
import aiohttp
from aiohttp import ClientSession, ClientResponse
from types import TracebackType
from typing import Type
Expand Down Expand Up @@ -35,6 +32,10 @@ async def __aexit__(
if self.session:
await self.session.close()

@tenacity.retry(
wait=tenacity.wait_exponential(multiplier=1, min=1, max=60),
stop=tenacity.stop_after_attempt(5)
)
async def _request(
self,
method: str,
Expand Down Expand Up @@ -430,7 +431,7 @@ def _prepare_headers(
return {
"Content-Type": content_type,
"Authorization": f"Bearer {self.api_key}",
"User-Agent": "AsyncSpider-Client/0.1.23",
"User-Agent": "AsyncSpider-Client/0.1.24",
}

async def _handle_error(self, response: ClientResponse, action: str) -> None:
Expand Down
Loading

0 comments on commit 0327151

Please sign in to comment.