diff --git a/apps/js-sdk/firecrawl/package.json b/apps/js-sdk/firecrawl/package.json index 681404372..6fdf196a9 100644 --- a/apps/js-sdk/firecrawl/package.json +++ b/apps/js-sdk/firecrawl/package.json @@ -1,6 +1,6 @@ { "name": "@mendable/firecrawl-js", - "version": "1.11.2", + "version": "1.11.3", "description": "JavaScript SDK for Firecrawl API", "main": "dist/index.js", "types": "dist/index.d.ts", diff --git a/apps/js-sdk/firecrawl/src/index.ts b/apps/js-sdk/firecrawl/src/index.ts index 60f485d07..474eea830 100644 --- a/apps/js-sdk/firecrawl/src/index.ts +++ b/apps/js-sdk/firecrawl/src/index.ts @@ -565,23 +565,39 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + break + } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } allData = data; } } - return ({ + + let resp: CrawlStatusResponse | ErrorResponse = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: allData, - error: response.data.error, - }) + data: allData + } + + if (!response.data.success && response.data.error) { + resp = { + ...resp, + success: false, + error: response.data.error + } as ErrorResponse; + } + + if (response.data.next) { + (resp as CrawlStatusResponse).next = response.data.next; + } + + return resp; } else { this.handleError(response, "check crawl status"); } @@ -799,23 +815,39 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + break + } statusData = (await this.getRequest(statusData.next, headers)).data; data = data.concat(statusData.data); } allData = data; } } - return ({ + + let resp: BatchScrapeStatusResponse | ErrorResponse = { success: response.data.success, status: response.data.status, total: response.data.total, completed: response.data.completed, creditsUsed: response.data.creditsUsed, expiresAt: new Date(response.data.expiresAt), - next: response.data.next, - data: allData, - error: response.data.error, - }) + data: allData + } + + if (!response.data.success && response.data.error) { + resp = { + ...resp, + success: false, + error: response.data.error + } as ErrorResponse; + } + + if (response.data.next) { + (resp as BatchScrapeStatusResponse).next = response.data.next; + } + + return resp; } else { this.handleError(response, "check batch scrape status"); } @@ -971,6 +1003,9 @@ export default class FirecrawlApp { if ("data" in statusData) { let data = statusData.data; while (typeof statusData === 'object' && 'next' in statusData) { + if (data.length === 0) { + break + } statusResponse = await this.getRequest(statusData.next, headers); statusData = statusResponse.data; data = data.concat(statusData.data); diff --git a/apps/python-sdk/firecrawl/__init__.py b/apps/python-sdk/firecrawl/__init__.py index d4d246e9e..5528b3b2a 100644 --- a/apps/python-sdk/firecrawl/__init__.py +++ b/apps/python-sdk/firecrawl/__init__.py @@ -13,7 +13,7 @@ from .firecrawl import FirecrawlApp # noqa -__version__ = "1.8.0" +__version__ = "1.8.1" # Define the logger for the Firecrawl project logger: logging.Logger = logging.getLogger("firecrawl") diff --git a/apps/python-sdk/firecrawl/firecrawl.py b/apps/python-sdk/firecrawl/firecrawl.py index 271a13f06..d32164052 100644 --- a/apps/python-sdk/firecrawl/firecrawl.py +++ b/apps/python-sdk/firecrawl/firecrawl.py @@ -250,6 +250,8 @@ def check_crawl_status(self, id: str) -> Any: if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + break next_url = status_data.get('next') if not next_url: logger.warning("Expected 'next' URL is missing.") @@ -266,17 +268,25 @@ def check_crawl_status(self, id: str) -> Any: logger.error(f"Error during pagination request: {e}") break status_data['data'] = data - - return { - 'success': True, + + response = { 'status': status_data.get('status'), 'total': status_data.get('total'), 'completed': status_data.get('completed'), 'creditsUsed': status_data.get('creditsUsed'), 'expiresAt': status_data.get('expiresAt'), - 'data': status_data.get('data'), - 'error': status_data.get('error'), - 'next': status_data.get('next', None) + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response } else: self._handle_error(response, 'check crawl status') @@ -459,6 +469,8 @@ def check_batch_scrape_status(self, id: str) -> Any: if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + break next_url = status_data.get('next') if not next_url: logger.warning("Expected 'next' URL is missing.") @@ -476,16 +488,24 @@ def check_batch_scrape_status(self, id: str) -> Any: break status_data['data'] = data - return { - 'success': True, + response = { 'status': status_data.get('status'), 'total': status_data.get('total'), 'completed': status_data.get('completed'), 'creditsUsed': status_data.get('creditsUsed'), 'expiresAt': status_data.get('expiresAt'), - 'data': status_data.get('data'), - 'error': status_data.get('error'), - 'next': status_data.get('next', None) + 'data': status_data.get('data') + } + + if 'error' in status_data: + response['error'] = status_data['error'] + + if 'next' in status_data: + response['next'] = status_data['next'] + + return { + 'success': False if 'error' in status_data else True, + **response } else: self._handle_error(response, 'check batch scrape status') @@ -669,6 +689,8 @@ def _monitor_job_status(self, id: str, headers: Dict[str, str], poll_interval: i if 'data' in status_data: data = status_data['data'] while 'next' in status_data: + if len(status_data['data']) == 0: + break status_response = self._get_request(status_data['next'], headers) status_data = status_response.json() data.extend(status_data.get('data', []))