You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
2024-10-08 21:50:36 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64a966e8000000000f00e70c
2024-10-08 21:50:36 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64a55f9d00000000310091ba
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64964f3a000000000800dfa8
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 6495029b00000000140273e5
2024-10-08 21:50:40 asyncio WARNING (proactor_events.py:353) - socket.send() raised exception.
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64925f880000000013001496
2024-10-08 21:50:40 asyncio WARNING (proactor_events.py:353) - socket.send() raised exception.
2024-10-08 21:50:44 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 648fcd3b0000000013014332
2024-10-08 21:50:44 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 648b2b7e000000000703a035
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_tasks.py", line 115, in fail_after
yield cancel_scope
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 114, in connect_tcp
stream: anyio.abc.ByteStream = await anyio.connect_tcp(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_sockets.py", line 219, in connect_tcp
await event.wait()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_backends_asyncio.py", line 1662, in wait
await self._event.wait()
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\asyncio\locks.py", line 212, in wait
await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 1c4591ead50
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_exceptions.py", line 10, in map_exceptions
yield
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 113, in connect_tcp
with anyio.fail_after(timeout):
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_tasks.py", line 118, in fail_after
raise TimeoutError
TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 60, in map_httpcore_exceptions
yield
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 353, in handle_async_request
resp = await self._pool.handle_async_request(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection_pool.py", line 262, in handle_async_request
raise exc
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection_pool.py", line 245, in handle_async_request
response = await connection.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 92, in handle_async_request
raise exc
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 69, in handle_async_request
stream = await self._connect(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 117, in _connect
stream = await self._network_backend.connect_tcp(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\auto.py", line 31, in connect_tcp
return await self._backend.connect_tcp(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 112, in connect_tcp
with map_exceptions(exc_map):
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_exceptions.py", line 14, in map_exceptions
raise to_exc(exc) from exc
httpcore.ConnectTimeout
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 50, in __call__
result = await fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 86, in request
response = await client.request(
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1530, in request
return await self.send(request, auth=auth, follow_redirects=follow_redirects)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1617, in send
response = await self._send_handling_auth(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1645, in _send_handling_auth
response = await self._send_handling_redirects(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1682, in _send_handling_redirects
response = await self._send_single_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1719, in _send_single_request
response = await transport.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 352, in handle_async_request
with map_httpcore_exceptions():
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 77, in map_httpcore_exceptions
raise mapped_exc(message) from exc
httpx.ConnectTimeout
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\main.py", line 58, in <module>
asyncio.get_event_loop().run_until_complete(main())
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\asyncio\base_events.py", line 687, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\main.py", line 47, in main
await crawler.start()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 84, in start
await self.get_creators_and_notes()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 160, in get_creators_and_notes
await self.batch_get_note_comments(note_ids)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 252, in batch_get_note_comments
await asyncio.gather(*task_list)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 258, in get_comments
await self.xhs_client.get_note_all_comments(
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 288, in get_note_all_comments
comments_res = await self.get_note_comments(note_id, comments_cursor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 249, in get_note_comments
return await self.get(uri, params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 116, in get
return await self.request(method="GET", url=f"{self.host}{final_uri}", headers=headers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 88, in async_wrapped
return await fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 47, in __call__
do = self.iter(retry_state=retry_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\__init__.py", line 326, in iter
raise retry_exc from fut.exception()
tenacity.RetryError: RetryError[<Future at 0x1c455019190 state=finished raised ConnectTimeout>]
The text was updated successfully, but these errors were encountered:
2024-10-08 21:50:36 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64a966e8000000000f00e70c
2024-10-08 21:50:36 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64a55f9d00000000310091ba
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64964f3a000000000800dfa8
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 6495029b00000000140273e5
2024-10-08 21:50:40 asyncio WARNING (proactor_events.py:353) - socket.send() raised exception.
2024-10-08 21:50:40 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 64925f880000000013001496
2024-10-08 21:50:40 asyncio WARNING (proactor_events.py:353) - socket.send() raised exception.
2024-10-08 21:50:44 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 648fcd3b0000000013014332
2024-10-08 21:50:44 MediaCrawler INFO (core.py:257) - [XiaoHongShuCrawler.get_comments] Begin get note id comments 648b2b7e000000000703a035
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_tasks.py", line 115, in fail_after
yield cancel_scope
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 114, in connect_tcp
stream: anyio.abc.ByteStream = await anyio.connect_tcp(
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_sockets.py", line 219, in connect_tcp
await event.wait()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_backends_asyncio.py", line 1662, in wait
await self._event.wait()
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\asyncio\locks.py", line 212, in wait
await fut
asyncio.exceptions.CancelledError: Cancelled by cancel scope 1c4591ead50
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_exceptions.py", line 10, in map_exceptions
yield
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 113, in connect_tcp
with anyio.fail_after(timeout):
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\anyio_core_tasks.py", line 118, in fail_after
raise TimeoutError
TimeoutError
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 60, in map_httpcore_exceptions
yield
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 353, in handle_async_request
resp = await self._pool.handle_async_request(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection_pool.py", line 262, in handle_async_request
raise exc
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection_pool.py", line 245, in handle_async_request
response = await connection.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 92, in handle_async_request
raise exc
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 69, in handle_async_request
stream = await self._connect(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_async\connection.py", line 117, in _connect
stream = await self._network_backend.connect_tcp(**kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\auto.py", line 31, in connect_tcp
return await self._backend.connect_tcp(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_backends\anyio.py", line 112, in connect_tcp
with map_exceptions(exc_map):
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpcore_exceptions.py", line 14, in map_exceptions
raise to_exc(exc) from exc
httpcore.ConnectTimeout
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 50, in __call__
result = await fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 86, in request
response = await client.request(
^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1530, in request
return await self.send(request, auth=auth, follow_redirects=follow_redirects)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1617, in send
response = await self._send_handling_auth(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1645, in _send_handling_auth
response = await self._send_handling_redirects(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1682, in _send_handling_redirects
response = await self._send_single_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_client.py", line 1719, in _send_single_request
response = await transport.handle_async_request(request)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 352, in handle_async_request
with map_httpcore_exceptions():
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\contextlib.py", line 158, in __exit__
self.gen.throw(value)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\httpx_transports\default.py", line 77, in map_httpcore_exceptions
raise mapped_exc(message) from exc
httpx.ConnectTimeout
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\main.py", line 58, in <module>
asyncio.get_event_loop().run_until_complete(main())
File "C:\Users\HUAWEI\AppData\Local\Programs\Python\Python312\Lib\asyncio\base_events.py", line 687, in run_until_complete
return future.result()
^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\main.py", line 47, in main
await crawler.start()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 84, in start
await self.get_creators_and_notes()
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 160, in get_creators_and_notes
await self.batch_get_note_comments(note_ids)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 252, in batch_get_note_comments
await asyncio.gather(*task_list)
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\core.py", line 258, in get_comments
await self.xhs_client.get_note_all_comments(
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 288, in get_note_all_comments
comments_res = await self.get_note_comments(note_id, comments_cursor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 249, in get_note_comments
return await self.get(uri, params)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\media_platform\xhs\client.py", line 116, in get
return await self.request(method="GET", url=f"{self.host}{final_uri}", headers=headers)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 88, in async_wrapped
return await fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\_asyncio.py", line 47, in __call__
do = self.iter(retry_state=retry_state)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\HUAWEI\OneDrive\文档\GitHub\MediaCrawler\venv\Lib\site-packages\tenacity\__init__.py", line 326, in iter
raise retry_exc from fut.exception()
tenacity.RetryError: RetryError[<Future at 0x1c455019190 state=finished raised ConnectTimeout>]
The text was updated successfully, but these errors were encountered: