diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml index 5eb0e9f..9efc3d2 100644 --- a/.github/workflows/unittest.yml +++ b/.github/workflows/unittest.yml @@ -28,10 +28,10 @@ jobs: python-version: ${{ matrix.python-version }} - name: Get setuptools Unix if: ${{ matrix.os != 'windows-latest' }} - run: pip install --upgrade pip setuptools codecov + run: python -m pip install --upgrade pip setuptools codecov - name: Get setuptools Windows if: ${{ matrix.os == 'windows-latest' }} - run: pip install --upgrade --user pip setuptools codecov + run: python -m pip install --upgrade pip setuptools codecov - name: Install dependencies run: make dev_install - name: Test with pytest diff --git a/dev-requirements.txt b/dev-requirements.txt index c0d7c62..75f5d26 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -2,7 +2,7 @@ black==22.6.0 isort==5.10.1 mock==4.0.3 mypy==0.971 -pre-commit==2.20.0 +pre-commit==2.21.0 pylint==2.14.5 pytest==7.2.1 pytest-cov==3.0.0 diff --git a/media_downloader.py b/media_downloader.py index 0cdfc9b..ee5f3c8 100644 --- a/media_downloader.py +++ b/media_downloader.py @@ -30,7 +30,7 @@ APPLICATION_NAME = "media_downloader" app = Application(CONFIG_NAME, DATA_FILE_NAME, APPLICATION_NAME) -RETRY_TIME_OUT = 60 +RETRY_TIME_OUT = 5 logging.getLogger("pyrogram.session.session").addFilter(LogFilter()) logging.getLogger("pyrogram.client").addFilter(LogFilter()) @@ -62,6 +62,24 @@ def _check_download_finish(media_size: int, download_path: str, message_id: int) raise TypeError("Media downloaded with wrong size") +def _check_timeout(retry: int, message_id: int): + """Check if message download timeout, then add message id into failed_ids + + Parameters + ---------- + retry: int + Retry download message times + + message_id: int + Try to download message 's id + + """ + if retry == 2: + app.failed_ids.append(message_id) + return True + return False + + def _validate_title(title: str): """Fix if title validation fails @@ -173,24 +191,32 @@ async def _get_media_meta( else: file_name = getattr(media_obj, "file_name", None) caption = getattr(message, "caption", None) + + file_name_suffix = "" + if not file_name: + if message.photo: + file_format = "jpg" + file_name_suffix = f".{file_format}" + if caption: caption = _validate_title(caption) app.set_caption_name(app.chat_id, message.media_group_id, caption) else: caption = app.get_caption_name(app.chat_id, message.media_group_id) - gen_file_name = app.get_file_name(message.id, file_name, caption) + if not file_name and message.photo: + file_name = f"{message.photo.file_unique_id}" - if not file_name: - if message.photo: - file_format = "jpg" - gen_file_name = f"{gen_file_name}.{file_format}" + gen_file_name = ( + app.get_file_name(message.id, file_name, caption) + file_name_suffix + ) file_save_path = app.get_file_save_path(_type, dirname, datetime_dir_name) file_name = os.path.join(file_save_path, gen_file_name) return file_name, file_format +# pylint: disable = R0915 async def download_media( client: pyrogram.client.Client, message: pyrogram.types.Message, @@ -228,60 +254,78 @@ async def download_media( int Current message id. """ + # pylint: disable = R0912 file_name: str = "" + ui_file_name: str = "" task_start_time: float = time.time() - ui_file_name = file_name + media_size = 0 + _media = None + try: + if message.media is None: + return message.id + for _type in media_types: + _media = getattr(message, _type, None) + if _media is None: + continue + file_name, file_format = await _get_media_meta(message, _media, _type) + media_size = getattr(_media, "file_size", 0) + + if _can_download(_type, file_formats, file_format): + if _is_exist(file_name): + # TODO: check if the file download complete + # file_size = os.path.getsize(file_name) + # media_size = getattr(_media, 'file_size') + # if media_size is not None and file_size != media_size: + + # FIXME: if exist and not empty file skip + logger.info( + "{} already download,download skipped.\n", + file_name, + ) + + return message.id + + ui_file_name = file_name + if app.hide_file_name: + ui_file_name = ( + os.path.dirname(file_name) + + "/****" + + os.path.splitext(file_name)[-1] + ) + break + except Exception as e: + logger.error( + "Message[{}]: could not be downloaded due to following exception:\n[{}].", + message.id, + e, + exc_info=True, + ) + app.failed_ids.append(message.id) + return message.id + + if _media is None: + return message.id for retry in range(3): try: - if message.media is None: - return message.id - for _type in media_types: - _media = getattr(message, _type, None) - if _media is None: - continue - file_name, file_format = await _get_media_meta(message, _media, _type) - - if _can_download(_type, file_formats, file_format): - if _is_exist(file_name): - # TODO: check if the file download complete - # file_size = os.path.getsize(file_name) - # media_size = getattr(_media, 'file_size') - # if media_size is not None and file_size != media_size: - - # FIXME: if exist and not empty file skip - logger.info( - "{} already download,download skipped.\n", - file_name, - ) - - break - - if app.hide_file_name: - ui_file_name = ( - os.path.dirname(file_name) - + "/****" - + os.path.splitext(file_name)[-1] - ) - - download_path = await client.download_media( - message, - file_name=file_name, - progress=lambda down_byte, total_byte: update_download_status( - message.id, - down_byte, - total_byte, - ui_file_name, - task_start_time, - ), - ) - if download_path and isinstance(download_path, str): - media_size = getattr(_media, "file_size", 0) - # TODO: if not exist file size or media - _check_download_finish(media_size, download_path, message.id) - await app.upload_file(file_name) + download_path = await client.download_media( + message, + file_name=file_name, + progress=lambda down_byte, total_byte: update_download_status( + message.id, + down_byte, + total_byte, + ui_file_name, + task_start_time, + ), + ) + + if download_path and isinstance(download_path, str): + # TODO: if not exist file size or media + _check_download_finish(media_size, download_path, message.id) + await app.upload_file(file_name) - app.downloaded_ids.append(message.id) + app.downloaded_ids.append(message.id) break except pyrogram.errors.exceptions.bad_request_400.BadRequest: logger.warning( @@ -292,13 +336,16 @@ async def download_media( chat_id=message.chat.id, # type: ignore message_ids=message.id, ) - if retry == 2: + if _check_timeout(retry, message.id): # pylint: disable = C0301 logger.error( "Message[{}]: file reference expired for 3 retries, download skipped.", message.id, ) - app.failed_ids.append(message.id) + except pyrogram.errors.exceptions.flood_420.FloodWait as wait_err: + await asyncio.sleep(wait_err.value) + logger.warning("Message[{}]: FlowWait {}", message.id, wait_err.value) + _check_timeout(retry, message.id) except TypeError: # pylint: disable = C0301 logger.warning( @@ -306,12 +353,11 @@ async def download_media( message.id, ) await asyncio.sleep(RETRY_TIME_OUT) - if retry == 2: + if _check_timeout(retry, message.id): logger.error( "Message[{}]: Timing out after 3 reties, download skipped.", message.id, ) - app.failed_ids.append(message.id) except Exception as e: # pylint: disable = C0301 logger.error( @@ -474,7 +520,8 @@ def main(): app.update_config() -if __name__ == "__main__": +def exec_main(): + """main""" app.pre_run() print_meta(logger) main() @@ -484,3 +531,7 @@ def main(): app.total_download_task, app.cloud_drive_config.total_upload_success_file_count, ) + + +if __name__ == "__main__": + exec_main() diff --git a/module/cloud_drive.py b/module/cloud_drive.py index 6e3a476..1f1c080 100644 --- a/module/cloud_drive.py +++ b/module/cloud_drive.py @@ -84,9 +84,12 @@ async def rclone_upload_file( ): """Use Rclone upload file""" try: - remote_dir = drive_config.remote_dir + os.path.dirname( - local_file_path - ).removeprefix(save_path).replace("\\", "/") + remote_dir = ( + drive_config.remote_dir + + "/" + + os.path.dirname(local_file_path).replace(save_path, "") + + "/" + ).replace("\\", "/") if not drive_config.dir_cache.get(remote_dir): CloudDrive.rclone_mkdir(drive_config, remote_dir) @@ -134,16 +137,15 @@ async def aligo_upload_file( try: remote_dir = ( drive_config.remote_dir - + os.path.dirname(local_file_path) - .removeprefix(save_path) - .replace("\\", "/") + "/" - ) + + os.path.dirname(local_file_path).replace(save_path, "") + + "/" + ).replace("\\", "/") if not drive_config.dir_cache.get(remote_dir): CloudDrive.aligo_mkdir(drive_config, remote_dir) - drive_config.dir_cache[ - remote_dir - ] = drive_config.aligo.get_folder_by_path(remote_dir).file_id + aligo_dir = drive_config.aligo.get_folder_by_path(remote_dir) + if aligo_dir: + drive_config.dir_cache[remote_dir] = aligo_dir.file_id zip_file_path: str = "" file_paths = [] diff --git a/module/web.py b/module/web.py index 1b6180c..7d0cfc6 100644 --- a/module/web.py +++ b/module/web.py @@ -63,15 +63,12 @@ def update_download_status( download_speed = max(download_speed, 0) - _download_result[message_id] = { - "down_byte": down_byte, - "total_size": total_size, - "file_name": file_name, - "start_time": start_time, - "end_time": end_time, - "download_speed": download_speed, - "each_second_total_download": each_second_total_download, - } + _download_result[message_id]["down_byte"] = down_byte + _download_result[message_id]["end_time"] = end_time + _download_result[message_id]["download_speed"] = download_speed + _download_result[message_id][ + "each_second_total_download" + ] = each_second_total_download else: each_second_total_download = down_byte _download_result[message_id] = { @@ -90,6 +87,7 @@ def update_download_status( _total_download_speed = int( _total_download_size / (cur_time - _last_download_time) ) + _total_download_speed = max(_total_download_speed, 0) _total_download_size = 0 _last_download_time = cur_time @@ -139,7 +137,7 @@ def get_download_list(): + '" ,"download_progress":"' ) result += ( - str(round(value["down_byte"] / value["total_size"] * 100, 2)) + f'{round(value["down_byte"] / value["total_size"] * 100, 1)}' + '" ,"download_speed":"' + download_speed + '" ,"save_path":"' diff --git a/tests/test_media_downloader.py b/tests/test_media_downloader.py index fd3f552..a79da63 100644 --- a/tests/test_media_downloader.py +++ b/tests/test_media_downloader.py @@ -15,6 +15,7 @@ app, begin_import, download_media, + exec_main, main, process_messages, ) @@ -42,6 +43,8 @@ def os_remove(_: str): def is_exist(file: str): if os.path.basename(file).find("311 - sucess_exist_down.mp4") != -1: return True + elif os.path.basename(file).find("422 - exception.mov") != -1: + raise Exception return False @@ -75,6 +78,10 @@ def raise_keyboard_interrupt(): raise KeyboardInterrupt +def raise_exception(): + raise Exception + + class Chat: def __init__(self, chat_id, chat_title): self.id = chat_id @@ -311,6 +318,10 @@ async def download_media(self, *args, **kwargs): raise pyrogram.errors.exceptions.unauthorized_401.Unauthorized elif mock_message.id == 11: raise TypeError + elif mock_message.id == 420: + raise pyrogram.errors.exceptions.flood_420.FloodWait(value=420) + elif mock_message.id == 421: + raise Exception return kwargs["file_name"] @@ -363,7 +374,7 @@ def test_get_media_meta(self): ) self.assertEqual( ( - platform_generic_path("/root/project/test2/2019_08/2.jpg"), + platform_generic_path("/root/project/test2/2019_08/2 - ADAVKJYIFV.jpg"), "jpg", ), result, @@ -386,7 +397,7 @@ def test_get_media_meta(self): self.assertEqual( ( platform_generic_path( - "/root/project/test2/2019_08/2 - #home #book.jpg" + "/root/project/test2/2019_08/2 - #home #book - ADAVKJYIFV.jpg" ), "jpg", ), @@ -561,6 +572,7 @@ def test_get_media_meta(self): @mock.patch("media_downloader.asyncio.sleep", return_value=None) @mock.patch("media_downloader.logger") @mock.patch("media_downloader.RETRY_TIME_OUT", new=1) + @mock.patch("media_downloader._is_exist", new=is_exist) def test_download_media(self, mock_logger, patched_time_sleep): client = MockClient() @@ -680,6 +692,58 @@ def test_download_media(self, mock_logger, patched_time_sleep): "Message[{}]: Timing out after 3 reties, download skipped.", 11 ) + # Test FloodWait 420 + message_7 = MockMessage( + id=420, + media=True, + video=MockVideo( + file_name="sample_video.mov", + mime_type="video/mov", + ), + ) + result = self.loop.run_until_complete( + async_download_media( + client, message_7, ["video", "photo"], {"video": ["all"]} + ) + ) + self.assertEqual(420, result) + mock_logger.warning.assert_called_with("Message[{}]: FlowWait {}", 420, 420) + self.assertEqual(app.failed_ids.count(420), 1) + + # Test other Exception + message_8 = MockMessage( + id=421, + media=True, + video=MockVideo( + file_name="sample_video.mov", + mime_type="video/mov", + ), + ) + result = self.loop.run_until_complete( + async_download_media( + client, message_8, ["video", "photo"], {"video": ["all"]} + ) + ) + self.assertEqual(421, result) + self.assertEqual(app.failed_ids.count(421), 1) + + # Test other Exception + message_9 = MockMessage( + id=422, + media=True, + video=MockVideo( + file_name="422 - exception.mov", + mime_type="video/mov", + ), + ) + result = self.loop.run_until_complete( + async_download_media( + client, message_9, ["video", "photo"], {"video": ["all"]} + ) + ) + self.assertEqual(422, result) + self.assertEqual(app.failed_ids.count(422), 1) + @mock.patch("media_downloader.pyrogram.Client", new=MockClient) @mock.patch("media_downloader.process_messages", new=mock_process_message) def test_begin_import(self): @@ -852,6 +916,23 @@ def test_keyboard_interrupt(self): self.assertEqual(app.ids_to_retry, [1, 2, 3, 4]) + @mock.patch("media_downloader.check_for_updates", new=raise_exception) + @mock.patch("media_downloader.pyrogram.Client", new=MockClient) + @mock.patch("media_downloader.process_messages", new=mock_process_message) + @mock.patch("media_downloader.RETRY_TIME_OUT", new=1) + @mock.patch("media_downloader.begin_import", new=async_begin_import) + def test_other_exception(self): + rest_app(MOCK_CONF) + app.failed_ids.append(3) + app.failed_ids.append(4) + + try: + exec_main() + except: + pass + + self.assertEqual(app.ids_to_retry, [1, 2, 3, 4]) + @classmethod def tearDownClass(cls): cls.loop.close()