Skip to content

Commit

Permalink
exclude None messages; skip unavailable channels
Browse files (browse the repository at this point in the history)
  • Loading branch information
p-phung committed Feb 21, 2023
1 parent da9ad2e commit 9203f4e
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions pipeline/src/pipeline/get_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -376,7 +376,8 @@ async def scrape_messages(telegram_client, telegram_channels, start_date, end_da
wait_time = 5
):
reply = None
df_messages = arrange_telegram_messages(df_messages, message, reply, channel)
if message is not None:
df_messages = arrange_telegram_messages(df_messages, message, reply, channel)
if channel_entity.broadcast and message.post and message.replies:
df_replies = pd.DataFrame()
try:
Expand All @@ -385,9 +386,11 @@ async def scrape_messages(telegram_client, telegram_channels, start_date, end_da
reply_to=message.id,
wait_time = 5
):
df_replies = arrange_telegram_messages(df_replies, message, reply, channel)
if reply is not None:
df_replies = arrange_telegram_messages(df_replies, message, reply, channel)
time.sleep(5)
df_messages = df_messages.append(df_replies, ignore_index=True)
if len(df_replies) > 0:
df_messages = df_messages.append(df_replies, ignore_index=True)
except Exception as e:
logging.info(f"getting replies for {message.id} failed: {e}")
time_duration = time.time() - time_start
Expand All @@ -405,7 +408,7 @@ async def scrape_messages(telegram_client, telegram_channels, start_date, end_da
time.sleep(10)
continue
except Exception as e:
logging.error(f"in getting in telegram channel {channel}: {e}")
break
logging.info(f"Unable to get in telegram channel {channel}: {e}")
# break

return df_messages, df_member_counts

0 comments on commit 9203f4e

Please sign in to comment.