diff --git a/README.md b/README.md index 3a471ec..44bf68f 100644 --- a/README.md +++ b/README.md @@ -14,6 +14,15 @@ [![codecov](https://codecov.io/gh/joweich/chat-miner/branch/main/graph/badge.svg?token=6EQF0YNGLK)](https://codecov.io/gh/joweich/chat-miner) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) +🌐 +**English** +[Русский][RU] + +[EN]:README.md +[RU]:README.ru.md + +----------------- + **chat-miner** provides lean parsers for every major platform transforming chats into pandas dataframes. Artistic visualizations allow you to explore your data and create artwork from your chats. diff --git a/README.ru.md b/README.ru.md new file mode 100644 index 0000000..7e1161e --- /dev/null +++ b/README.ru.md @@ -0,0 +1,157 @@ + + +----------------- + +# chat-miner: Превратите свои чаты в искусство! + +[![PyPI Version](https://img.shields.io/pypi/v/chat-miner.svg)](https://pypi.org/project/chat-miner/) +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) +[![Downloads](https://static.pepy.tech/badge/chat-miner/month)](https://pepy.tech/project/chat-miner) +[![codecov](https://codecov.io/gh/joweich/chat-miner/branch/main/graph/badge.svg?token=6EQF0YNGLK)](https://codecov.io/gh/joweich/chat-miner) +[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) + +🌐 +[English][EN] +**Русский** + +[EN]:README.md +[RU]:README.ru.md + +----------------- + +**chat-miner** предоставляет эффективные парсеры для любой крупной платформы, представляющие чаты как pandas-датафреймы. Художественная визуализация позволяет вам исследовать данные ваших переписок и создавать из них произведения искусства. + + +## 1. 
Установка +Последний выпуск, включая зависимости, можно установить с помощью PyPI: +```sh +pip install chat-miner +``` +Если вы заинтересованы в участии в проекте, запуске свежего исходного кода или просто любите все билдить сами: +```sh +git clone https://github.com/joweich/chat-miner.git +cd chat-miner +pip install -r requirements.txt +``` + +## 2. Экспортирование чатов +Ознакомьтесь с официальными руководствами для [WhatsApp](https://faq.whatsapp.com/1180414079177245/), [Signal](https://github.com/carderne/signal-export), [Telegram](https://telegram.org/blog/export-and-more), [Facebook Messenger](https://www.facebook.com/help/messenger-app/713635396288741) или [Instagram Chats](https://help.instagram.com/181231772500920), чтобы узнать, как экспортировать чаты для вашей платформы. + +## 3. Парсинг +Код ниже показывает работу модуля ``WhatsAppParser``. +``SignalParser``, ``TelegramJsonParser``, ``FacebookMessengerParser`` и ``InstagramJsonParser`` используются тем же образом. +```python +from chatminer.chatparsers import WhatsAppParser + +parser = WhatsAppParser(FILEPATH) +parser.parse_file() +df = parser.parsed_messages.get_df() +``` +**Внимание:** +В зависимости от вашей ОС, python может требовать конвертирования пути к файлу в "сырую" строку. +```python +import os +FILEPATH = r"C:\Users\Username\chat.txt" # Windows +FILEPATH = "/home/username/chat.txt" # Unix +assert os.path.isfile(FILEPATH) + +``` + +## 4. 
Визуализация +```python +import chatminer.visualizations as vis +import matplotlib.pyplot as plt +``` +### 4.1 Тепловая карта: Количество сообщений в день +```python +fig, ax = plt.subplots(2, 1, figsize=(9, 3)) +ax[0] = vis.calendar_heatmap(df, year=2020, cmap='Oranges', ax=ax[0]) +ax[1] = vis.calendar_heatmap(df, year=2021, linewidth=0, monthly_border=True, ax=ax[1]) +``` + +
+ +
+ +### 4.2 Sunburst-диаграмма: Количество сообщений по времени суток +```python +fig, ax = plt.subplots(1, 2, figsize=(7, 3), subplot_kw={'projection': 'polar'}) +ax[0] = vis.sunburst(df, highlight_max=True, isolines=[2500, 5000], isolines_relative=False, ax=ax[0]) +ax[1] = vis.sunburst(df, highlight_max=False, isolines=[0.5, 1], color='C1', ax=ax[1]) +``` + ++ +
+ +### 4.3 Облако слов: Частота слов +```python +fig, ax = plt.subplots(figsize=(8, 3)) +stopwords = ['these', 'are', 'stopwords'] +kwargs={"background_color": "white", "width": 800, "height": 300, "max_words": 500} +ax = vis.wordcloud(df, ax=ax, stopwords=stopwords, **kwargs) +``` ++ +
+ +### 4.4 Радарная диаграмма: Количество сообщений по дням недели +```python +if not vis.is_radar_registered(): + vis.radar_factory(7, frame="polygon") +fig, ax = plt.subplots(1, 2, figsize=(7, 3), subplot_kw={'projection': 'radar'}) +ax[0] = vis.radar(df, ax=ax[0]) +ax[1] = vis.radar(df, ax=ax[1], color='C1', alpha=0) +``` ++ +
+ +## 5. Обработка естественного языка + +### 5.1 Добавьте настрой + +```python +from chatminer.nlp import add_sentiment + +df_sentiment = add_sentiment(df) +``` +### 5.2 Пример диаграммы: Настрой каждого автора в групповом чате + +```python +df_grouped = df_sentiment.groupby(['author', 'sentiment']).size().unstack(fill_value=0) +ax = df_grouped.plot(kind='bar', stacked=True, figsize=(8, 3)) +``` + ++ +
+ + +## 6. Интерфейс командной строки +Через командную строку поддерживается парс чатов в csv-файлы. +На данный момент, напрямую через командную строку создавать визуализации **нельзя!** + +Пример использования: +```bash +$ chatminer -p whatsapp -i exportfile.txt -o output.csv +``` + +Руководство к использованию: +``` +usage: chatminer [-h] [-p {whatsapp,instagram,facebook,signal,telegram}] [-i INPUT] [-o OUTPUT] + +options: + -h, --help + Show this help message and exit + -p {whatsapp,instagram,facebook,signal,telegram}, --parser {whatsapp,instagram,facebook,signal,telegram} + The platform from which the chats are imported + -i INPUT, --input INPUT + Input file to be processed + -o OUTPUT, --output OUTPUT + Output file for the results +```