From 40323b5cff1d9ac29ff8a5418c897cee726bde1a Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Tue, 26 Nov 2024 06:36:40 -0500 Subject: [PATCH 1/7] fix Conversation CSV loader This PR fixes the CSV loader. The `Conversation(id)` class requires the `id` to find the details of a conversation, which has: - participants - size of the conversation (or length of messages) - list of messages --- src/cmn/conversation.py | 44 +++++++++++++++++++++++++++-------------- src/cmn/message.py | 2 +- 2 files changed, 30 insertions(+), 16 deletions(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index 05aae28..a221dd3 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -1,18 +1,24 @@ -import os import csv from cmn.message import Message -class Conversation(Object): - def __init__(self, id, messages, participants): +class Conversation: + def __init__(self, id): self.id = id - self.messages = messages - self.participants = participants + self.messages = [] + self.participants = set() + self.conv_size = 0 @staticmethod def loader(path): - if path.endswith(".csv"): return Conversation.csv_loader(path) + if path.endswith(".csv"): + return Conversation.csv_loader(path) + + def add_message(self, message_content, author_involved, size): + self.messages.append(message_content) + self.participants.add(author_involved) + self.conv_size = size @staticmethod def csv_loader(filepath): @@ -26,7 +32,6 @@ def csv_loader(filepath): # before we import, could be taken care here. 
try: # Assign the conversation id to Conversation Object - # conversation.conversation_id = row['conv_id'] message = Message( row["msg_line"], @@ -34,15 +39,20 @@ def csv_loader(filepath): row["time"], row["msg_char_count"], row["msg_word_count"], - row["conv_size"], - row["nauthor"], row["text"], row["tagged_predator"], - row["predatory_conv"], ) - if id not in convs: convs[id] = Conversation(row["conv_id"], None, None) - convs[id].add_message(message) + conv_id = row["conv_id"] + author_involved = row["author_id"] + conversation_size = row["conv_size"] + + if conv_id not in convs: + convs[conv_id] = Conversation(conv_id) + + convs[conv_id].add_message( + message, author_involved, conversation_size + ) except KeyError as e: print(f"Import Error: {e}") @@ -50,10 +60,14 @@ def csv_loader(filepath): return convs def __repr__(self): - repr_string = f"Conversation ID: {self.id}\nNumber of messages: {len(self.messages)}\n" + authors_list = "\n".join(self.participants) + + repr_string = f"Conversation ID: {self.id}\nConversation Size: {self.conv_size}\nAuthors Involved: {len(list(self.participants))}\n{authors_list}\n" - if not self.messages: repr_string += "No messages found for this conversation.\n" + if not self.messages: + repr_string += "No messages found for this conversation.\n" else: - for message in self.messages: repr_string += f"\n{message}" + for message in self.messages: + repr_string += f"\n{message}" return repr_string diff --git a/src/cmn/message.py b/src/cmn/message.py index 2bc018e..0ac41b9 100644 --- a/src/cmn/message.py +++ b/src/cmn/message.py @@ -1,4 +1,4 @@ -class Message(Object): +class Message: def __init__( self, idx: str, From d65783bf2ee75c11620326710f84f54c40e56909 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 10:53:47 -0500 Subject: [PATCH 2/7] Fixed the formatting and changed to one liner --- src/cmn/conversation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py 
index a221dd3..cce9e8b 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -1,5 +1,4 @@ import csv - from cmn.message import Message From 7b6ccfc45e8bc163fe82154557cc2bbd51ab1a30 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 11:03:01 -0500 Subject: [PATCH 3/7] Fixed the formatting and changed to one liner --- src/cmn/conversation.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index cce9e8b..44ce042 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -11,27 +11,15 @@ def __init__(self, id): @staticmethod def loader(path): - if path.endswith(".csv"): - return Conversation.csv_loader(path) - - def add_message(self, message_content, author_involved, size): - self.messages.append(message_content) - self.participants.add(author_involved) - self.conv_size = size + if path.endswith(".csv"): return Conversation.csv_loader(path) @staticmethod def csv_loader(filepath): convs = {} - with open(filepath, mode="r", newline="", encoding="utf-8") as csvfile: csv_reader = csv.DictReader(csvfile) - # conversation = Conversation() for row in csv_reader: - # In the future, if we are having data which needs to be tweaked - # before we import, could be taken care here. 
try: - # Assign the conversation id to Conversation Object - message = Message( row["msg_line"], row["author_id"], From 433e3e8848d2a540ff41ed34059e69a67c74e811 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 11:07:30 -0500 Subject: [PATCH 4/7] remove the add function --- src/cmn/conversation.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index 44ce042..a124fb3 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -31,30 +31,23 @@ def csv_loader(filepath): ) conv_id = row["conv_id"] - author_involved = row["author_id"] - conversation_size = row["conv_size"] - if conv_id not in convs: convs[conv_id] = Conversation(conv_id) - convs[conv_id].add_message( - message, author_involved, conversation_size - ) + convs[conv_id].messages.append(message) + convs[conv_id].participants.add(row["author_id"]) + convs[conv_id].conv_size = row["conv_size"] except KeyError as e: print(f"Import Error: {e}") - return convs - + def __repr__(self): authors_list = "\n".join(self.participants) - repr_string = f"Conversation ID: {self.id}\nConversation Size: {self.conv_size}\nAuthors Involved: {len(list(self.participants))}\n{authors_list}\n" - if not self.messages: repr_string += "No messages found for this conversation.\n" else: for message in self.messages: repr_string += f"\n{message}" - return repr_string From 46151273bf00e8ddecfce2448c854cf14108cd31 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 11:31:16 -0500 Subject: [PATCH 5/7] format to one liner --- src/cmn/conversation.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index a124fb3..c73fd48 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -31,8 +31,7 @@ def csv_loader(filepath): ) conv_id = row["conv_id"] - if conv_id not in convs: - convs[conv_id] = Conversation(conv_id) + if conv_id not 
in convs: convs[conv_id] = Conversation(conv_id) convs[conv_id].messages.append(message) convs[conv_id].participants.add(row["author_id"]) From 26f1320e38ce0f116a692a596b9a138275fdc310 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 11:32:40 -0500 Subject: [PATCH 6/7] format to one liner --- src/cmn/conversation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index c73fd48..a47be9a 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -49,4 +49,5 @@ def __repr__(self): else: for message in self.messages: repr_string += f"\n{message}" + for message in self.messages: repr_string += f"\n{message}" return repr_string From fe5a71c9b1c1daab7d26abf38b4b5af5b0ef65b6 Mon Sep 17 00:00:00 2001 From: Arunesh Choudhary Date: Fri, 29 Nov 2024 11:34:39 -0500 Subject: [PATCH 7/7] fix the formatting --- src/cmn/conversation.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/cmn/conversation.py b/src/cmn/conversation.py index a47be9a..dd148c5 100644 --- a/src/cmn/conversation.py +++ b/src/cmn/conversation.py @@ -44,10 +44,7 @@ def csv_loader(filepath): def __repr__(self): authors_list = "\n".join(self.participants) repr_string = f"Conversation ID: {self.id}\nConversation Size: {self.conv_size}\nAuthors Involved: {len(list(self.participants))}\n{authors_list}\n" - if not self.messages: - repr_string += "No messages found for this conversation.\n" + if not self.messages: repr_string += "No messages found for this conversation.\n" else: - for message in self.messages: - repr_string += f"\n{message}" for message in self.messages: repr_string += f"\n{message}" return repr_string