Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add packages #1

Open
wants to merge 2 commits into
base: to_upload
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file added Code/__init__.py
Empty file.
35 changes: 20 additions & 15 deletions aibot.py → Code/aibot.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import time

import os
from transformers import AutoTokenizer, AutoModelForTokenClassification

from answer_per_question import answer_per_question
from aryana import aryana
from auto_correct import auto_correct
from find_dates import reformat_date
from find_events_in_sentence import find_events_in_sentence
from find_time import reformat_date_time
from speechRec import google
from split import split
from .answer_per_question import answer_per_question
from .API_voice.aryana import aryana
from .auto_correct import auto_correct
from .arguments.find_dates import reformat_date
from .arguments.find_events_in_sentence import find_events_in_sentence
from .arguments.find_time import reformat_date_time
from .API_voice.speechRec import google
from .split import split


class BOT:
Expand All @@ -35,8 +34,9 @@ def AIBOT(self, Question):
answer_set = {'type': set(), 'city': set(), 'date': set(),
'time': set(), 'religious_time': set(), 'calendar_type': set(),
'event': set(), 'api_url': set(), 'result': []}
# Question = auto_correct(Question)

try:
Question = auto_correct(Question)
Question = reformat_date_time(Question)
except:
pass
Expand All @@ -45,12 +45,11 @@ def AIBOT(self, Question):
Question = reformat_date(Question)
except:
pass

'/var/www/AIBot/media/codes/[email protected]/bert-base-parsbert-ner-uncased'
p = os.path.dirname(os.path.abspath(__file__))
tokenizer = AutoTokenizer.from_pretrained(
'/var/www/AIBot/media/codes/[email protected]/bert-base-parsbert-ner-uncased')
p + '/../bert-base-parsbert-ner-uncased')
model = AutoModelForTokenClassification.from_pretrained(
'/var/www/AIBot/media/codes/[email protected]/bert-base-parsbert-ner-uncased')
p + '/../bert-base-parsbert-ner-uncased')

try:
events, event_keys = find_events_in_sentence(Question)
Expand All @@ -60,6 +59,7 @@ def AIBOT(self, Question):

try:
Questions = split(Question, events)
print('Questions : ' + Questions)
except:
Questions = [Question]
pass
Expand All @@ -83,6 +83,11 @@ def AIBOT(self, Question):
answer[key] = list(answer_set[key])
final_answer = final_answer + " ."

response = aryana(final_answer)

with open("response.wav", mode='bw') as f:
f.write(response.content)

return answer, final_answer

'''
Expand Down
18 changes: 9 additions & 9 deletions answer_per_question.py → Code/answer_per_question.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import dateparser
from persiantools.jdatetime import JalaliDate

from find import find
from find_fit_word import find_fit_word
from find_time_from_religious import find_time_from_religious
from find_weather_from_city_date import find_weather_from_city_date
from learning import predict
from mhr_time import Time
from output_sentences import religion_sentence, time_sentence, date_sentence, unknown_sentence, weather_sentence, \
from .arguments.find import find
from .arguments.find_fit_word import find_fit_word
from .arguments.find_time_from_religious import find_time_from_religious
from .arguments.find_weather_from_city_date import find_weather_from_city_date
from .learning import predict
from .mhr_time import Time
from .output_sentences import religion_sentence, time_sentence, date_sentence, unknown_sentence, weather_sentence, \
weather_logical_sentence
from utility import convert_date
from weather_difference import weather_difference
from .utility import convert_date
from .weather_difference import weather_difference


def answer_per_question(Question, model, tokenizer, all_events, all_event_keys):
Expand Down
11 changes: 11 additions & 0 deletions Code/argument_corpse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""Build a de-duplicated word list from the auto-correct argument corpus."""
import os

# Resolve the corpus relative to this file rather than the current working
# directory, the same way the other modules in this package locate Data/.
p = os.path.dirname(os.path.abspath(__file__))

# NOTE(review): assumes the corpus is stored as UTF-8 -- confirm.
with open(os.path.join(p, "../Data/auto_correct/argument_corpse.txt"),
          encoding="utf-8") as f:
    # Drop every blank line (e.g. the one produced by a trailing newline).
    # list.remove('') raised ValueError when no blank line was present and
    # only removed the first one when several were.
    words = [line for line in f.read().split('\n') if line]

# Entries containing spaces are broken into their individual words.
new = []
for w in words:
    if ' ' in w:
        new += w.split()
    else:
        new.append(w)

# Remove duplicates; the resulting order is arbitrary.
new = list(set(new))
Empty file added Code/arguments/__init__.py
Empty file.
18 changes: 9 additions & 9 deletions find.py → Code/arguments/find.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from __future__ import unicode_literals

from find_calendar_types import find_calendar_types
from find_cities import find_cities
from find_dates import find_dates
from find_events import find_events
from find_religious_time import find_religious_time
from find_time import find_date_time
from find_weather_method import find_weather_method
from pipeline_sentence import pipeline_sentence
from tokens_in_sentence import find_tokens_in_sentence
from .find_calendar_types import find_calendar_types
from .find_cities import find_cities
from .find_dates import find_dates
from .find_events import find_events
from .find_religious_time import find_religious_time
from .find_time import find_date_time
from .find_weather_method import find_weather_method
from ..pipeline_sentence import pipeline_sentence
from ..tokens_in_sentence import find_tokens_in_sentence


# If you don't have the model, comment out lines 7, 8, 23 and 24 and uncomment lines 27, 28 and 29.
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
4 changes: 2 additions & 2 deletions find_events.py → Code/arguments/find_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def find_events(sentence, dates, all_events, all_event_keys):
event_month = dates[0].split('-')[1]
event_day = dates[0].split('-')[2]
new_dates = dates
url1 = os.path.join(p, event_year + ".csv")
url1 = p + "/../../Data/events/" + event_year + ".csv"
df2 = pd.read_csv(url1)
for idx, row in df2.iterrows():
if str(row["month"]) == event_month and str(row["day"]) == str(event_day):
Expand All @@ -35,7 +35,7 @@ def find_events(sentence, dates, all_events, all_event_keys):
new_dates = []
if len(events) > 0 and len(dates) > 0:
year = dates[0].split('-')[0]
url1 = os.path.join(p, year + ".csv")
url1 = p + "/../../Data/events/" + year + ".csv"
df2 = pd.read_csv(url1)

for idx, row in df2.iterrows():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def find_events_in_sentence(sentence):
p = os.path.dirname(os.path.abspath(__file__))
url1 = os.path.join(p, "find important events.csv")
url1 = os.path.join(p, "../../Data/events/find important events.csv")
df1 = pd.read_csv(url1)
important_events = df1['event']
important_events_key = df1['event_key']
Expand Down
File renamed without changes.
File renamed without changes.
2 changes: 1 addition & 1 deletion find_time.py → Code/arguments/find_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from unidecode import unidecode

from persian_num_change import persian_num_change
from ..persian_num_change import persian_num_change


def find_date_time(tokens_lem, sentence):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from religious_time import ReligiousTime
from utility import convert_date
from ..religious_time import ReligiousTime
from ..utility import convert_date


def find_time_from_religious(answer):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from datetime import datetime

from weather import Weather
from ..weather import Weather


def find_weather_from_city_date(Question, city, greg_date):
Expand Down
File renamed without changes.
25 changes: 2 additions & 23 deletions auto_correct.py → Code/auto_correct.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ def lv(s, t):


def correct(word: str) -> str:
with open('new_argument_corpse.txt') as f:
p = os.path.dirname(os.path.abspath(__file__))
with open(p + '/../Data/auto_correct/new_argument_corpse.txt') as f:
data = f.read().split('\n')
data.remove('')

Expand All @@ -71,29 +72,8 @@ def correct(word: str) -> str:
return word


def load_lists():
    """Load the city, important-word, event and country columns from CSV.

    Every CSV file is looked up in the directory that contains this module.

    Returns:
        A tuple ``(cities, important_words, events, countries)`` holding the
        ``city-fa``, ``words``, ``event`` and ``country`` columns of their
        respective files.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    # (file name, column to extract), listed in the order they are returned.
    sources = (
        ("fa_cities_final2.csv", "city-fa"),
        ("important_words.csv", "words"),
        ("find important events.csv", "event"),
        ("countries.csv", "country"),
    )
    columns = [
        pd.read_csv(os.path.join(base_dir, file_name))[column]
        for file_name, column in sources
    ]
    cities, important_words, events, countries = columns
    return cities, important_words, events, countries


def auto_correct(sentence: str):
start = time.time()
cities, important_word, events, countries = load_lists()

symbols = "!\"#$%&()*+-./;<=>?@[\\]^_`{|}~\n،,؟؛"
for i in symbols:
Expand All @@ -114,4 +94,3 @@ def auto_correct(sentence: str):
return new_sen[:-1]


auto_correct("اذان ژهر به در حال حاضر افق تران کی است؟")
File renamed without changes.
File renamed without changes.
10 changes: 6 additions & 4 deletions learning.py → Code/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
import pandas as pd
from rule_based import rule_based_score
from .rule_based import rule_based_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, LSTM
from sklearn.model_selection import train_test_split
Expand All @@ -20,7 +20,8 @@
3: "date",
4: "unknown"}

df = pd.read_csv("questions and data.csv", index_col=0)
p = os.path.dirname(os.path.abspath(__file__))
df = pd.read_csv(p + "/../Data/intent/questions and data.csv", index_col=0)
df_x = df["questions"].values
df_y = df["class0"].values
# df = pd.read_csv("mh_clean.csv")
Expand Down Expand Up @@ -60,12 +61,13 @@ def train_model():
# print("Saved model to disk")

def predict(sent: str) -> int:
json_file = open('all_lstm_model.json', 'r')
p = os.path.dirname(os.path.abspath(__file__))
json_file = open(p + '/../Models/all_lstm_model.json', 'r')

model = json_file.read()
json_file.close()
model = model_from_json(model)
model.load_weights("all_lstm_model_weights.h5")
model.load_weights(p + "/../Models/all_lstm_model_weights.h5")

unk = 5
enc = tokenizer.texts_to_sequences(np.array([sent]))
Expand Down
2 changes: 1 addition & 1 deletion mhr_time.py → Code/mhr_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import requests
from bs4 import BeautifulSoup

from utility import get_english_names
from .utility import get_english_names


class Time:
Expand Down
2 changes: 1 addition & 1 deletion output_sentences.py → Code/output_sentences.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import num2fawords
from persiantools.jdatetime import JalaliDate

from capitals import capital_to_country
from .capitals import capital_to_country


def convert_month(month: int) -> str:
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion pipeline_sentence.py → Code/pipeline_sentence.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from hazm import word_tokenize, Lemmatizer, Normalizer
from transformers import pipeline

from capitals import capital_dictionary_keys, country_to_capital
from .capitals import capital_dictionary_keys, country_to_capital


def pipeline_sentence(sentence, model, tokenizer):
Expand Down
2 changes: 1 addition & 1 deletion religious_time.py → Code/religious_time.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import requests

from utility import get_english_names, split_date
from .utility import get_english_names, split_date

REL_TIME_CONVERSION = {
"امساک": "Imsak",
Expand Down
6 changes: 3 additions & 3 deletions rule_based.py → Code/rule_based.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ def get_list_from(filename: str) -> list:
def parse_shamsi_events() -> list:
    """Return the distinct event names listed in ``shamsi_events.csv``.

    This is only needed for the date category, so each event name is kept as
    a whole combination instead of being split into words.

    Returns:
        The unique values of the ``event`` column, in arbitrary order.
    """
    # `p` is a module-level name defined outside this function; presumably
    # the directory containing this module -- verify.
    # The stale pre-move path ('shamsi_events.csv' next to the module) was
    # assigned and immediately overwritten, so only the live path is kept.
    f = os.path.join(p, '../Data/events/shamsi_events.csv')
    events = pd.read_csv(f, encoding="utf-8")
    return list(set(events["event"].tolist()))


def initialize() -> Tuple[list, list]:
used_words = get_list_from(os.path.join(p, 'used_words.txt'))
used_combs = get_list_from(os.path.join(p, 'used_combs.txt'))
used_words = get_list_from(os.path.join(p, '../Data/intent/used_words.txt'))
used_combs = get_list_from(os.path.join(p, '../Data/intent/used_combs.txt'))

c = parse_shamsi_events()
used_combs[3] += c
Expand Down
5 changes: 3 additions & 2 deletions split.py → Code/split.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,16 @@
# coding: utf-8

# In[1]:

import os

from hazm import *

# In[2]:


combs = []
with open("split_combs", "r") as a_file:
p = os.path.dirname(os.path.abspath(__file__))
with open(p + "/../Data/split/split_combs", "r") as a_file:
for line in a_file:
combs.append(line.strip())

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
from hazm import *

# Directory containing this module; data files are resolved relative to it.
p = os.path.dirname(os.path.abspath(__file__))

# Read the CSV file itself. The directory `p` used to be passed to read_csv,
# which cannot be opened as a file, so importing this module failed.
f = os.path.join(p, "../Data/intent/answers_clean.csv")
answers_clean = pd.read_csv(f, index_col=0)

f = os.path.join(p, "../Data/split/stop_words_short.txt")
# NOTE(review): text_file is left open because the code after this point is
# not visible here; close it once it is no longer needed.
text_file = open(f,
                 encoding="utf8")  # I'm not really sure about removing stop words
stop_words = text_file.read().split("\n")
Expand Down Expand Up @@ -95,7 +95,7 @@ def main():
padded = max_padding(vectors, max_tokens_per_line)
df = pd.DataFrame(padded)
p = os.path.dirname(os.path.abspath(__file__))
f = os.path.join(p, "tf_idf_padded.csv")
f = os.path.join(p, "../Data/intent/tf_idf_padded.csv")
df.to_csv(f)


Expand Down
File renamed without changes.
6 changes: 3 additions & 3 deletions utility.py → Code/utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def get_english_names(city: str) -> Tuple[str, str]:
p = os.path.dirname(os.path.abspath(__file__))
f = os.path.join(p, "cities15000.txt")
f = os.path.join(p, "../Data/cities/cities15000.txt")
df = pd.read_csv(f,
usecols=[1, 2, 3, 8, 14], sep='\t',
encoding="utf-8", header=None)
Expand All @@ -19,7 +19,7 @@ def get_english_names(city: str) -> Tuple[str, str]:
cc = target_row[8]

p = os.path.dirname(os.path.abspath(__file__))
f = os.path.join(p, "IP2LOCATION-COUNTRY-MULTILINGUAL.CSV")
f = os.path.join(p, "../Data/cities/IP2LOCATION-COUNTRY-MULTILINGUAL.CSV")
df = pd.read_csv(f,
encoding="utf-8",
header=None, skiprows=3736, nrows=249)
Expand All @@ -29,7 +29,7 @@ def get_english_names(city: str) -> Tuple[str, str]:

def get_lat_lon(eng_city: str) -> Tuple[float, float]:
p = os.path.dirname(os.path.abspath(__file__))
f = os.path.join(p, "cities15000.txt")
f = os.path.join(p, "../Data/cities/cities15000.txt")
df = pd.read_csv(f, usecols=[2, 4, 5, 14],
sep='\t',
encoding="utf-8", header=None)
Expand Down
2 changes: 1 addition & 1 deletion weather.py → Code/weather.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import requests

from utility import *
from .utility import *


class Weather:
Expand Down
Loading