forked from tatsath/fin-ml
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
46 changed files
with
144,749 additions
and
0 deletions.
There are no files selected for viewing
9,471 changes: 9,471 additions & 0 deletions
9,471
...ng/Case Study 1 - NLP and Sentiments Analysis based Trading Strategy/Data/Project6500.csv
Large diffs are not rendered by default.
Oops, something went wrong.
20,551 changes: 20,551 additions & 0 deletions
20,551
...dy 1 - NLP and Sentiments Analysis based Trading Strategy/Data/lexicon_data/stock_lex.csv
Large diffs are not rendered by default.
Oops, something went wrong.
3,594 changes: 3,594 additions & 0 deletions
3,594
...timents Analysis based Trading Strategy/NLPandSentimentAnalysisBasedTradingStrategy.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
596 changes: 596 additions & 0 deletions
596
...age Processing/Case Study 2 - Digital Assistant-chat-bots/DigitalAssistant-chat-bot.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
141 changes: 141 additions & 0 deletions
141
...Language Processing/Case Study 2 - Digital Assistant-chat-bots/financial_ratio_adapter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
|
||
import spacy | ||
import random | ||
from itertools import product | ||
|
||
from spacy.util import minibatch, compounding | ||
from chatterbot.logic import LogicAdapter | ||
|
||
#Step 3: Data Preparation | ||
#We want our chatbot to be able to distinguish between subtle
#inquiries. For example, one might want to ask about the company
#Apple Inc by simply referring to it as Apple. In either case,
#we want to map it to a ticker, AAPL in this case. Commonly used
#phrases that refer to firms can be mapped to their tickers by
#using a dictionary such as
# Aliases a user may type for each supported company, keyed by ticker.
companies = {
    'AAPL': ['Apple', 'Apple Inc'],
    'BAC': ['BAML', 'BofA', 'Bank of America'],
    'C': ['Citi', 'Citibank'],
    'DAL': ['Delta', 'Delta Airlines']
}

# Aliases for each supported ratio, keyed by the slug that is substituted
# into the chart URL built by the logic adapter.
ratios = {
    'return-on-equity-ttm': ['ROE', 'Return on Equity'],
    'cash-from-operations-quarterly': ['CFO', 'Cash Flow from Operations'],
    # P/E is the price-to-earnings ratio. The earlier 'Price to equity'
    # alias is kept so phrasing that already resolved keeps resolving.
    'pe-ratio-ttm': ['PE', 'Price to equity', 'pe ratio', 'Price to earnings'],
    'revenue-ttm': ['Sales', 'Revenue'],
}

# Sentence skeletons used to synthesise training utterances; each one is
# filled in with a ratio alias and a company alias.
string_templates = ['Get me the {ratio} for {company}',
                    'What is the {ratio} for {company}?',
                    'Tell me the {ratio} for {company}',
                    ]
#Step 4: Model construction and training | ||
#Step 4.1 Model construction | ||
# Invert the alias tables: every alias maps straight back to its key
# (ticker for companies, slug for ratios).
companies_rev = {
    alias: ticker
    for ticker, aliases in companies.items()
    for alias in aliases
}

ratios_rev = {
    alias: slug
    for slug, aliases in ratios.items()
    for alias in aliases
}

# Flat alias lists that the sample generator draws from.
companies_list = list(companies_rev)
ratios_list = list(ratios_rev)

# Next, we create sample statements for our model. A helper function
# picks a random sentence structure asking about a random financial
# ratio for a random company. The samples are used to train a custom
# Named Entity Recognition model in the spaCy framework.
N_training_samples = 100
|
||
def get_training_sample(string_templates, ratios_list, companies_list):
    """Build one random NER training example.

    A template, a ratio alias and a company alias are drawn at random and
    the template is rendered with them.

    Args:
        string_templates: Format strings containing a ``{ratio}`` and a
            ``{company}`` replacement field.
        ratios_list: Candidate ratio aliases (strings).
        companies_list: Candidate company aliases (strings).

    Returns:
        A ``(sentence, annotations)`` tuple where ``annotations`` is
        ``{"entities": [(start, end, 'RATIO'), (start, end, 'COMPANY')]}``
        with character offsets into ``sentence``.

    Raises:
        ValueError: If any input sequence is empty, or the chosen template
            does not contain both replacement fields.
    """
    from string import Formatter

    if not (string_templates and ratios_list and companies_list):
        raise ValueError("templates, ratios and companies must all be non-empty")

    string_template = random.choice(string_templates)
    ratio = random.choice(ratios_list)
    company = random.choice(companies_list)
    values = {"ratio": ratio, "company": company}

    # Render the template piece by piece and note where each field lands.
    # Searching the finished sentence for the value instead would return
    # the first textual match, which is wrong whenever one value is a
    # substring of the other or of the template's literal text.
    pieces = []
    starts = {}
    cursor = 0
    for literal, field, spec, conversion in Formatter().parse(string_template):
        pieces.append(literal)
        cursor += len(literal)
        if field is None:
            continue
        # Rebuild the single replacement field so that any conversion or
        # format spec is honoured exactly as str.format would.
        placeholder = "{" + field
        if conversion:
            placeholder += "!" + conversion
        if spec:
            placeholder += ":" + spec
        placeholder += "}"
        rendered = placeholder.format(**values)
        if field in values and field not in starts:
            # Only the first occurrence of each field is annotated.
            starts[field] = cursor + rendered.index(values[field])
        pieces.append(rendered)
        cursor += len(rendered)

    missing = [name for name in ("ratio", "company") if name not in starts]
    if missing:
        raise ValueError(
            "template {!r} lacks field(s): {}".format(string_template, ", ".join(missing))
        )

    sent = "".join(pieces)
    ents = {"entities": [
        (starts["ratio"], starts["ratio"] + len(ratio), 'RATIO'),
        (starts["company"], starts["company"] + len(company), 'COMPANY'),
    ]}
    return (sent, ents)
|
||
#Defining the training data. | ||
# Synthetic training set: N_training_samples independently drawn
# (sentence, annotations) pairs.
TRAIN_DATA = [
    get_training_sample(string_templates, ratios_list, companies_list)
    for _ in range(N_training_samples)
]
#4.2. Setting the optimizer and Logical Adapter | ||
#First, we construct a blank model in spaCy. spaCy's models are statistical,
#and every "decision" they make is a prediction. Training the NER model is akin
#to updating the weights for each token. The most important step is to use a good optimizer.
# Start from an empty English pipeline and attach a fresh NER component.
# NOTE(review): create_pipe / add_pipe(component) / begin_training look
# like the spaCy 2.x API - confirm the pinned spaCy version.
nlp = spacy.blank("en")

ner = nlp.create_pipe("ner")
nlp.add_pipe(ner)

# Register the two entity types the logic adapter looks for.
for entity_label in ('RATIO', 'COMPANY'):
    ner.add_label(entity_label)

optimizer = nlp.begin_training()

move_names = list(ner.move_names)
# Pipes listed here stay enabled while training; all others are disabled.
pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
other_pipes = [name for name in nlp.pipe_names if name not in pipe_exceptions]
with nlp.disable_pipes(*other_pipes):  # only train NER
    # The batch-size generator is created once, so it keeps advancing
    # across epochs instead of restarting each time.
    sizes = compounding(1.0, 4.0, 1.001)
    for epoch in range(30):
        random.shuffle(TRAIN_DATA)
        losses = {}
        # Batch up the examples using spaCy's minibatch.
        for batch in minibatch(TRAIN_DATA, size=sizes):
            texts, annotations = zip(*batch)
            nlp.update(texts, annotations, sgd=optimizer, drop=0.35, losses=losses)
        # Report the accumulated losses once per epoch.
        print("Losses", losses)
|
||
|
||
#Setting the Logic Adapter | ||
class FinancialRatioAdapter(LogicAdapter):
    """Logic adapter that answers "<ratio> for <company>" questions.

    The module-level ``nlp`` model tags the user's text with RATIO and
    COMPANY entities. When exactly one known ratio alias and one known
    company alias are found, the reply is a link to the matching Zacks
    fundamentals chart; otherwise the reply is an apology with zero
    confidence.
    """

    def __init__(self, chatbot, **kwargs):
        super(FinancialRatioAdapter, self).__init__(chatbot, **kwargs)

    def process(self, statement, additional_response_selection_parameters=None):
        """Build a response statement for ``statement``.

        Args:
            statement: Input statement; only its ``text`` attribute is read.
            additional_response_selection_parameters: Accepted for
                interface compatibility and not used by this adapter.

        Returns:
            A ``Statement`` whose ``confidence`` is 1 when both a known
            ratio and a known company were recognised, else 0.
        """
        from chatterbot.conversation import Statement

        doc = nlp(statement.text)
        company = None
        ratio = None

        # We need exactly one company and one ratio.
        if len(doc.ents) == 2:
            for ent in doc.ents:
                if ent.label_ == "RATIO":
                    ratio = ent.text
                elif ent.label_ == "COMPANY":
                    company = ent.text

        # Both lookups must succeed. Summing per-entity scores instead
        # would let two recognised ratios (and no company) pass the
        # threshold and then fail on the company lookup.
        if ratio in ratios_rev and company in companies_rev:
            # Single-line literal so that no newline or indentation leaks
            # into the URL.
            outtext = 'https://www.zacks.com/stock/chart/{company}/fundamental/{ratio}'.format(
                ratio=ratios_rev[ratio], company=companies_rev[company])
            confidence = 1
        else:
            outtext = 'Sorry! Could not figure out what the user wants'
            confidence = 0

        output_statement = Statement(text=outtext)
        output_statement.confidence = confidence

        return output_statement
Binary file added
BIN
+3.53 MB
Chapter 10 - Natural Language Processing/Case Study 3 - Documents Summarization/10K.pdf
Binary file not shown.
Oops, something went wrong.