-
Notifications
You must be signed in to change notification settings - Fork 1
/
eventStudyIntoSimulator.py
101 lines (82 loc) · 3.29 KB
/
eventStudyIntoSimulator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
'''
(c) 2011, 2012 Georgia Tech Research Corporation
This source code is released under the New BSD license. Please see
http://wiki.quantsoftware.org/index.php?title=QSTK_License
for license details.
Created on January, 23, 2013
@author: Sourabh Bajaj
@contact: [email protected]
@summary: Event Profiler Tutorial
'''
import pandas as pd
import numpy as np
import math
import copy
import QSTK.qstkutil.qsdateutil as du
import datetime as dt
import QSTK.qstkutil.DataAccess as da
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkstudy.EventProfiler as ep
"""
Accepts a list of symbols and a dict of price DataFrames keyed by field name
(only 'actual_close' is read).
Returns the Event Matrix, which is a pandas DataFrame.
Event matrix has the following structure :
|IBM |GOOG|XOM |MSFT| GS | JP |
(d1)|nan |nan | 1 |nan |nan | 1 |
(d2)|nan | 1 |nan |nan |nan |nan |
(d3)| 1 |nan | 1 |nan | 1 |nan |
(d4)|nan | 1 |nan | 1 |nan |nan |
...................................
...................................
Also, d1 = start date
nan = no information about any event.
1 = status bit (positively confirms the event occurrence)
"""
def find_events(ls_symbols, d_data, f_threshold=7.0):
    ''' Finding the event dataframe

    An event is recorded for a symbol on a given row when its actual close
    is below f_threshold on that row and was at or above f_threshold on the
    previous row. The first row never produces an event because it has no
    previous row to compare against.

    ls_symbols  -- symbols to scan; each must be a column of
                   d_data['actual_close'].
    d_data      -- dict of price DataFrames keyed by field name; only
                   'actual_close' is read.
    f_threshold -- price level whose downward crossing counts as an event.
                   Defaults to 7.0, the value that was previously hard-coded.

    Returns a DataFrame with the same index and columns as the actual-close
    frame, holding 1 where an event occurred and NaN everywhere else.
    '''
    df_close = d_data['actual_close']
    print("Finding Events")

    # Creating an empty (all-NaN) dataframe with the same labels as the
    # prices. The multiplication already allocates a new frame, so the
    # input is never mutated and no explicit copy is needed.
    df_events = df_close * np.nan

    for s_sym in ls_symbols:
        ser_today = df_close[s_sym]
        # Previous row's price aligned on today's row; the first entry
        # becomes NaN, and every comparison against NaN is False.
        ser_yest = ser_today.shift(1)
        ser_crossed = (ser_today < f_threshold) & (ser_yest >= f_threshold)
        # Replace the whole column in one assignment instead of writing
        # cell by cell through chained indexing, which is not guaranteed
        # to write through to df_events.
        df_events[s_sym] = np.where(ser_crossed.values, 1.0, np.nan)

    return df_events
if __name__ == '__main__':
    # Script entry point: fetch actual-close prices for the study window,
    # detect events with find_events, and write a Buy order on each event
    # plus a matching Sell order a fixed number of rows later to the
    # "trade-book" file.
    dt_start = dt.datetime(2008, 1, 1)
    dt_end = dt.datetime(2009, 12, 31)
    # NOTE(review): the 16-hour offset presumably stamps each trading day
    # at the market close -- confirm against qsdateutil.getNYSEdays.
    ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

    dataobj = da.DataAccess('Yahoo')

    # 2012 group
    ls_symbols = dataobj.get_symbols_from_list('sp5002012')
    ls_symbols.append('SPY')

    # Only the actual close is needed; the other available keys are
    # 'open', 'high', 'low', 'close' and 'volume'.
    ls_keys = ['actual_close']
    print("fetching data")
    ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
    print("data fetched")
    d_data = dict(zip(ls_keys, ldf_data))

    # Fill gaps forward, then backward, and finally replace anything still
    # missing with 1.0 so that no NaN reaches the event comparison.
    for s_key in ls_keys:
        d_data[s_key] = d_data[s_key].fillna(method='ffill')
        d_data[s_key] = d_data[s_key].fillna(method='bfill')
        d_data[s_key] = d_data[s_key].fillna(1.0)

    df_events = find_events(ls_symbols, d_data)

    print("Generating trade plan")
    # Number of rows in ldt_timestamps between the Buy and its Sell.
    i_hold_days = 5
    i_last = len(ldt_timestamps) - 1
    with open("trade-book", 'w') as outfile:
        for symbol in ls_symbols:
            # Look the column up once per symbol rather than once per cell.
            ser_events = df_events[symbol]
            for idx, dt_buy in enumerate(ldt_timestamps):
                if ser_events.loc[dt_buy] == 1:
                    outfile.write("%d,%d,%d,%s,Buy,100,\n" % (
                        dt_buy.year, dt_buy.month, dt_buy.day, symbol))
                    # Sell after the holding period, clamped to the last
                    # available timestamp when the window runs out.
                    dt_sell = ldt_timestamps[min(idx + i_hold_days, i_last)]
                    outfile.write("%d,%d,%d,%s,Sell,100,\n" % (
                        dt_sell.year, dt_sell.month, dt_sell.day, symbol))