forked from notadamking/Stock-Trading-Environment
TradingEnv.py
import gymnasium as gym
import pandas as pd
import gym_trading_env
import os
from stable_baselines3 import A2C
def preprocess(df):
    # Rename the Yahoo-Finance-style columns ("Open", "High", ...) to the lowercase
    # names used by the features below
    new_columns = {"Date": "date", "Open": "open", "High": "high", "Low": "low",
                   "Close": "close", "Adj Close": "adj_close", "Volume": "volume"}
    df.rename(columns=new_columns, inplace=True)
    # Create the feature : ( close[t] - close[t-1] ) / close[t-1]
    df["feature_close"] = df["close"].pct_change()
    # Create the feature : open[t] / close[t]
    df["feature_open"] = df["open"] / df["close"]
    # Create the feature : high[t] / close[t]
    df["feature_high"] = df["high"] / df["close"]
    # Create the feature : low[t] / close[t]
    df["feature_low"] = df["low"] / df["close"]
    # Create the feature : volume[t] / max(volume[t-7*24:t+1])
    df["feature_volume"] = df["volume"] / df["volume"].rolling(7 * 24).max()
    df.dropna(inplace=True)  # Clean again: drop the NaNs created by pct_change / rolling
    return df.copy()
#df = pd.read_csv('./data/HINDUNILVR.csv')
#df=preprocess(df)
#df.to_pickle('your_data.pkl')
# Each step, the environment will return 5 inputs : "feature_close", "feature_open", "feature_high", "feature_low", "feature_volume"
data_dir = 'data'
# Loop through all the CSV files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.csv'):
        # Load the CSV file into a pandas DataFrame, indexed by its Date column
        df = pd.read_csv(os.path.join(data_dir, filename))
        df['Date'] = pd.to_datetime(df['Date'])
        df = df.set_index('Date')
        # Preprocess the data (rename columns, build the "feature_*" inputs)
        df = preprocess(df)
        # Convert the DatetimeIndex to a string with the desired format
        df['date_str'] = df.index.strftime("%Y-%m-%d %H:%M")
        # Save the preprocessed data as a pickle file
        df.to_pickle(os.path.join(data_dir, filename[:-4] + '.pkl'))
env = gym.make(
    "MultiDatasetTradingEnv",
    preprocess=preprocess,
    dataset_dir='data/*.pkl',
    name="BTCUSD",
    positions=[-1, 0, 1],              # -1 (=SHORT), 0 (=OUT), +1 (=LONG)
    trading_fees=0.01/100,             # 0.01% per buy / sell (Binance-style fee)
    borrow_interest_rate=0.0003/100,   # 0.0003% per timestep (one timestep = one row of the CSV)
)
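# Optional sanity check of the spaces the environment exposes (standard Gymnasium
# attributes); the action space simply indexes into the `positions` list above:
# print(env.observation_space)   # one entry per "feature_*" column (plus any dynamic features)
# print(env.action_space)        # Discrete(3)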
# Create an RL agent with A2C algorithm and MlpPolicy
model = A2C("MlpPolicy", env, verbose=1) # You can adjust hyperparameters here
# Train the agent (adjust the number of training steps)
model.learn(total_timesteps=int(1e4))
# Run an episode until it ends :
done, truncated = False, False
observation, info = env.reset()
print("Observation:")
print(observation)
while not done and not truncated:
    # Use the trained agent to pick a position by its index in the position list ([-1, 0, 1])
    action, _ = model.predict(observation)
    # Execute the chosen action in the environment
    observation, reward, done, truncated, info = env.step(action)
    # Alternative: pick a random position index at every timestep
    # position_index = env.action_space.sample()
    # observation, reward, done, truncated, info = env.step(position_index)
# Save the episode logs so they can be rendered later
env.unwrapped.save_for_render(dir="render_logs")
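# Optionally, the logs saved above can be browsed with gym-trading-env's web renderer
# (a sketch, assuming the Renderer class described in the gym_trading_env docs):
# from gym_trading_env.renderer import Renderer
# renderer = Renderer(render_logs_dir="render_logs")
# renderer.run()   # starts a local web app showing the saved episodes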
# Optionally, save the trained model for later use
model.save("trading_agent")
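# The saved agent can later be reloaded and reused, e.g. (a sketch using the
# standard stable_baselines3 API):
# model = A2C.load("trading_agent")
# model.set_env(env)                        # re-attach an environment
# model.learn(total_timesteps=int(1e4))     # continue training, or call model.predict(...)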