initial

howtokim · May 1, 2018 · e4f8711 · e4f8711
commit e4f8711
Show file tree

Hide file tree

Showing 36 changed files with 96,462 additions and 0 deletions.
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2018 Moon Kwon Kim
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+# RLTrader: 딥러닝으로 하는 주식 투자
+
+이 프로젝트는 출판 예정이고 아래 라이센스를 따릅니다.
+
+<a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by-nc-sa/4.0/88x31.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc-sa/4.0/">Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License</a>.
diff --git a/_main.py b/_main.py
@@ -0,0 +1,139 @@
+import logging
+import os
+import settings
+import data_manager
+from policy_learner import PolicyLearner
+
+
+def train(stock_code, data):
+    # 기간 필터링
+    training_data = data[(data['date'] >= '2016-01-01') &
+                                  (data['date'] <= '2016-12-31')]
+    training_data = training_data.dropna()
+    # testing_data = data[(data['date'] >= '2016-01-01') &
+    #                               (data['date'] <= '2016-12-31')]
+    testing_data = data[(data['date'] >= '2017-01-01') &
+                                  (data['date'] <= '2017-12-31')]
+    testing_data = testing_data.dropna()
+
+    # 차트 데이터 분리
+    features = ['date', 'open', 'high', 'low', 'close', 'volume']
+    training_chart_data = training_data[features]
+    testing_chart_data = testing_data[features]
+
+    # 학습 데이터 분리
+    features_training_data = [
+        'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
+        'close_lastclose_ratio', 'volume_lastvolume_ratio',
+        'close_ma5_ratio', 'volume_ma5_ratio',
+        'close_ma10_ratio', 'volume_ma10_ratio',
+        'close_ma20_ratio', 'volume_ma20_ratio',
+        'close_ma60_ratio', 'volume_ma60_ratio',
+        'close_ma120_ratio', 'volume_ma120_ratio'
+    ]
+    training_data = training_data[features_training_data]
+    testing_data = testing_data[features_training_data]
+
+    # 강화학습 시작
+    min_trading_unit = 1
+    max_trading_unit = 1
+    delayed_reward_threshold = .05
+    start_epsilon = .5
+    model_path = ''
+    if stock_code == '005930':  # 삼성전자
+        min_trading_unit = 1
+        max_trading_unit = 1
+        delayed_reward_threshold = .05
+        model_path = os.path.join(settings.BASE_DIR, 'models/005930/model_20180318093401.h5')
+    if stock_code == '000660':  # SK하이닉스
+        min_trading_unit = 10
+        max_trading_unit = 10
+        delayed_reward_threshold = .05
+        model_path = os.path.join(settings.BASE_DIR, 'models/000660/model_20180318105259.h5')
+    if stock_code == '005380':  # 현대차
+        min_trading_unit = 5
+        max_trading_unit = 5
+        delayed_reward_threshold = .02
+        model_path = os.path.join(settings.BASE_DIR, 'models/005380/model_20180328005205.h5')
+    if stock_code == '051910':  # LG화학
+        min_trading_unit = 1
+        max_trading_unit = 1
+        delayed_reward_threshold = .05
+        model_path = os.path.join(settings.BASE_DIR, 'models/051910/model_20180318020318.h5')
+    if stock_code == '035420':  # NAVER
+        min_trading_unit = 1
+        max_trading_unit = 1
+        delayed_reward_threshold = .05
+        model_path = os.path.join(settings.BASE_DIR, 'models/035420/model_20180318143434.h5')
+    if stock_code == '015760':  # 한국전력
+        min_trading_unit = 10
+        max_trading_unit = 10
+        model_path = os.path.join(settings.BASE_DIR, 'models/015760/model_20180318032850.h5')
+    if stock_code == '030200':  # KT
+        min_trading_unit = 20
+        max_trading_unit = 20
+        model_path = os.path.join(settings.BASE_DIR, 'models/030200/model_20180318001555.h5')
+    if stock_code == '035250':  # 강원랜드
+        min_trading_unit = 30
+        max_trading_unit = 30
+        model_path = os.path.join(settings.BASE_DIR, 'models/035250/model_20180318043300.h5')
+    if stock_code == '009240':  # 한샘 x
+        min_trading_unit = 5
+        max_trading_unit = 5
+        model_path = os.path.join(settings.BASE_DIR, 'models/009240/model_20180318035122.h5')
+
+    # 학습
+    # policy_learner = PolicyLearner(
+    #     stock_code=stock_code, chart_data=training_chart_data, training_data=training_data,
+    #     min_trading_unit=min_trading_unit, max_trading_unit=max_trading_unit,
+    #     delayed_reward_threshold=delayed_reward_threshold, lr=.0001)
+    # policy_learner.fit(balance=10000000, num_epoches=1000,
+    #                    discount_factor=0, start_epsilon=start_epsilon)
+    #
+    # # 정책 신경망을 파일로 저장
+    # model_dir = os.path.join(settings.BASE_DIR, 'models/%s' % stock_code)
+    # if not os.path.isdir(model_dir):
+    #     os.makedirs(model_dir)
+    # model_path = os.path.join(model_dir, 'model_%s.h5' % timestr)
+    # policy_learner.policy_network.save_model(model_path)
+
+    # 테스팅
+    policy_learner = PolicyLearner(
+        stock_code=stock_code, chart_data=testing_chart_data, training_data=testing_data,
+        min_trading_unit=min_trading_unit, max_trading_unit=max_trading_unit)
+    policy_learner.trade(model_path, balance=10000000)
+
+
+if __name__ == '__main__':
+    list_stock_code = [
+        '005930',  # 삼성전자 ok
+        '000660',  # SK하이닉스 ok
+        '005380',  # 현대차 ok
+        '051910',  # LG화학 ok
+        '035420',  # NAVER ok
+        # '015760',  # 한국전력
+        '030200',  # KT ok
+        # '035250',  # 강원랜드
+        # '009240',  # 한샘
+    ]
+
+    for stock_code in list_stock_code:
+        # 로그 기록
+        log_dir = os.path.join(settings.BASE_DIR, 'logs/%s' % stock_code)
+        timestr = settings.get_time_str()
+        file_handler = logging.FileHandler(filename=os.path.join(
+            log_dir, "%s_%s.log" % (stock_code, timestr)), encoding='utf-8')
+        stream_handler = logging.StreamHandler()
+        file_handler.setLevel(logging.DEBUG)
+        stream_handler.setLevel(logging.INFO)
+        logging.basicConfig(format="%(message)s",
+            handlers=[file_handler, stream_handler], level=logging.DEBUG)
+
+        # 주식 데이터 준비
+        chart_data = data_manager.load_chart_data(
+            os.path.join(settings.BASE_DIR,
+                         'chart_data/{}.csv'.format(stock_code)))
+        prep_data = data_manager.preprocess(chart_data)
+        training_data = data_manager.build_training_data(prep_data)
+
+        train(stock_code, training_data)
diff --git a/_main_notraining.py b/_main_notraining.py
@@ -0,0 +1,58 @@
+import logging
+import os
+import settings
+import data_manager
+from policy_learner import PolicyLearner
+
+
+if __name__ == '__main__':
+    stock_code = '005930'  # 삼성전자
+    model_ver = '20180202000545'
+
+    # 로그 기록
+    log_dir = os.path.join(settings.BASE_DIR, 'logs/%s' % stock_code)
+    timestr = settings.get_time_str()
+    file_handler = logging.FileHandler(filename=os.path.join(
+        log_dir, "%s_%s.log" % (stock_code, timestr)), encoding='utf-8')
+    stream_handler = logging.StreamHandler()
+    file_handler.setLevel(logging.DEBUG)
+    stream_handler.setLevel(logging.INFO)
+    logging.basicConfig(format="%(message)s",
+        handlers=[file_handler, stream_handler], level=logging.DEBUG)
+
+    # 주식 데이터 준비
+    chart_data = data_manager.load_chart_data(
+        os.path.join(settings.BASE_DIR,
+                     'data/chart_data/{}.csv'.format(stock_code)))
+    prep_data = data_manager.preprocess(chart_data)
+    training_data = data_manager.build_training_data(prep_data)
+
+    # 기간 필터링
+    training_data = training_data[(training_data['date'] >= '2018-01-01') &
+                                  (training_data['date'] <= '2018-01-31')]
+    training_data = training_data.dropna()
+
+    # 차트 데이터 분리
+    features_chart_data = ['date', 'open', 'high', 'low', 'close', 'volume']
+    chart_data = training_data[features_chart_data]
+
+    # 학습 데이터 분리
+    features_training_data = [
+        'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio',
+        'close_lastclose_ratio', 'volume_lastvolume_ratio',
+        'close_ma5_ratio', 'volume_ma5_ratio',
+        'close_ma10_ratio', 'volume_ma10_ratio',
+        'close_ma20_ratio', 'volume_ma20_ratio',
+        'close_ma60_ratio', 'volume_ma60_ratio',
+        'close_ma120_ratio', 'volume_ma120_ratio'
+    ]
+    training_data = training_data[features_training_data]
+
+    # 비 학습 투자 시뮬레이션 시작
+    policy_learner = PolicyLearner(
+        stock_code=stock_code, chart_data=chart_data, training_data=training_data,
+        min_trading_unit=1, max_trading_unit=3)
+    policy_learner.trade(balance=10000000,
+                         model_path=os.path.join(
+                             settings.BASE_DIR,
+                             'models/{}/model_{}.h5'.format(stock_code, model_ver)))
diff --git a/agent.py b/agent.py
@@ -0,0 +1,168 @@
+import numpy as np
+
+
+class Agent:
+    # 에이전트 상태가 구성하는 값 개수
+    STATE_DIM = 2  # 주식 보유 비율, 포트폴리오 가치 비율
+
+    # 매매 수수료 및 세금
+    TRADING_CHARGE = 0  # 거래 수수료 미고려 (일반적으로 0.015%)
+    TRADING_TAX = 0  # 거래세 미고려 (실제0.3%)
+
+    # 행동
+    ACTION_BUY = 0  # 매수
+    ACTION_SELL = 1  # 매도
+    ACTION_HOLD = 2  # 홀딩
+    ACTIONS = [ACTION_BUY, ACTION_SELL]  # 인공 신경망에서 확률을 구할 행동들
+    NUM_ACTIONS = len(ACTIONS)  # 인공 신경망에서 고려할 출력값의 개수
+
+    def __init__(
+        self, environment, min_trading_unit=1, max_trading_unit=2, 
+        delayed_reward_threshold=.05):
+        # Environment 객체
+        self.environment = environment  # 현재 주식 가격을 가져오기 위해 환경 참조
+
+        # 최소 매매 단위, 최대 매매 단위, 지연보상 임계치
+        self.min_trading_unit = min_trading_unit  # 최소 단일 거래 단위
+        self.max_trading_unit = max_trading_unit  # 최대 단일 거래 단위
+        self.delayed_reward_threshold = delayed_reward_threshold  # 지연보상 임계치
+
+        # Agent 클래스의 속성
+        self.initial_balance = 0  # 초기 자본금
+        self.balance = 0  # 현재 현금 잔고
+        self.num_stocks = 0  # 보유 주식 수
+        self.portfolio_value = 0  # balance + num_stocks * {현재 주식 가격}
+        self.base_portfolio_value = 0  # 직전 학습 시점의 PV
+        self.num_buy = 0  # 매수 횟수
+        self.num_sell = 0  # 매도 횟수
+        self.num_hold = 0  # 홀딩 횟수
+        self.immediate_reward = 0  # 즉시 보상
+
+        # Agent 클래스의 상태
+        self.ratio_hold = 0  # 주식 보유 비율
+        self.ratio_portfolio_value = 0  # 포트폴리오 가치 비율
+
+    def reset(self):
+        self.balance = self.initial_balance
+        self.num_stocks = 0
+        self.portfolio_value = self.initial_balance
+        self.base_portfolio_value = self.initial_balance
+        self.num_buy = 0
+        self.num_sell = 0
+        self.num_hold = 0
+        self.immediate_reward = 0
+        self.ratio_hold = 0
+        self.ratio_portfolio_value = 0
+
+    def set_balance(self, balance):
+        self.initial_balance = balance
+
+    def get_states(self):
+        self.ratio_hold = self.num_hold / int(
+            self.portfolio_value / self.environment.get_price())
+        self.ratio_portfolio_value = self.portfolio_value / self.base_portfolio_value
+        return (
+            self.ratio_hold,
+            self.ratio_portfolio_value
+        )
+
+    def decide_action(self, policy_network, sample, epsilon):
+        confidence = 0.
+        # 탐험 결정
+        if np.random.rand() < epsilon:
+            exploration = True
+            action = np.random.randint(self.NUM_ACTIONS)  # 무작위로 행동 결정
+        else:
+            exploration = False
+            probs = policy_network.predict(sample)  # 각 행동에 대한 확률
+            action = np.argmax(probs)
+            confidence = 1 + probs[action]
+        return action, confidence, exploration
+
+    def validate_action(self, action):
+        validity = True
+        if action == Agent.ACTION_BUY:
+            # 적어도 1주를 살 수 있는지 확인
+            if self.balance < self.environment.get_price() * (
+                1 + self.TRADING_CHARGE) * self.min_trading_unit:
+                validity = False
+        elif action == Agent.ACTION_SELL:
+            # 주식 잔고가 있는지 확인 
+            if self.num_stocks <= 0:
+                validity = False
+        return validity
+
+    def decide_trading_unit(self, confidence):
+        if np.isnan(confidence):
+            return self.min_trading_unit
+        added_traiding = max(min(
+            int(confidence * (self.max_trading_unit - self.min_trading_unit)),
+            self.max_trading_unit-self.min_trading_unit
+        ), 0)
+        return self.min_trading_unit + added_traiding
+
+    def act(self, action, confidence):
+        if not self.validate_action(action):
+            action = Agent.ACTION_HOLD
+
+        # 환경에서 현재 가격 얻기
+        curr_price = self.environment.get_price()
+
+        # 즉시 보상 초기화
+        self.immediate_reward = 0
+
+        # 매수
+        if action == Agent.ACTION_BUY:
+            # 매수할 단위를 판단
+            trading_unit = self.decide_trading_unit(confidence)
+            balance = self.balance - curr_price * (1 + self.TRADING_CHARGE) * trading_unit
+            # 보유 현금이 모자랄 경우 보유 현금으로 가능한 만큼 최대한 매수
+            if balance < 0:
+                trading_unit = max(min(
+                    int(self.balance / (
+                        curr_price * (1 + self.TRADING_CHARGE))), self.max_trading_unit),
+                    self.min_trading_unit
+                )
+            # 수수료를 적용하여 총 매수 금액 산정
+            invest_amount = curr_price * (1 + self.TRADING_CHARGE) * trading_unit
+            self.balance -= invest_amount  # 보유 현금을 갱신
+            self.num_stocks += trading_unit  # 보유 주식 수를 갱신
+            self.num_buy += 1  # 매수 횟수 증가
+
+        # 매도
+        elif action == Agent.ACTION_SELL:
+            # 매도할 단위를 판단
+            trading_unit = self.decide_trading_unit(confidence)
+            # 보유 주식이 모자랄 경우 가능한 만큼 최대한 매도
+            trading_unit = min(trading_unit, self.num_stocks)
+            # 매도
+            invest_amount = curr_price * (
+                1 - (self.TRADING_TAX + self.TRADING_CHARGE)) * trading_unit
+            self.num_stocks -= trading_unit  # 보유 주식 수를 갱신
+            self.balance += invest_amount  # 보유 현금을 갱신
+            self.num_sell += 1  # 매도 횟수 증가
+
+        # 홀딩
+        elif action == Agent.ACTION_HOLD:
+            self.num_hold += 1  # 홀딩 횟수 증가
+
+        # 포트폴리오 가치 갱신
+        self.portfolio_value = self.balance + curr_price * self.num_stocks
+        profitloss = (
+            (self.portfolio_value - self.base_portfolio_value) / self.base_portfolio_value)
+
+        # 즉시 보상 판단
+        self.immediate_reward = 1 if profitloss >= 0 else -1
+
+        # 지연 보상 판단
+        if profitloss > self.delayed_reward_threshold:
+            delayed_reward = 1
+            # 목표 수익률 달성하여 기준 포트폴리오 가치 갱신
+            self.base_portfolio_value = self.portfolio_value
+        elif profitloss < -self.delayed_reward_threshold:
+            delayed_reward = -1
+            # 손실 기준치를 초과하여 기준 포트폴리오 가치 갱신
+            self.base_portfolio_value = self.portfolio_value
+        else:
+            delayed_reward = 0
+        return self.immediate_reward, delayed_reward