061 시계열 프로젝트 - 주가 예측

키워드: 주가 예측, 금융 시계열, 기술적 분석

개요

주가 예측은 가장 도전적인 시계열 문제 중 하나입니다. 이 글에서는 기술적 분석 지표를 특성으로 활용해 FLAML로 주가 예측 모델을 구축합니다. 실습에는 실제 시세 대신 시뮬레이션으로 생성한 가상 주가 데이터를 사용합니다.

실습 환경

Python 버전: 3.11 권장
필요 패키지: flaml[automl], pandas, numpy, matplotlib (실제 시세 데이터를 사용할 경우 yfinance, 선택 사항으로 ta)

pip install flaml[automl] pandas numpy matplotlib yfinance ta

주가 데이터 수집

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Synthetic OHLCV price history (for real data, yfinance is recommended).
np.random.seed(42)  # fixed seed so the simulated series is reproducible
n_days = 1000

# Calendar of n_days business-day ('B') timestamps ending at the current time.
dates = pd.date_range(end=datetime.now(), periods=n_days, freq='B')

# Base price path: exponentiated running sum of Gaussian daily returns
# (scale 0.02), multiplied by 100.
returns = 0.02 * np.random.randn(n_days)
prices = 100 * np.exp(returns.cumsum())

# Derive OHLCV around the base path. The random draws happen in the order
# high, low, open, close, volume so the seeded stream is consumed identically.
high = prices * (1 + np.random.uniform(0, 0.03, n_days))
low = prices * (1 - np.random.uniform(0, 0.03, n_days))
day_range = high - low
# Open and close each land between 20% and 80% of the day's low-high range.
open_price = low + day_range * np.random.uniform(0.2, 0.8, n_days)
close = low + day_range * np.random.uniform(0.2, 0.8, n_days)
volume = np.random.uniform(1e6, 5e6, n_days)

column_names = ['date', 'open', 'high', 'low', 'close', 'volume']
column_values = [dates, open_price, high, low, close, volume]
df = pd.DataFrame(dict(zip(column_names, column_values)))

print("주가 데이터:")
print(df.tail())
print(f"\n기간: {df['date'].min().date()} ~ {df['date'].max().date()}")

주가 시각화

# Two stacked panels: closing price on top, traded volume underneath.
fig, axes = plt.subplots(2, 1, figsize=(14, 10))
price_ax, volume_ax = axes

# Closing-price line.
price_ax.plot(df['date'], df['close'])
price_ax.set(title='Stock Price (Close)', ylabel='Price')

# Volume bars.
volume_ax.bar(df['date'], df['volume'], width=1, alpha=0.7)
volume_ax.set(title='Volume', ylabel='Volume')

plt.tight_layout()
plt.show()

기술적 분석 지표

이동 평균

def add_moving_averages(df, price_col='close'):
    """Return a copy of ``df`` with moving-average features appended.

    Added columns, in order: ``sma_5``, ``sma_10``, ``sma_20``, ``sma_50``,
    ``sma_200`` (simple rolling means), ``ema_12``, ``ema_26`` (exponential
    means computed with ``adjust=False``), ``golden_cross`` (1 where
    ``sma_50`` > ``sma_200``) and ``price_above_sma20`` (1 where the price is
    above ``sma_20``).

    While a rolling mean is still NaN the comparison is False, so both flag
    columns are 0 on those rows.

    Args:
        df: DataFrame containing ``price_col``; it is not modified.
        price_col: Name of the price column to average.

    Returns:
        The augmented copy of ``df``.
    """
    out = df.copy()
    price = out[price_col]

    sma_windows = (5, 10, 20, 50, 200)
    ema_spans = (12, 26)

    # Simple moving averages (SMA).
    for n in sma_windows:
        out[f'sma_{n}'] = price.rolling(window=n).mean()

    # Exponential moving averages (EMA).
    for n in ema_spans:
        out[f'ema_{n}'] = price.ewm(span=n, adjust=False).mean()

    # Golden/dead-cross state and price-versus-SMA20 state as 0/1 flags.
    long_trend_up = out['sma_50'].gt(out['sma_200'])
    above_sma20 = price.gt(out['sma_20'])
    out['golden_cross'] = long_trend_up.astype(int)
    out['price_above_sma20'] = above_sma20.astype(int)

    return out

# Append the moving-average features and preview the most recent rows.
df = add_moving_averages(df)
ma_preview_cols = ['date', 'close', 'sma_20', 'sma_50', 'ema_12']
print("이동 평균 지표:")
print(df[ma_preview_cols].tail())

모멘텀 지표

def add_momentum_indicators(df, price_col='close'):
    """Return a copy of ``df`` with momentum oscillators appended.

    Added columns:
        * ``rsi`` - 14-period RSI built from simple rolling means of the
          gains and losses.
        * ``macd``, ``macd_signal``, ``macd_hist`` - difference of the 12-
          and 26-span EMAs, its 9-span EMA, and the gap between the two.
        * ``stoch_k``, ``stoch_d`` - position of the price inside the
          14-period low/high range (x100) and its 3-period mean.
        * ``roc_5``, ``roc_10``, ``roc_20`` - percentage change versus the
          price 5/10/20 rows earlier.

    Args:
        df: DataFrame containing ``price_col``, ``'high'`` and ``'low'``;
            it is not modified.
        price_col: Name of the price column.

    Returns:
        The augmented copy of ``df``.
    """
    out = df.copy()
    price = out[price_col]

    # RSI (Relative Strength Index). Entries that fail the condition,
    # including the leading NaN difference, are replaced by 0.
    change = price.diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    mean_up = ups.rolling(window=14).mean()
    mean_down = downs.rolling(window=14).mean()
    relative_strength = mean_up / mean_down
    out['rsi'] = 100 - (100 / (1 + relative_strength))

    # MACD line, signal line and histogram.
    fast_ema = price.ewm(span=12, adjust=False).mean()
    slow_ema = price.ewm(span=26, adjust=False).mean()
    out['macd'] = fast_ema - slow_ema
    out['macd_signal'] = out['macd'].ewm(span=9, adjust=False).mean()
    out['macd_hist'] = out['macd'] - out['macd_signal']

    # Stochastic oscillator.
    lowest_low = out['low'].rolling(window=14).min()
    highest_high = out['high'].rolling(window=14).max()
    out['stoch_k'] = 100 * (price - lowest_low) / (highest_high - lowest_low)
    out['stoch_d'] = out['stoch_k'].rolling(window=3).mean()

    # Rate of Change (ROC), in percent.
    for lag in (5, 10, 20):
        lagged = price.shift(lag)
        out[f'roc_{lag}'] = ((price - lagged) / lagged) * 100

    return out

# Append the momentum oscillators and preview the most recent rows.
df = add_momentum_indicators(df)
momentum_preview_cols = ['date', 'close', 'rsi', 'macd', 'stoch_k']
print("\n모멘텀 지표:")
print(df[momentum_preview_cols].tail())

변동성 지표

def add_volatility_indicators(df, price_col='close'):
    """Return a copy of ``df`` with volatility features appended.

    Added columns:
        * ``bb_upper``, ``bb_lower`` - 20-period rolling mean plus/minus two
          rolling standard deviations.
        * ``bb_width`` - band width divided by the rolling mean.
        * ``bb_position`` - where the price sits between the lower and the
          upper band (0 at the lower band, 1 at the upper band).
        * ``atr`` - 14-period rolling mean of the true range.
        * ``returns`` - one-row percentage change of the price.
        * ``volatility_20`` - 20-period rolling standard deviation of
          ``returns`` multiplied by ``sqrt(252)``.

    Args:
        df: DataFrame containing ``price_col``, ``'high'`` and ``'low'``;
            it is not modified.
        price_col: Name of the price column.

    Returns:
        The augmented copy of ``df``.
    """
    out = df.copy()
    price = out[price_col]

    # Bollinger Bands.
    middle = price.rolling(window=20).mean()
    band_offset = price.rolling(window=20).std() * 2
    out['bb_upper'] = middle + band_offset
    out['bb_lower'] = middle - band_offset
    band_span = out['bb_upper'] - out['bb_lower']
    out['bb_width'] = band_span / middle
    out['bb_position'] = (price - out['bb_lower']) / band_span

    # ATR (Average True Range): row-wise maximum of the three range
    # candidates; NaN candidates (first row has no previous price) are skipped.
    previous_price = price.shift(1)
    range_candidates = [
        out['high'] - out['low'],
        (out['high'] - previous_price).abs(),
        (out['low'] - previous_price).abs(),
    ]
    true_range = pd.concat(range_candidates, axis=1).max(axis=1)
    out['atr'] = true_range.rolling(window=14).mean()

    # Historical volatility, scaled by sqrt(252).
    out['returns'] = price.pct_change()
    rolling_return_std = out['returns'].rolling(window=20).std()
    out['volatility_20'] = rolling_return_std * np.sqrt(252)

    return out

# Append the volatility features and preview the most recent rows.
df = add_volatility_indicators(df)
volatility_preview_cols = ['date', 'close', 'bb_upper', 'bb_lower', 'atr', 'volatility_20']
print("\n변동성 지표:")
print(df[volatility_preview_cols].tail())

거래량 지표

def add_volume_indicators(df, price_col='close', volume_col='volume'):
    """Return a copy of ``df`` with volume-based features appended.

    Added columns:
        * ``volume_sma_20`` - 20-period rolling mean of the volume.
        * ``volume_ratio`` - volume divided by ``volume_sma_20``.
        * ``obv`` - On-Balance Volume: running total that adds the row's
          volume when the price rose versus the previous row, subtracts it
          when the price fell, and is unchanged otherwise. The first row is
          always 0. The column is float64.
        * ``vpt`` - Volume Price Trend: cumulative sum of
          (percentage price change x volume).

    OBV is computed with vectorized NumPy operations instead of a per-row
    Python loop, so it no longer depends on a pre-seeded list whose length
    must match the frame.

    Args:
        df: DataFrame containing ``price_col`` and ``volume_col``; it is
            not modified.
        price_col: Name of the price column.
        volume_col: Name of the volume column.

    Returns:
        The augmented copy of ``df``.
    """
    df = df.copy()
    price = df[price_col]
    traded = df[volume_col]

    # Volume moving average and the volume relative to it.
    df['volume_sma_20'] = traded.rolling(window=20).mean()
    df['volume_ratio'] = traded / df['volume_sma_20']

    # On-Balance Volume (OBV). Comparisons against NaN are False, so the
    # first row (no previous price) and rows with a NaN price contribute 0,
    # exactly like the "unchanged" branch.
    price_change = price.diff()
    signed_volume = np.where(
        price_change > 0, traded,
        np.where(price_change < 0, -traded, 0.0),
    )
    # np.cumsum propagates a NaN volume forward, matching plain running
    # addition.
    df['obv'] = np.cumsum(signed_volume)

    # Volume Price Trend.
    df['vpt'] = (price.pct_change() * traded).cumsum()

    return df

# Append the volume-based features (volume_sma_20, volume_ratio, obv, vpt).
df = add_volume_indicators(df)

타겟 변수 생성

# Prediction targets: the future return, its direction, and the future price.
def create_target(df, price_col='close', horizon=1):
    """Return a copy of ``df`` with look-ahead target columns appended.

    Added columns:
        * ``target_return`` - percentage change of the price from this row
          to the row ``horizon`` steps later.
        * ``target_direction`` - 1 when ``target_return`` is positive,
          otherwise 0.
        * ``target_price`` - the price ``horizon`` rows later.

    The last ``horizon`` rows have no future price, so ``target_return`` and
    ``target_price`` are NaN there; ``target_direction`` is 0 on those rows
    because a NaN comparison is False.

    Args:
        df: DataFrame containing ``price_col``; it is not modified.
        price_col: Name of the price column.
        horizon: Number of rows to look ahead.

    Returns:
        The augmented copy of ``df``.
    """
    out = df.copy()
    price = out[price_col]

    # pct_change(horizon) stored on row t+horizon is the return earned from
    # row t; shifting by -horizon moves it back onto row t.
    future_return = price.pct_change(horizon).shift(-horizon)
    out['target_return'] = future_return

    # Direction flag (up = 1, otherwise 0).
    out['target_direction'] = future_return.gt(0).astype(int)

    # Price observed `horizon` rows ahead.
    out['target_price'] = price.shift(-horizon)

    return out

# Attach the one-step-ahead targets and preview the last ten rows.
df = create_target(df, horizon=1)
target_preview_cols = ['date', 'close', 'target_return', 'target_direction', 'target_price']
print("\n타겟 변수:")
print(df[target_preview_cols].tail(10))

FLAML 모델 학습

데이터 준비

from flaml import AutoML
from sklearn.metrics import mean_absolute_error, accuracy_score, r2_score

# Drop every row containing a NaN (indicator warm-up rows and rows whose
# future target is not available).
df_clean = df.dropna()

# Feature selection: everything except the date, raw OHLCV, the targets and
# the raw return column.
non_feature_cols = {
    'date', 'open', 'high', 'low', 'close', 'volume',
    'target_return', 'target_direction', 'target_price', 'returns',
}
feature_cols = [name for name in df_clean.columns if name not in non_feature_cols]

# Chronological 80/20 split: the first 80% of rows train, the rest test.
train_size = int(len(df_clean) * 0.8)
train, test = df_clean.iloc[:train_size], df_clean.iloc[train_size:]

X_train, X_test = train[feature_cols], test[feature_cols]
y_train_reg, y_test_reg = train['target_return'], test['target_return']  # regression target
y_train_clf, y_test_clf = train['target_direction'], test['target_direction']  # classification target

print(f"학습 데이터: {X_train.shape}")
print(f"테스트 데이터: {X_test.shape}")
print(f"특성 수: {len(feature_cols)}")

수익률 예측 (회귀)

# Regression model: predict the next-period return with FLAML.
reg_settings = {
    "task": "regression",
    "time_budget": 120,
    "metric": "mae",
    "split_type": "time",  # time-ordered validation splits
    "n_splits": 5,
    "verbose": 1,
}
automl_reg = AutoML()
automl_reg.fit(X_train, y_train_reg, **reg_settings)

# Evaluate on the held-out (most recent) rows.
y_pred_reg = automl_reg.predict(X_test)

print(f"\n회귀 모델: {automl_reg.best_estimator}")
print(f"MAE: {mean_absolute_error(y_test_reg, y_pred_reg):.6f}")
print(f"R²: {r2_score(y_test_reg, y_pred_reg):.4f}")

방향 예측 (분류)

from sklearn.metrics import classification_report

# Classification model: predict the next-period direction with FLAML.
clf_settings = {
    "task": "classification",
    "time_budget": 120,
    "metric": "accuracy",
    "split_type": "time",  # time-ordered validation splits
    "n_splits": 5,
    "verbose": 1,
}
automl_clf = AutoML()
automl_clf.fit(X_train, y_train_clf, **clf_settings)

# Evaluate on the held-out (most recent) rows.
y_pred_clf = automl_clf.predict(X_test)

print(f"\n분류 모델: {automl_clf.best_estimator}")
print(f"Accuracy: {accuracy_score(y_test_clf, y_pred_clf):.4f}")

# Per-class report; label 0 is shown as 'Down' and label 1 as 'Up'.
print("\n분류 리포트:")
print(classification_report(y_test_clf, y_pred_clf, target_names=['Down', 'Up']))

백테스팅

def simple_backtest(df_test, predictions, initial_capital=10000):
    """Simulate an all-in / all-out long-only strategy driven by 0/1 signals.

    For each row, in order: a signal of 1 while in cash buys with the whole
    capital at that row's close; a signal of 0 while invested sells the whole
    holding at that row's close. Any position still open after the last
    signal is valued at the final close of ``df_test`` (no trade is logged
    for that valuation). Trading costs are not modelled.

    Signals are read by position, so ``predictions`` may be a list, a NumPy
    array, or a pandas Series with any index.

    Args:
        df_test: DataFrame with a ``'close'`` column, one row per signal.
        predictions: Sequence of 0/1 signals aligned positionally with
            ``df_test``.
        initial_capital: Starting cash.

    Returns:
        ``(capital, trades)`` where ``capital`` is the final value and
        ``trades`` is a list of dicts: buys carry ``type``, ``price`` and
        ``shares``; sells carry ``type``, ``price`` and ``capital``.

    Raises:
        IndexError: If there are more signals than rows in ``df_test``.
    """
    closes = df_test['close'].to_numpy()
    # Positional view: avoids label-based lookup when a Series is passed.
    signals = np.asarray(predictions)
    if len(signals) > len(closes):
        raise IndexError(
            f"{len(signals)} predictions for only {len(closes)} price rows"
        )

    capital = initial_capital
    shares = 0.0
    position = 0  # 0: in cash, 1: holding the stock
    trades = []

    for price, signal in zip(closes, signals):
        if signal == 1 and position == 0:  # buy signal
            shares = capital / price
            position = 1
            trades.append({'type': 'buy', 'price': price, 'shares': shares})
            capital = 0
        elif signal == 0 and position == 1:  # sell signal
            capital = shares * price
            position = 0
            trades.append({'type': 'sell', 'price': price, 'capital': capital})

    # Value any position that is still open at the last available close.
    if position == 1:
        capital = shares * closes[-1]

    return capital, trades

# Run the backtest on the test period and compare it with buy-and-hold.
# 10000 is the default initial_capital of simple_backtest.
final_capital, trades = simple_backtest(test, y_pred_clf)
first_close, last_close = test['close'].iloc[0], test['close'].iloc[-1]
buy_hold_capital = 10000 * (last_close / first_close)

print("\n백테스팅 결과:")
print(f"  전략 수익률: {(final_capital / 10000 - 1) * 100:.2f}%")
print(f"  Buy & Hold 수익률: {(buy_hold_capital / 10000 - 1) * 100:.2f}%")
print(f"  총 거래 횟수: {len(trades)}")

결과 시각화

# Three stacked panels: price with signals, return forecast, cumulative returns.
fig, axes = plt.subplots(3, 1, figsize=(14, 12))
signal_ax, return_ax, cumulative_ax = axes

test_dates = test['date'].values
test_close = test['close'].values
up_mask = y_pred_clf == 1
down_mask = y_pred_clf == 0

# Price with the predicted direction marked on each day.
signal_ax.plot(test_dates, test_close, label='Actual', linewidth=2)
signal_ax.scatter(test_dates[up_mask], test_close[up_mask],
                  c='green', marker='^', s=30, label='Buy Signal', alpha=0.5)
signal_ax.scatter(test_dates[down_mask], test_close[down_mask],
                  c='red', marker='v', s=30, label='Sell Signal', alpha=0.5)
signal_ax.set_title('Stock Price with Predictions')
signal_ax.legend()

# Predicted versus actual return.
return_ax.plot(y_test_reg.values, label='Actual Return', alpha=0.7)
return_ax.plot(y_pred_reg, label='Predicted Return', alpha=0.7)
return_ax.axhline(y=0, color='black', linestyle='--')
return_ax.set_title('Return Prediction')
return_ax.legend()

# Cumulative return: the strategy earns the realised return only on rows
# where the predicted direction is 1; buy-and-hold earns it on every row.
realised_returns = y_test_reg.values
cumulative_strategy = np.cumprod(1 + realised_returns * y_pred_clf) - 1
cumulative_bh = np.cumprod(1 + realised_returns) - 1
cumulative_ax.plot(cumulative_strategy, label='Strategy')
cumulative_ax.plot(cumulative_bh, label='Buy & Hold')
cumulative_ax.set_title('Cumulative Returns')
cumulative_ax.legend()

plt.tight_layout()
plt.show()

주의사항

# Practical caveats, one (risk, explanation, mitigation) row per item.
# NOTE: the name `warnings` is the same as the standard-library module name.
_caveat_rows = [
    ('과적합', '과거 성과 ≠ 미래 성과', 'Walk-forward 검증'),
    ('거래 비용', '수수료, 세금 미반영', '거래 비용 시뮬레이션'),
    ('슬리피지', '주문과 체결 가격 차이', '현실적 슬리피지 가정'),
    ('시장 충격', '대량 주문의 가격 영향', '포지션 크기 제한'),
    ('규제', '알고리즘 거래 규제', '법률 검토'),
]
_caveat_headers = ('주의', '설명', '대응')

# Transpose the rows into one list per column header.
warnings = dict(zip(_caveat_headers, map(list, zip(*_caveat_rows))))

print("\n주가 예측 주의사항:")
print(pd.DataFrame(warnings).to_string(index=False))

정리

기술적 지표: SMA, EMA, RSI, MACD, 볼린저 밴드 등
타겟 변수: 수익률 (회귀) 또는 방향 (분류)
시간 기반 분할: 미래 데이터 누출 방지
백테스팅: 전략 성과 검증
한계: 시장 효율성, 거래 비용, 과적합 위험

다음 글 예고

다음 글에서는 시계열 프로젝트 - 수요 예측에 대해 알아보겠습니다. 제품 수요를 예측하는 비즈니스 시계열 프로젝트를 진행합니다.

FLAML AutoML 마스터 시리즈 #061

개요

실습 환경

주가 데이터 수집

주가 시각화

기술적 분석 지표

이동 평균

모멘텀 지표

변동성 지표

거래량 지표

타겟 변수 생성

FLAML 모델 학습

데이터 준비

수익률 예측 (회귀)

방향 예측 (분류)

백테스팅

결과 시각화

주의사항

정리

다음 글 예고

개요

실습 환경

주가 데이터 수집

주가 시각화

기술적 분석 지표

이동 평균

모멘텀 지표

변동성 지표

거래량 지표

타겟 변수 생성

FLAML 모델 학습

데이터 준비

수익률 예측 (회귀)

방향 예측 (분류)

백테스팅

결과 시각화

주의사항

정리

다음 글 예고