062 시계열 프로젝트 - 수요 예측

키워드: 수요 예측, demand forecasting, 재고 관리

개요

수요 예측은 재고 관리, 생산 계획, 인력 배치의 핵심입니다. 이 글에서는 제품 판매량 예측 모델을 구축하고 비즈니스 의사결정에 활용하는 방법을 알아봅니다.

실습 환경

Python 버전: 3.11 권장
필요 패키지: flaml[automl], pandas, numpy, scikit-learn, scipy, matplotlib

pip install "flaml[automl]" pandas numpy scikit-learn scipy matplotlib

수요 데이터 생성

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from flaml import AutoML
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Build a synthetic two-year daily demand history for several products.
np.random.seed(42)  # fixed seed keeps the generated numbers reproducible
n_days = 730  # two years of daily observations
n_products = 5

dates = pd.date_range('2022-01-01', periods=n_days, freq='D')
day_index = np.arange(n_days)

data_list = []
for product_id in range(1, n_products + 1):
    # Baseline demand level grows with the product id.
    base_demand = 100 * product_id

    # Even-numbered products trend upward, odd-numbered ones decline.
    trend_end = 50 if product_id % 2 == 0 else -20
    trend = np.linspace(0, trend_end, n_days)

    # Yearly cycle, phase-shifted per product.
    seasonality = base_demand * 0.3 * np.sin(day_index * 2 * np.pi / 365 + product_id)

    # Weekly cycle.
    weekly = base_demand * 0.1 * np.sin(day_index * 2 * np.pi / 7)

    # Promotions hit a random 10% of days and add 30-80% of the baseline.
    promo_days = np.random.choice(n_days, size=int(n_days * 0.1), replace=False)
    promo = np.zeros(n_days)
    promo[promo_days] = np.random.uniform(0.3, 0.8, len(promo_days)) * base_demand

    # Gaussian noise proportional to the baseline.
    noise = np.random.randn(n_days) * base_demand * 0.15

    # Combine the components; demand is a non-negative integer count.
    raw_demand = base_demand + trend + seasonality + weekly + promo + noise
    demand = np.maximum(raw_demand, 0).astype(int)

    promo_day_set = set(promo_days.tolist())
    for i, (date, units) in enumerate(zip(dates, demand)):
        # Random draws happen in a fixed order (price, then temperature) so the
        # seeded sequence stays the same from run to run.
        price = 10 * product_id + np.random.uniform(-1, 1)
        temperature = 20 + 10 * np.sin(i * 2 * np.pi / 365) + np.random.randn() * 3
        data_list.append({
            'date': date,
            'product_id': product_id,
            'demand': units,
            'price': price,
            'promotion': 1 if i in promo_day_set else 0,
            'temperature': temperature
        })

df = pd.DataFrame(data_list)

# Quick summary of what was generated.
date_min = df['date'].min().date()
date_max = df['date'].max().date()
print("수요 데이터:")
print(df.head(10))
print(f"\n데이터 shape: {df.shape}")
print(f"제품 수: {df['product_id'].nunique()}")
print(f"기간: {date_min} ~ {date_max}")

수요 시각화

# Four-panel overview of the generated demand data.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
(ax_trend, ax_monthly), (ax_weekday, ax_promo) = axes

# Top-left: one demand line per product.
for product_id in df['product_id'].unique():
    product_data = df.loc[df['product_id'] == product_id]
    ax_trend.plot(
        product_data['date'],
        product_data['demand'],
        label=f'Product {product_id}',
        alpha=0.7,
    )
ax_trend.set_title('Demand by Product')
ax_trend.legend()

# Top-right: total demand per calendar month.
month_key = df['date'].dt.to_period('M')
monthly = df.groupby(month_key)['demand'].sum()
ax_monthly.bar(range(len(monthly)), monthly.values)
ax_monthly.set_title('Monthly Total Demand')
ax_monthly.set_xlabel('Month')

# Bottom-left: mean demand per weekday (0 = Monday).
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
daily = df.groupby(df['date'].dt.dayofweek)['demand'].mean()
ax_weekday.bar(weekday_labels, daily.values)
ax_weekday.set_title('Average Demand by Day of Week')

# Bottom-right: mean demand with and without a promotion.
promo_effect = df.groupby('promotion')['demand'].mean()
ax_promo.bar(['No Promo', 'Promo'], promo_effect.values)
ax_promo.set_title('Promotion Effect')

plt.tight_layout()
plt.show()

특성 엔지니어링

def create_demand_features(df):
    """Build calendar, cyclical, lag and rolling features for demand forecasting.

    Lag and rolling statistics are computed separately for each ``product_id``.
    Rolling statistics are taken over the demand shifted by one row, so a row
    never sees its own target value.

    Args:
        df: DataFrame with a datetime ``date`` column and ``product_id`` and
            ``demand`` columns. The rows of each product must already be in
            chronological order; products may be interleaved and the index may
            be arbitrary.

    Returns:
        A copy of ``df`` (same index, same row order) with the feature columns
        added. Rows without enough history hold NaN in the lag/rolling columns.
    """
    df = df.copy()

    # Calendar features.
    df['dayofweek'] = df['date'].dt.dayofweek
    df['month'] = df['date'].dt.month
    df['dayofyear'] = df['date'].dt.dayofyear
    df['weekofyear'] = df['date'].dt.isocalendar().week.astype(int)
    df['is_weekend'] = (df['dayofweek'] >= 5).astype(int)
    df['is_month_end'] = df['date'].dt.is_month_end.astype(int)

    # Cyclical encoding so that December/January and Sunday/Monday are neighbours.
    df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
    df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)
    df['dow_sin'] = np.sin(2 * np.pi * df['dayofweek'] / 7)
    df['dow_cos'] = np.cos(2 * np.pi * df['dayofweek'] / 7)

    # Per-product lags.
    for lag in [1, 7, 14, 28]:
        df[f'demand_lag_{lag}'] = df.groupby('product_id')['demand'].shift(lag)

    # Per-product rolling mean of past demand. Shift and rolling both run inside
    # transform() so the window never spans two products, and the result stays
    # aligned with the caller's index.
    for window in [7, 14, 28]:
        df[f'demand_rolling_{window}'] = df.groupby('product_id')['demand'].transform(
            lambda s, w=window: s.shift(1).rolling(window=w).mean()
        )

    # Per-product rolling standard deviation of past demand.
    df['demand_rolling_std_7'] = df.groupby('product_id')['demand'].transform(
        lambda s: s.shift(1).rolling(window=7).std()
    )

    # Same day one year earlier (NaN for the first 365 rows of each product).
    df['demand_lag_365'] = df.groupby('product_id')['demand'].shift(365)

    return df

# Build the feature table and discard rows whose lag/rolling history is incomplete.
df_features = create_demand_features(df).dropna()

feature_table_shape = df_features.shape
print("특성 생성 후:")
print(f"  데이터 shape: {feature_table_shape}")
print(f"  특성 수: {feature_table_shape[1]}")

모델 학습

단일 모델 (전체 제품)

# Model inputs: every column except the timestamp and the target.
feature_cols = [col for col in df_features.columns
                if col not in ['date', 'demand']]

# Time-based hold-out: the final 60 days form the test set.
train_end_date = df_features['date'].max() - pd.Timedelta(days=60)
# df_features is ordered product-by-product, while split_type="time" splits by
# row position. The training rows are therefore sorted chronologically so the
# validation folds are the most recent dates rather than the last product.
# mergesort is stable, so the product order within a date stays deterministic.
train = (df_features[df_features['date'] <= train_end_date]
         .sort_values('date', kind='mergesort'))
test = df_features[df_features['date'] > train_end_date]

X_train = train[feature_cols]
y_train = train['demand']
X_test = test[feature_cols]
y_test = test['demand']

print(f"학습: {len(train)}개, 테스트: {len(test)}개")

# Let FLAML search for the best regressor within a two-minute budget.
automl = AutoML()
automl.fit(
    X_train, y_train,
    task="regression",
    time_budget=120,
    metric="mae",
    split_type="time",
    verbose=1
)

# Evaluate on the untouched final 60 days.
y_pred = automl.predict(X_test)

print(f"\n최적 모델: {automl.best_estimator}")
print(f"MAE: {mean_absolute_error(y_test, y_pred):.2f}")
print(f"RMSE: {np.sqrt(mean_squared_error(y_test, y_pred)):.2f}")

제품별 성능

# 062 제품별 평가
test_with_pred = test.copy()
test_with_pred['predicted'] = y_pred

product_metrics = []
for product_id in test_with_pred['product_id'].unique():
    product_data = test_with_pred[test_with_pred['product_id'] == product_id]
    mae = mean_absolute_error(product_data['demand'], product_data['predicted'])
    mape = np.mean(np.abs((product_data['demand'] - product_data['predicted']) /
                          (product_data['demand'] + 1))) * 100
    product_metrics.append({
        'product_id': product_id,
        'mae': mae,
        'mape': mape
    })

metrics_df = pd.DataFrame(product_metrics)
print("\n제품별 예측 성능:")
print(metrics_df.to_string(index=False))

예측 결과 시각화

# One actual-vs-predicted panel per product on a 3x2 grid.
fig, axes = plt.subplots(3, 2, figsize=(14, 12))
axes = axes.flatten()

# Products beyond the grid capacity are not drawn.
plotted_products = test_with_pred['product_id'].unique()[:len(axes)]

for ax, product_id in zip(axes, plotted_products):
    product_data = test_with_pred[test_with_pred['product_id'] == product_id]

    ax.plot(product_data['date'], product_data['demand'], label='Actual', linewidth=2)
    ax.plot(product_data['date'], product_data['predicted'], label='Predicted', linewidth=2, alpha=0.8)
    ax.set_title(f'Product {product_id}')
    ax.legend()

# Hide only the panels that received no product. Blanking the last panel
# unconditionally would erase a real chart whenever the grid is full.
for ax in axes[len(plotted_products):]:
    ax.axis('off')

plt.tight_layout()
plt.show()

특성 중요도

# Locate the fitted best learner. FLAML documents it as `automl.model`, a wrapper
# whose `.estimator` is the underlying library model. `best_model` is still
# honoured if the installed version happens to provide it.
best_model = getattr(automl, 'best_model', None)
if best_model is None:
    wrapped_model = getattr(automl, 'model', None)
    best_model = getattr(wrapped_model, 'estimator', wrapped_model)

# Not every learner exposes importances, so skip the chart when they are absent.
importance = getattr(best_model, 'feature_importances_', None)
if importance is not None:
    importance_df = pd.DataFrame({
        'feature': feature_cols,
        'importance': importance
    }).sort_values('importance', ascending=False)

    # Show at most 15 features. Fewer are shown when the model has fewer inputs,
    # which a hard-coded range(15) could not handle.
    top_n = min(15, len(importance_df))
    # Reverse so the most important feature ends up at the top of the chart.
    top_features = importance_df.head(top_n).iloc[::-1]

    plt.figure(figsize=(10, 8))
    plt.barh(range(top_n), top_features['importance'])
    plt.yticks(range(top_n), top_features['feature'])
    plt.xlabel('Importance')
    plt.title(f'Top {top_n} Feature Importance')
    plt.tight_layout()
    plt.show()

비즈니스 활용

안전 재고 계산

def calculate_safety_stock(forecast, forecast_error_std, service_level=0.95):
    """Return the safety stock for a target service level.

    The result is the standard-normal quantile of ``service_level`` multiplied
    by ``forecast_error_std``.

    Args:
        forecast: Forecast demand; accepted but not used in the calculation.
        forecast_error_std: Standard deviation of the forecast error.
        service_level: Target probability used for the quantile (default 0.95).

    Returns:
        The safety stock quantity.
    """
    # Imported locally because the rest of the script does not need scipy.
    from scipy.stats import norm

    return norm.ppf(service_level) * forecast_error_std

# Per-product safety stock derived from the hold-out forecast errors.
for product_id in df_features['product_id'].unique():
    product_test = test_with_pred.loc[test_with_pred['product_id'] == product_id]
    residuals = product_test['demand'] - product_test['predicted']
    forecast_std = residuals.std()
    avg_forecast = product_test['predicted'].mean()
    safety_stock = calculate_safety_stock(avg_forecast, forecast_std)

    print(
        f"Product {product_id}:",
        f"  평균 예측 수요: {avg_forecast:.0f}",
        f"  예측 오차 표준편차: {forecast_std:.0f}",
        f"  안전 재고 (95% 서비스 레벨): {safety_stock:.0f}",
        sep="\n",
    )

발주량 결정

def calculate_order_quantity(forecast, current_stock, lead_time, safety_stock):
    """Return how much to order given the current stock position.

    The reorder point is the forecast demand over the lead time plus the
    safety stock. Nothing is ordered while the stock is above that point;
    otherwise the order covers the gap up to the reorder point.

    Args:
        forecast: Forecast demand per period.
        current_stock: Units currently on hand.
        lead_time: Replenishment lead time, in the same periods as ``forecast``.
        safety_stock: Buffer quantity added on top of lead-time demand.

    Returns:
        The quantity to order, never negative.
    """
    reorder_point = forecast * lead_time + safety_stock

    # Guard clause: enough stock on hand, so no order is needed.
    if not (current_stock <= reorder_point):
        return 0

    return max(reorder_point - current_stock, 0)

# Worked example of the order-quantity rule.
print("\n발주량 계산 예시:")
forecast_demand = 150  # forecast average demand per day
current_stock = 500
lead_time = 7  # replenishment lead time in days
safety_stock = 200

order_qty = calculate_order_quantity(
    forecast=forecast_demand,
    current_stock=current_stock,
    lead_time=lead_time,
    safety_stock=safety_stock,
)

report_lines = [
    f"  예측 수요: {forecast_demand}/일",
    f"  현재 재고: {current_stock}",
    f"  리드타임: {lead_time}일",
    f"  안전 재고: {safety_stock}",
    f"  → 발주량: {order_qty:.0f}",
]
for line in report_lines:
    print(line)

예측 정확도 모니터링

def calculate_forecast_accuracy(actual, predicted):
    """Compute forecast accuracy metrics for paired actual/predicted values.

    Inputs are converted to float arrays and compared position by position,
    so lists, NumPy arrays and pandas Series are all accepted.

    Args:
        actual: Observed values (array-like).
        predicted: Forecast values (array-like) of the same shape as ``actual``.

    Returns:
        A dict with these keys:
        ``'MAE'``    mean absolute error;
        ``'RMSE'``   root mean squared error;
        ``'MAPE'``   mean of ``|error| / (actual + 1)`` in percent (the +1
                     keeps zero-demand periods finite);
        ``'Bias'``   mean of ``predicted - actual`` (positive means
                     over-forecasting, negative means under-forecasting);
        ``'Bias %'`` bias relative to the mean of ``actual`` in percent, or
                     NaN when that mean is zero.

    Raises:
        ValueError: If the inputs are empty or their shapes differ.
    """
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)

    if actual.shape != predicted.shape:
        raise ValueError(
            f"actual and predicted must have the same shape, "
            f"got {actual.shape} and {predicted.shape}"
        )
    if actual.size == 0:
        raise ValueError("actual and predicted must not be empty")

    error = predicted - actual
    mae = np.mean(np.abs(error))
    rmse = np.sqrt(np.mean(error ** 2))
    mape = np.mean(np.abs(error / (actual + 1))) * 100
    bias = np.mean(error)

    # A relative bias is undefined when the average actual value is zero.
    mean_actual = np.mean(actual)
    bias_pct = bias / mean_actual * 100 if mean_actual != 0 else float('nan')

    return {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'Bias': bias,
        'Bias %': bias_pct
    }

# Accuracy summary over the whole hold-out set.
accuracy = calculate_forecast_accuracy(y_test.to_numpy(), y_pred)

print("\n예측 정확도 대시보드:")
for metric_name, metric_value in accuracy.items():
    print(f"  {metric_name}: {metric_value:.2f}")

미래 수요 예측

def forecast_future(model, df, feature_cols, days=30):
    """Recursively forecast daily demand for every product in ``df``.

    Each product is forecast one day at a time. The prediction for a day is
    appended to that product's own demand history, so later days derive their
    lag and rolling features from earlier predictions of the same product.

    Future values of the exogenous inputs are not known, so fixed placeholders
    are used: ``promotion`` is 0, ``temperature`` is 20 and ``price`` is the
    product's historical mean price. Any other requested feature that cannot
    be derived from the date or the demand history falls back to the product's
    last observed value of that column.

    Args:
        model: Fitted estimator exposing ``predict(X)`` for a DataFrame whose
            columns are ``feature_cols``.
        df: Historical data with ``date``, ``product_id`` and ``demand``
            columns. Forecast dates start the day after the latest date in
            ``df``, so each product's history is assumed to end on that date.
        feature_cols: Column names the model expects, in order.
        days: Number of future days to forecast per product.

    Returns:
        DataFrame with columns ``date``, ``product_id`` and ``predicted``,
        holding ``days`` rows per product.

    Raises:
        KeyError: If a column in ``feature_cols`` can neither be derived nor
            found in ``df``.
    """
    feature_cols = list(feature_cols)
    last_date = df['date'].max()
    predictions = []

    for product_id in df['product_id'].unique():
        product_df = df[df['product_id'] == product_id].sort_values('date')

        # The running demand series: observed values first, predictions appended.
        history = product_df['demand'].astype(float).tolist()
        fallback_demand = float(np.mean(history))
        last_observed = product_df.iloc[-1]

        # Inputs that stay constant over the forecast horizon.
        static_features = {
            'product_id': product_id,
            'promotion': 0,
            'temperature': 20,
        }
        if 'price' in product_df.columns:
            static_features['price'] = product_df['price'].mean()

        for day in range(1, days + 1):
            future_date = last_date + pd.Timedelta(days=day)

            row = dict(static_features)
            row.update(_future_dynamic_features(future_date, history, fallback_demand))

            # Anything still missing is carried forward from the last known row.
            for col in feature_cols:
                if col in row:
                    continue
                if col not in product_df.columns:
                    raise KeyError(
                        f"cannot build feature {col!r} for future dates"
                    )
                row[col] = last_observed[col]

            X_next = pd.DataFrame([row])[feature_cols]
            predicted = float(np.asarray(model.predict(X_next)).ravel()[0])

            # Feed the prediction back so the next day's lags can use it.
            history.append(predicted)
            predictions.append({
                'date': future_date,
                'product_id': product_id,
                'predicted': predicted
            })

    return pd.DataFrame(predictions, columns=['date', 'product_id', 'predicted'])


def _future_dynamic_features(future_date, history, fallback_demand):
    """Return calendar, lag and rolling features for one future date.

    ``history`` is the product's demand up to the day before ``future_date``.
    A lag that reaches further back than the history uses ``fallback_demand``.
    """
    dayofweek = future_date.dayofweek
    month = future_date.month

    features = {
        'date': future_date,
        'dayofweek': dayofweek,
        'month': month,
        'dayofyear': future_date.dayofyear,
        'weekofyear': int(future_date.isocalendar()[1]),
        'is_weekend': 1 if dayofweek >= 5 else 0,
        'is_month_end': int(future_date.is_month_end),
        'month_sin': np.sin(2 * np.pi * month / 12),
        'month_cos': np.cos(2 * np.pi * month / 12),
        'dow_sin': np.sin(2 * np.pi * dayofweek / 7),
        'dow_cos': np.cos(2 * np.pi * dayofweek / 7),
    }

    for lag in (1, 7, 14, 28, 365):
        features[f'demand_lag_{lag}'] = (
            history[-lag] if lag <= len(history) else fallback_demand
        )

    # The trailing window ends at the previous day, i.e. the last history entry.
    for window in (7, 14, 28):
        features[f'demand_rolling_{window}'] = float(np.mean(history[-window:]))

    recent_week = history[-7:]
    # The sample standard deviation needs at least two points.
    features['demand_rolling_std_7'] = (
        float(np.std(recent_week, ddof=1)) if len(recent_week) >= 2 else 0.0
    )

    return features

# Placeholder notice for the future-forecast section.
print(
    "\n미래 30일 예측 (예시):",
    "  실제 구현 시 특성 생성 로직 완성 필요",
    sep="\n",
)

정리

수요 예측: 재고 관리, 생산 계획의 기반
다품목 모델: 제품 ID를 특성으로 포함
시간 특성: 요일, 월, 계절성 반영
Lag 특성: 과거 수요 패턴 활용
비즈니스 활용: 안전 재고, 발주량 결정
모니터링: MAPE, Bias로 예측 품질 추적

다음 글 예고

다음 글에서는 시계열 프로젝트 - 에너지 소비 예측에 대해 알아보겠습니다. 전력 수요 예측 프로젝트를 진행합니다.

FLAML AutoML 마스터 시리즈 #062

개요​

실습 환경​

수요 데이터 생성​

수요 시각화​

특성 엔지니어링​

모델 학습​

단일 모델 (전체 제품)​

제품별 성능​

예측 결과 시각화​

특성 중요도​

비즈니스 활용​

안전 재고 계산​

발주량 결정​

예측 정확도 모니터링​

미래 수요 예측​

정리​

다음 글 예고​

개요

실습 환경

수요 데이터 생성

수요 시각화

특성 엔지니어링

모델 학습

단일 모델 (전체 제품)

제품별 성능

예측 결과 시각화

특성 중요도

비즈니스 활용

안전 재고 계산

발주량 결정

예측 정확도 모니터링

미래 수요 예측

정리

다음 글 예고