063 시계열 프로젝트 - 에너지 소비 예측

키워드: 에너지 예측, 전력 수요, 스마트 그리드

개요

에너지 소비 예측은 전력망 운영, 에너지 비용 최적화, 탄소 배출 감소의 핵심입니다. 이 글에서는 건물 또는 지역의 전력 소비량을 예측하는 모델을 구축합니다.

실습 환경

Python 버전: 3.11 권장
필요 패키지: flaml[automl], pandas, numpy, matplotlib, scikit-learn

pip install flaml[automl] pandas numpy matplotlib

에너지 데이터 생성

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from flaml import AutoML
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Generate two years of synthetic hourly energy-consumption data.
# The three random draws below (temperature noise, load noise, humidity noise)
# are made in a fixed order so that seed 42 always yields the same dataset.
np.random.seed(42)
n_hours = 365 * 24 * 2  # 2 years

# 'h' is the current hourly alias; the upper-case 'H' spelling is deprecated
# in recent pandas releases.
dates = pd.date_range('2022-01-01', periods=n_hours, freq='h')

# Base load
base_load = 500  # kWh

# Hour-of-day pattern, computed for every timestamp at once instead of
# looping over the index in Python.
hours = dates.hour.to_numpy()
hour_pattern = np.select(
    [
        (hours >= 9) & (hours <= 18),                                   # business hours
        ((hours >= 6) & (hours < 9)) | ((hours > 18) & (hours <= 22)),  # shoulder hours
    ],
    [200.0, 100.0],
    default=-100.0,  # night
)

# Day-of-week pattern: Saturday (5) and Sunday (6) consume less.
dow_pattern = dates.dayofweek.to_numpy().astype(int)
dow_effect = np.where(dow_pattern >= 5, -150, 0)

# Seasonal pattern (summer cooling / winter heating); months not listed
# below (3, 6, 9) get no seasonal offset.
months = dates.month.to_numpy()
seasonal = np.select(
    [
        np.isin(months, [7, 8]),           # summer cooling
        np.isin(months, [1, 2, 12]),       # winter heating
        np.isin(months, [4, 5, 10, 11]),   # spring / autumn
    ],
    [300.0, 250.0, -50.0],
    default=0.0,
)

# Temperature: a yearly sine plus a daily sine plus Gaussian noise.
hour_index = np.arange(n_hours)
temp_base = 15 + 10 * np.sin(hour_index * 2 * np.pi / (365 * 24))
temp_daily = 5 * np.sin(hour_index * 2 * np.pi / 24 - np.pi / 2)
temperature = temp_base + temp_daily + np.random.randn(n_hours) * 3

# U-shaped relationship between temperature and energy, centred on 20 degrees.
temp_effect = 10 * (temperature - 20) ** 2

# Load noise
noise = np.random.randn(n_hours) * 50

# Final energy consumption, floored at 100 kWh.
energy = base_load + hour_pattern + dow_effect + seasonal + temp_effect * 0.3 + noise
energy = np.maximum(energy, 100)

# Humidity: yearly sine plus noise (third and last random draw).
humidity = (50 + 20 * np.sin(hour_index * 2 * np.pi / (365 * 24))
            + np.random.randn(n_hours) * 10)

df = pd.DataFrame({
    'datetime': dates,
    'energy_kwh': energy,
    'temperature': temperature,
    'humidity': humidity,
})

print("에너지 데이터:")
print(df.head())
print(f"\n데이터 shape: {df.shape}")
print(f"기간: {df['datetime'].min()} ~ {df['datetime'].max()}")
print(f"\n에너지 소비 통계:")
print(df['energy_kwh'].describe())

에너지 소비 시각화

# 2x2 overview of the generated dataset.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
ax_daily, ax_hour = axes[0]
ax_dow, ax_temp = axes[1]

# Daily mean consumption over the whole period.
daily = df.set_index('datetime')['energy_kwh'].resample('D').mean()
ax_daily.plot(daily.index, daily.values)
ax_daily.set(title='Daily Average Energy Consumption', ylabel='kWh')

# Mean consumption for each hour of the day.
hourly = df['energy_kwh'].groupby(df['datetime'].dt.hour).mean()
ax_hour.bar(hourly.index, hourly.values)
ax_hour.set(title='Average by Hour of Day', xlabel='Hour', ylabel='kWh')

# Mean consumption for each day of the week (0 = Monday ... 6 = Sunday).
dow = df['energy_kwh'].groupby(df['datetime'].dt.dayofweek).mean()
weekday_labels = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
ax_dow.bar(weekday_labels, dow.values)
ax_dow.set(title='Average by Day of Week')

# Temperature against energy, one point per hour.
ax_temp.scatter(df['temperature'], df['energy_kwh'], alpha=0.1, s=1)
ax_temp.set(
    title='Temperature vs Energy',
    xlabel='Temperature (°C)',
    ylabel='Energy (kWh)',
)

plt.tight_layout()
plt.show()

특성 엔지니어링

def create_energy_features(df):
    """Return a copy of ``df`` extended with forecasting features.

    ``df`` must contain the columns ``datetime`` (datetime-like),
    ``energy_kwh`` and ``temperature``. The input frame is not modified.

    Added columns, in this order: calendar fields, sine/cosine encodings of
    hour / day-of-week / month, business and season flags, non-linear
    temperature terms, energy lags, rolling mean/std of past energy,
    same-hour-yesterday / last-week values, and temperature lag / rolling
    mean. Lag and rolling features only look at earlier rows, so the leading
    rows contain NaN until enough history exists.
    """
    out = df.copy()
    stamp = out['datetime'].dt
    energy = out['energy_kwh']
    temp = out['temperature']

    # Calendar fields
    out['hour'] = stamp.hour
    out['dayofweek'] = stamp.dayofweek
    out['month'] = stamp.month
    out['dayofyear'] = stamp.dayofyear
    out['weekofyear'] = stamp.isocalendar().week.astype(int)

    # Cyclical encoding: (output prefix, source column, period length)
    for prefix, column, period in (('hour', 'hour', 24),
                                   ('dow', 'dayofweek', 7),
                                   ('month', 'month', 12)):
        angle = 2 * np.pi * out[column] / period
        out[f'{prefix}_sin'] = np.sin(angle)
        out[f'{prefix}_cos'] = np.cos(angle)

    # Business-calendar flags
    out['is_weekend'] = (out['dayofweek'] >= 5).astype(int)
    in_office_hours = out['hour'].between(9, 18)
    on_weekday = out['dayofweek'] < 5
    out['is_business_hour'] = (in_office_hours & on_weekday).astype(int)
    out['is_peak_hour'] = out['hour'].between(14, 17).astype(int)

    # Season flags
    out['is_summer'] = out['month'].isin([6, 7, 8]).astype(int)
    out['is_winter'] = out['month'].isin([12, 1, 2]).astype(int)

    # Non-linear temperature terms: square, degrees above 25, degrees below 10
    out['temp_squared'] = temp ** 2
    out['temp_above_25'] = np.maximum(temp - 25, 0)
    out['temp_below_10'] = np.maximum(10 - temp, 0)

    # Energy lags: 1h, 2h, 3h, 1 day, 2 days, 1 week
    for lag in (1, 2, 3, 24, 48, 168):
        out[f'energy_lag_{lag}'] = energy.shift(lag)

    # Rolling statistics over past values only: shift by one row first so the
    # current hour's target never enters its own window.
    previous_energy = energy.shift(1)
    for window in (3, 6, 12, 24, 168):
        rolling = previous_energy.rolling(window=window)
        out[f'energy_rolling_mean_{window}'] = rolling.mean()
        out[f'energy_rolling_std_{window}'] = rolling.std()

    # Same hour on the previous day / in the previous week
    out['energy_same_hour_yesterday'] = energy.shift(24)
    out['energy_same_hour_last_week'] = energy.shift(168)

    # Temperature history
    out['temp_lag_1'] = temp.shift(1)
    out['temp_rolling_mean_24'] = out['temp_lag_1'].rolling(24).mean()

    return out

# Build the feature table and drop the leading rows whose lag / rolling
# features are still NaN.
df_features = create_energy_features(df)
df_features = df_features.dropna()

# Report only model inputs as "features": the timestamp and the target are
# columns of the table but are excluded from the model's feature list.
n_model_features = len(df_features.columns.difference(['datetime', 'energy_kwh']))

print("특성 생성 후:")
print(f"  데이터 shape: {df_features.shape}")
print(f"  특성 수: {n_model_features}")

모델 학습

# Model inputs: every column except the timestamp and the target.
non_feature_cols = {'datetime', 'energy_kwh'}
feature_cols = [name for name in df_features.columns
                if name not in non_feature_cols]

# Chronological split: the final 30 days are held out as the test set.
train_end = df_features['datetime'].max() - pd.Timedelta(days=30)
is_train_row = df_features['datetime'] <= train_end
train = df_features[is_train_row]
test = df_features[~is_train_row]

X_train, y_train = train[feature_cols], train['energy_kwh']
X_test, y_test = test[feature_cols], test['energy_kwh']

print(f"학습: {len(train)}개, 테스트: {len(test)}개")
print(f"특성 수: {len(feature_cols)}")

# FLAML search settings: regression task, 180-second budget, MAE as the
# optimisation metric, time-ordered validation split.
fit_settings = {
    'task': "regression",
    'time_budget': 180,
    'metric': "mae",
    'split_type': "time",
    'verbose': 1,
}
automl = AutoML()
automl.fit(X_train, y_train, **fit_settings)

y_pred = automl.predict(X_test)

# Hold-out metrics
test_mae = mean_absolute_error(y_test, y_pred)
test_rmse = np.sqrt(mean_squared_error(y_test, y_pred))
test_r2 = r2_score(y_test, y_pred)
test_mape = np.mean(np.abs((y_test - y_pred) / y_test)) * 100

print(f"\n최적 모델: {automl.best_estimator}")
print(f"MAE: {test_mae:.2f} kWh")
print(f"RMSE: {test_rmse:.2f} kWh")
print(f"R²: {test_r2:.4f}")
print(f"MAPE: {test_mape:.2f}%")

예측 결과 분석

시간대별 성능

# Attach predictions and residuals to a copy of the test rows.
test_with_pred = test.copy()
test_with_pred['predicted'] = y_pred
test_with_pred['error'] = test_with_pred['energy_kwh'] - test_with_pred['predicted']
test_with_pred['abs_error'] = np.abs(test_with_pred['error'])

# MAE for each hour of the day (index = hour value)
hourly_mae = test_with_pred.groupby('hour')['abs_error'].mean()

plt.figure(figsize=(12, 5))
plt.bar(hourly_mae.index, hourly_mae.values)
plt.xlabel('Hour of Day')
plt.ylabel('MAE (kWh)')
plt.title('Prediction Error by Hour')
plt.xticks(range(24))
plt.tight_layout()
plt.show()

# Select by hour label rather than by position: positional slicing is only
# correct when all 24 hours are present in the test set, whereas label-based
# selection stays correct if an hour is missing.
peak_hours = [14, 15, 16, 17]
offpeak_hours = [22, 23, 0, 1, 2, 3, 4, 5, 6]
peak_mae = hourly_mae[hourly_mae.index.isin(peak_hours)].mean()
offpeak_mae = hourly_mae[hourly_mae.index.isin(offpeak_hours)].mean()

print("시간대별 MAE:")
print(f"  피크 시간 (14-17시): {peak_mae:.2f} kWh")
print(f"  오프피크 (22-06시): {offpeak_mae:.2f} kWh")

예측 시각화

# 063 일주일 예측 상세
week_data = test_with_pred.iloc[:168]

fig, axes = plt.subplots(2, 1, figsize=(14, 10))

# 063 실제 vs 예측
axes[0].plot(week_data['datetime'], week_data['energy_kwh'],
             label='Actual', linewidth=2)
axes[0].plot(week_data['datetime'], week_data['predicted'],
             label='Predicted', linewidth=2, alpha=0.8)
axes[0].fill_between(week_data['datetime'],
                     week_data['predicted'] - 2*week_data['abs_error'].std(),
                     week_data['predicted'] + 2*week_data['abs_error'].std(),
                     alpha=0.2, label='±2σ')
axes[0].set_title('Energy Consumption: Actual vs Predicted (1 Week)')
axes[0].set_ylabel('kWh')
axes[0].legend()

# 063 예측 오차
axes[1].bar(week_data['datetime'], week_data['error'], width=0.04)
axes[1].axhline(y=0, color='black', linestyle='-', linewidth=0.5)
axes[1].set_title('Prediction Error')
axes[1].set_ylabel('Error (kWh)')

plt.tight_layout()
plt.show()

특성 중요도

# Plot and print feature importances when the selected model exposes them.
# NOTE(review): this relies on the AutoML object having a `best_model`
# attribute — confirm against the installed FLAML version (its documentation
# describes `automl.model.estimator` for the underlying fitted estimator).
if hasattr(automl.best_model, 'feature_importances_'):
    importance = automl.best_model.feature_importances_
    importance_df = pd.DataFrame({
        'feature': feature_cols,
        'importance': importance
    }).sort_values('importance', ascending=False)

    # Never ask for more bars than there are features; otherwise the bar
    # positions and the plotted values would have different lengths.
    top_n = min(20, len(importance_df))
    # Reverse so the most important feature ends up at the top of the chart.
    top_features = importance_df.head(top_n).iloc[::-1]

    plt.figure(figsize=(10, 10))
    plt.barh(range(top_n), top_features['importance'])
    plt.yticks(range(top_n), top_features['feature'])
    plt.xlabel('Importance')
    plt.title(f'Top {top_n} Feature Importance')
    plt.tight_layout()
    plt.show()

    print("상위 10개 중요 특성:")
    print(importance_df.head(10).to_string(index=False))

비즈니스 활용

피크 수요 예측

def predict_peak_demand(model, df, date, feature_cols):
    """Predict the demand profile of one calendar day and summarise its peak.

    Args:
        model: object with a ``predict(X)`` method.
        df: frame holding a ``datetime`` column plus the feature columns.
        date: the day to evaluate; anything ``pd.to_datetime`` accepts.
        feature_cols: names of the columns passed to ``model.predict``.

    Returns:
        ``None`` when ``df`` has no rows for that day, otherwise a dict with
        ``date`` (as passed in), ``peak_hour``, ``peak_demand_kwh``,
        ``daily_total_kwh`` and ``average_kwh``.
    """
    target_day = pd.to_datetime(date).date()
    rows_for_day = df[df['datetime'].dt.date == target_day]

    # Nothing to predict for a day that is not in the data.
    if rows_for_day.empty:
        return None

    demand = model.predict(rows_for_day[feature_cols])

    # Timestamp of the row with the highest predicted demand.
    peak_position = np.argmax(demand)
    peak_timestamp = rows_for_day['datetime'].iloc[peak_position]

    summary = {'date': date}
    summary['peak_hour'] = peak_timestamp.hour
    summary['peak_demand_kwh'] = np.max(demand)
    summary['daily_total_kwh'] = np.sum(demand)
    summary['average_kwh'] = np.mean(demand)
    return summary

# Peak forecast for each of the first seven days of the test period.
print("피크 수요 예측 예시:")
first_test_timestamp = test['datetime'].min()
for day in pd.date_range(first_test_timestamp, periods=7):
    peak_info = predict_peak_demand(automl, test, day.date(), feature_cols)
    # Days without any rows in the test set yield no summary.
    if not peak_info:
        continue
    print(f"  {peak_info['date']}: 피크 {peak_info['peak_hour']}시, "
          f"{peak_info['peak_demand_kwh']:.0f} kWh")

에너지 비용 예측

def calculate_energy_cost(predictions, peak_rate=150, offpeak_rate=80, hours=None):
    """Price predicted consumption under a two-tier time-of-use tariff.

    Hours 14-17 are billed at ``peak_rate``; every other hour at
    ``offpeak_rate``.

    Args:
        predictions: predicted consumption (kWh), one value per time step.
        peak_rate: price per kWh during peak hours.
        offpeak_rate: price per kWh outside peak hours.
        hours: hour of day (0-23) for each prediction. When omitted, the
            module-level ``test_with_pred['hour']`` is used, which keeps
            existing ``calculate_energy_cost(y_pred)`` calls working.

    Returns:
        dict with ``total_cost``, ``peak_consumption``,
        ``offpeak_consumption`` and ``average_rate`` (cost per kWh; 0.0 when
        the total consumption is zero).

    Raises:
        ValueError: if ``hours`` and ``predictions`` differ in length.
    """
    # Peak window: 14:00-17:59 (summer tariff)
    peak_hours = [14, 15, 16, 17]

    if hours is None:
        hours = test_with_pred['hour']

    kwh = np.asarray(predictions, dtype=float)
    hour_of_day = np.asarray(hours)
    if len(hour_of_day) != len(kwh):
        # Pairing the two sequences would otherwise silently drop the tail
        # of the longer one.
        raise ValueError(
            f"hours has {len(hour_of_day)} entries but predictions has {len(kwh)}"
        )

    in_peak = np.isin(hour_of_day, peak_hours)
    peak_consumption = float(kwh[in_peak].sum())
    offpeak_consumption = float(kwh[~in_peak].sum())
    total_cost = peak_consumption * peak_rate + offpeak_consumption * offpeak_rate

    total_consumption = peak_consumption + offpeak_consumption
    # Avoid dividing by zero when nothing is consumed (e.g. empty input).
    average_rate = total_cost / total_consumption if total_consumption else 0.0

    return {
        'total_cost': total_cost,
        'peak_consumption': peak_consumption,
        'offpeak_consumption': offpeak_consumption,
        'average_rate': average_rate
    }

# Price the test-period predictions and print the summary.
cost = calculate_energy_cost(y_pred)
cost_report_lines = [
    "\n예측 기반 에너지 비용:",
    f"  총 예상 비용: {cost['total_cost']:,.0f}원",
    f"  피크 소비: {cost['peak_consumption']:,.0f} kWh",
    f"  오프피크 소비: {cost['offpeak_consumption']:,.0f} kWh",
    f"  평균 단가: {cost['average_rate']:.1f}원/kWh",
]
for report_line in cost_report_lines:
    print(report_line)

수요 반응 (Demand Response)

def identify_reduction_opportunities(predictions, threshold_percentile=90, timestamps=None):
    """List the time steps whose predicted demand exceeds a percentile threshold.

    Args:
        predictions: predicted consumption (kWh), one value per time step.
        threshold_percentile: percentile of ``predictions`` used as the
            threshold; only strictly larger predictions are reported.
        timestamps: timestamp for each prediction. When omitted, the
            module-level ``test_with_pred['datetime']`` is used, which keeps
            existing ``identify_reduction_opportunities(y_pred)`` calls
            working.

    Returns:
        DataFrame with columns ``datetime``, ``predicted_kwh``,
        ``excess_kwh`` (amount above the threshold) and
        ``reduction_potential`` (excess as a percentage of the prediction).
        The columns are present even when no row qualifies, so callers can
        always index them.

    Raises:
        ValueError: if ``timestamps`` and ``predictions`` differ in length.
    """
    columns = ['datetime', 'predicted_kwh', 'excess_kwh', 'reduction_potential']

    if timestamps is None:
        timestamps = test_with_pred['datetime']

    demand = np.asarray(predictions, dtype=float)
    stamps = pd.Series(timestamps).to_numpy()
    if len(stamps) != len(demand):
        # Pairing the two sequences would otherwise silently drop the tail
        # of the longer one.
        raise ValueError(
            f"timestamps has {len(stamps)} entries but predictions has {len(demand)}"
        )

    # A percentile of an empty array is undefined; return the empty schema.
    if demand.size == 0:
        return pd.DataFrame(columns=columns)

    threshold = np.percentile(demand, threshold_percentile)
    above = demand > threshold
    excess = demand[above] - threshold

    return pd.DataFrame({
        'datetime': stamps[above],
        'predicted_kwh': demand[above],
        'excess_kwh': excess,
        'reduction_potential': excess / demand[above] * 100,
    }, columns=columns)

# Hours of the test period whose predicted demand is above the default
# percentile threshold.
reduction_df = identify_reduction_opportunities(y_pred)
n_opportunities = len(reduction_df)
print(f"\n피크 저감 기회 ({n_opportunities}건):")
first_rows = reduction_df.head(10)
print(first_rows.to_string(index=False))

# Total energy above the threshold across all flagged hours.
total_reduction = reduction_df['excess_kwh'].sum()
print(f"\n총 잠재 저감량: {total_reduction:,.0f} kWh")

다중 건물 예측

def predict_multiple_buildings(n_buildings=5, actual=None, predicted=None):
    """Scale one prediction series to several buildings and score each.

    Building ``k`` (1-based) uses the scale factor ``0.5 + 0.3 * k`` applied
    to both the actual and the predicted series.

    Args:
        n_buildings: number of buildings to generate.
        actual: observed consumption. When omitted, the module-level
            ``y_test`` is used.
        predicted: predicted consumption. When omitted, the module-level
            ``y_pred`` is used. Together these defaults keep existing
            ``predict_multiple_buildings()`` calls working.

    Returns:
        DataFrame with one row per building and the columns ``building_id``,
        ``scale_factor``, ``avg_consumption``, ``peak_consumption`` and
        ``mae``. The columns are present even when ``n_buildings`` is 0.
    """
    columns = ['building_id', 'scale_factor', 'avg_consumption',
               'peak_consumption', 'mae']

    if predicted is None:
        predicted = y_pred
    if actual is None:
        actual = y_test.values

    # Work on plain arrays so scaling and scoring are positional and do not
    # depend on any pandas index carried by the inputs.
    base_pred = np.asarray(predicted, dtype=float)
    base_actual = np.asarray(actual, dtype=float)

    results = []
    for building_id in range(1, n_buildings + 1):
        # Per-building scale factor
        scale = 0.5 + building_id * 0.3

        building_pred = base_pred * scale
        building_actual = base_actual * scale

        results.append({
            'building_id': building_id,
            'scale_factor': scale,
            'avg_consumption': np.mean(building_pred),
            'peak_consumption': np.max(building_pred),
            'mae': mean_absolute_error(building_actual, building_pred)
        })

    return pd.DataFrame(results, columns=columns)

# Score the default set of scaled buildings and print the table.
building_results = predict_multiple_buildings()
building_table = building_results.to_string(index=False)
print("\n다중 건물 예측 결과:")
print(building_table)

예측 정확도 모니터링

def create_accuracy_dashboard(y_true, y_pred, timestamps):
    """Compute summary accuracy metrics for a set of predictions.

    Args:
        y_true: observed values, one per time step.
        y_pred: predicted values, same length as ``y_true``.
        timestamps: datetime-like value for each time step, same length.

    Returns:
        dict with ``MAE``, ``RMSE``, ``R²``, ``MAPE`` (percent), ``Bias``
        (mean of prediction minus observation) and ``Daily MAE Std`` (the
        standard deviation of the per-calendar-day MAE values).
    """
    # Plain arrays keep every operation positional, so a pandas Series with
    # a non-default index can be passed without misaligning the rows.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    metrics = {
        'MAE': mean_absolute_error(y_true, y_pred),
        'RMSE': np.sqrt(mean_squared_error(y_true, y_pred)),
        'R²': r2_score(y_true, y_pred),
        'MAPE': np.mean(np.abs((y_true - y_pred) / y_true)) * 100,
        'Bias': np.mean(y_pred - y_true)
    }

    # Per-day MAE in a single grouped pass: the mean absolute error of a day
    # is the mean of that day's absolute errors. The calendar day of each row
    # is derived once instead of once per distinct day.
    day_keys = pd.DatetimeIndex(timestamps).normalize().to_numpy()
    abs_error = pd.Series(np.abs(y_true - y_pred))
    daily_mae = abs_error.groupby(day_keys).mean()

    metrics['Daily MAE Std'] = np.std(daily_mae.to_numpy())

    return metrics

# Accuracy summary for the whole test period, passed as plain arrays.
dashboard = create_accuracy_dashboard(
    y_test.to_numpy(),
    y_pred,
    test['datetime'].to_numpy(),
)
print("\n예측 정확도 대시보드:")
for metric in dashboard:
    print(f"  {metric}: {dashboard[metric]:.2f}")

정리

에너지 예측: 시간, 요일, 계절, 기온 복합 패턴
특성 엔지니어링: 주기적 인코딩, Lag 특성, 기온 비선형 효과
비즈니스 활용: 피크 예측, 비용 계산, 수요 반응
시간대별 분석: 피크/오프피크 시간대 별도 평가
다중 건물: 스케일 팩터로 확장 가능

다음 글 예고

다음 글에서는 시계열 앙상블에 대해 알아보겠습니다. 여러 시계열 모델을 결합하여 예측 성능을 향상시키는 방법을 다룹니다.

FLAML AutoML 마스터 시리즈 #063

개요

실습 환경

에너지 데이터 생성

에너지 소비 시각화

특성 엔지니어링

모델 학습

예측 결과 분석

시간대별 성능

예측 시각화

특성 중요도

비즈니스 활용

피크 수요 예측

에너지 비용 예측

수요 반응 (Demand Response)

다중 건물 예측

예측 정확도 모니터링

정리

다음 글 예고

개요

실습 환경

에너지 데이터 생성

에너지 소비 시각화

특성 엔지니어링

모델 학습

예측 결과 분석

시간대별 성능

예측 시각화

특성 중요도

비즈니스 활용

피크 수요 예측

에너지 비용 예측

수요 반응 (Demand Response)

다중 건물 예측

예측 정확도 모니터링

정리

다음 글 예고