055 회귀 실전 - 가격 최적화

키워드: 가격, 최적화

개요

가격 최적화는 수익을 극대화하면서 수요를 유지하는 최적의 가격점을 찾는 것입니다. 이 글에서는 PyCaret 회귀 모델을 활용하여 가격 최적화를 수행합니다.

실습 환경

Python 버전: 3.11 권장
필요 패키지: pycaret[full]>=3.0

비즈니스 문제 정의

목표: 수익(Revenue = Price × Demand)을 최대화하는 최적 가격 결정
제약: 시장 점유율 유지, 경쟁사 가격 고려

데이터 준비

import pandas as pd
import numpy as np

# Simulate price/demand observations for three product tiers.
np.random.seed(42)
n_samples = 3000

# Product tiers
categories = ['Premium', 'Standard', 'Budget']

# Per-tier reference price, reference demand and price elasticity.
TIER_SETTINGS = {
    'Premium': {'base_price': 500, 'base_demand': 100, 'elasticity': -1.5},
    'Standard': {'base_price': 200, 'base_demand': 300, 'elasticity': -2.0},
    'Budget': {'base_price': 50, 'base_demand': 800, 'elasticity': -2.5},
}

records = []
for _ in range(n_samples):
    tier = np.random.choice(categories)
    settings = TIER_SETTINGS[tier]
    ref_price = settings['base_price']

    # The random draws below stay in a fixed order so the seeded stream
    # yields the same rows on every run.
    unit_price = ref_price * np.random.uniform(0.5, 1.5)   # 50%..150% of reference
    rival_price = ref_price * np.random.uniform(0.7, 1.3)  # competitor's price
    ad_spend = np.random.uniform(0, 10000)                 # marketing spend
    qtr = np.random.randint(1, 5)                          # quarter 1..4

    # Constant-elasticity demand curve around the reference price.
    relative_price = unit_price / ref_price
    units = settings['base_demand'] * (relative_price ** settings['elasticity'])

    # Competition: a cheaper rival costs demand, a clearly pricier one adds it.
    if rival_price < unit_price:
        units *= 0.8
    elif rival_price > unit_price * 1.1:
        units *= 1.2

    # Marketing lifts demand with diminishing returns.
    units += np.log1p(ad_spend) * 10

    # Year-end / year-start seasonal uplift.
    if qtr in (4, 1):
        units *= 1.3

    # Noise proportional to the demand level, floored at one unit.
    units += np.random.normal(0, units * 0.1)
    units = max(1, units)

    # Unit cost drawn as 30%..50% of the selling price.
    unit_cost = unit_price * np.random.uniform(0.3, 0.5)

    records.append({
        'category': tier,
        'price': unit_price,
        'competitor_price': rival_price,
        'marketing_spend': ad_spend,
        'quarter': qtr,
        'cost': unit_cost,
        'demand': units,
        'revenue': unit_price * units,
        'profit': (unit_price - unit_cost) * units,
    })

data = pd.DataFrame(records)

print(f"데이터 크기: {len(data)}")
print("\n카테고리별 통계:")
summary_columns = ['price', 'demand', 'revenue', 'profit']
print(data.groupby('category')[summary_columns].mean())

탐색적 데이터 분석

import matplotlib.pyplot as plt
import seaborn as sns

# 2x2 grid of exploratory plots: price vs demand / revenue / profit, and
# the revenue distribution per category.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# Integer code per category, computed once and reused to colour the scatters.
category_codes = data['category'].astype('category').cat.codes

# Price vs demand
axes[0, 0].scatter(data['price'], data['demand'], alpha=0.3, c=category_codes)
axes[0, 0].set_xlabel('Price')
axes[0, 0].set_ylabel('Demand')
axes[0, 0].set_title('Price vs Demand')

# Price vs revenue
axes[0, 1].scatter(data['price'], data['revenue'], alpha=0.3, c=category_codes)
axes[0, 1].set_xlabel('Price')
axes[0, 1].set_ylabel('Revenue')
axes[0, 1].set_title('Price vs Revenue')

# Revenue distribution per category
data.boxplot(column='revenue', by='category', ax=axes[1, 0])
axes[1, 0].set_title('Revenue by Category')
# pandas adds a figure-level "Boxplot grouped by category" suptitle whenever
# `by` is used; clear it so it does not label the whole 2x2 figure.
fig.suptitle('')

# Price vs profit
axes[1, 1].scatter(data['price'], data['profit'], alpha=0.3)
axes[1, 1].set_xlabel('Price')
axes[1, 1].set_ylabel('Profit')
axes[1, 1].set_title('Price vs Profit')

plt.tight_layout()
plt.savefig('price_optimization_eda.png', dpi=150)

수요 예측 모델

from pycaret.regression import *

# Feature engineering: our price relative to the competitor's price, and a
# 0/1 flag for quarters 1 and 4. Both columns are added to `data` in place.
data['price_ratio'] = data['price'] / data['competitor_price']
data['is_peak_season'] = data['quarter'].isin([1, 4]).astype(int)

# Configure the PyCaret regression experiment with `demand` as the target.
# revenue, profit and cost are excluded from the features.
reg = setup(
    data=data,
    target='demand',
    categorical_features=['category'],
    numeric_features=['price', 'competitor_price', 'marketing_spend',
                      'quarter', 'price_ratio'],
    ignore_features=['revenue', 'profit', 'cost'],
    session_id=42,
    verbose=False
)

# Compare candidate models (n_select=3 presumably keeps the three best —
# confirm against the PyCaret docs).
best = compare_models(n_select=3)

# Train an XGBoost model and tune it with RMSE as the optimisation metric.
# NOTE(review): `best` from compare_models is not used below; xgboost is
# chosen explicitly — confirm this is intended.
xgb = create_model('xgboost', verbose=False)
tuned_model = tune_model(xgb, optimize='RMSE')

print("수요 예측 모델 성능:")
# pull() presumably returns the most recent score grid (here the tuning
# results) — verify.
print(pull())

가격 탄력성 분석

import numpy as np

def analyze_price_elasticity(model, category, base_data, price_range):
    """Predict demand and revenue for one category across candidate prices.

    The first row of ``base_data`` whose ``category`` matches is used as a
    template. Only ``price`` and the derived ``price_ratio`` are varied;
    every other column keeps the template's values.

    Args:
        model: Fitted model accepted by ``predict_model``.
        category: Category label to analyse.
        base_data: DataFrame with ``category`` and ``competitor_price``
            columns plus whatever features the model needs.
        price_range: Iterable of candidate prices.

    Returns:
        DataFrame with one row per candidate price and the columns
        ``price``, ``demand`` (model prediction) and ``revenue``
        (``price * demand``).

    Raises:
        ValueError: If ``base_data`` contains no row for ``category``.
    """
    template = base_data[base_data['category'] == category].iloc[0:1]
    if template.empty:
        raise ValueError(f"no rows found for category {category!r}")

    prices = np.asarray(list(price_range), dtype=float)
    if prices.size == 0:
        return pd.DataFrame(columns=['price', 'demand', 'revenue'])

    # Score every candidate price in one predict_model call instead of
    # running the prediction pipeline once per price.
    batch = pd.concat([template] * len(prices), ignore_index=True)
    batch['price'] = prices
    batch['price_ratio'] = prices / template['competitor_price'].values[0]

    pred = predict_model(model, data=batch, verbose=False)
    demand = pred['prediction_label'].to_numpy()

    return pd.DataFrame({
        'price': prices,
        'demand': demand,
        'revenue': prices * demand,
    })

# Revenue curve per category, one panel each.
categories = ['Premium', 'Standard', 'Budget']
base_prices = {'Premium': 500, 'Standard': 200, 'Budget': 50}

plt.figure(figsize=(15, 5))

for panel, cat in enumerate(categories, start=1):
    anchor = base_prices[cat]
    # 50 candidate prices between 50% and 150% of the category base price.
    price_grid = np.linspace(anchor * 0.5, anchor * 1.5, 50)
    curve = analyze_price_elasticity(tuned_model, cat, data, price_grid)

    plt.subplot(1, 3, panel)
    plt.plot(curve['price'], curve['revenue'], 'b-', linewidth=2)

    # Mark the revenue-maximising point of the curve.
    peak = curve.loc[curve['revenue'].idxmax()]
    peak_price = peak['price']
    peak_revenue = peak['revenue']

    plt.axvline(
        x=peak_price,
        color='r',
        linestyle='--',
        label=f'Optimal: ${peak_price:.0f}',
    )
    plt.scatter([peak_price], [peak_revenue], color='red', s=100, zorder=5)

    plt.xlabel('Price ($)')
    plt.ylabel('Revenue ($)')
    plt.title(f'{cat} Category')
    plt.legend()
    plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('optimal_price_by_category.png', dpi=150)

최적 가격 찾기

from scipy.optimize import minimize_scalar

def find_optimal_price(model, base_sample, price_min, price_max, objective='revenue', grid_points=25):
    """Search ``[price_min, price_max]`` for the price maximising the objective.

    Only ``price`` and the derived ``price_ratio`` of the first row of
    ``base_sample`` are varied; all other columns stay fixed.

    A bounded scalar optimiser on its own only finds a local optimum, and the
    predicted-demand curve need not be smooth or unimodal. The interval is
    therefore scanned on a coarse grid first, and the optimiser then refines
    the bracket around the best grid point.

    Args:
        model: Fitted model accepted by ``predict_model``.
        base_sample: DataFrame whose first row is the scenario to price. It
            must have a ``competitor_price`` column, and a ``cost`` column
            when ``objective='profit'``.
        price_min: Lower bound of the search interval.
        price_max: Upper bound of the search interval.
        objective: ``'revenue'`` maximises ``price * demand``; ``'profit'``
            maximises ``(price - cost) * demand`` with the row's ``cost``.
        grid_points: Number of grid prices scanned before refinement. Values
            below 3 (or ``None``) skip the scan and run the bounded
            optimiser over the whole interval.

    Returns:
        Tuple ``(best_price, best_objective_value)`` as floats.

    Raises:
        ValueError: If ``objective`` is not ``'revenue'`` or ``'profit'``,
            or if ``price_min`` exceeds ``price_max``.
    """
    if objective not in ('revenue', 'profit'):
        raise ValueError(
            f"objective must be 'revenue' or 'profit', got {objective!r}"
        )
    if price_min > price_max:
        raise ValueError("price_min must not exceed price_max")

    template = base_sample.iloc[0:1]
    competitor_price = template['competitor_price'].values[0]
    # With a zero unit cost the profit formula reduces to plain revenue.
    unit_cost = template['cost'].values[0] if objective == 'profit' else 0.0

    def objective_values(prices):
        """Return the objective for each candidate price (one batched prediction)."""
        prices = np.atleast_1d(np.asarray(prices, dtype=float))
        batch = pd.concat([template] * len(prices), ignore_index=True)
        batch['price'] = prices
        batch['price_ratio'] = prices / competitor_price
        pred = predict_model(model, data=batch, verbose=False)
        return (prices - unit_cost) * pred['prediction_label'].to_numpy()

    lower, upper = price_min, price_max
    best_price, best_value = None, -np.inf

    if grid_points is not None and grid_points >= 3:
        grid = np.linspace(price_min, price_max, grid_points)
        grid_values = objective_values(grid)
        top = int(np.argmax(grid_values))
        best_price, best_value = grid[top], grid_values[top]
        # Refine only between the neighbours of the best grid point.
        lower = grid[max(top - 1, 0)]
        upper = grid[min(top + 1, grid_points - 1)]

    result = minimize_scalar(
        lambda price: -objective_values(price)[0],
        bounds=(lower, upper),
        method='bounded'
    )

    # Keep the refined point only if it actually beats the grid optimum.
    if best_price is None or -result.fun > best_value:
        best_price, best_value = result.x, -result.fun

    return float(best_price), float(best_value)

# Best price per category under each objective.
print("=== 최적 가격 분석 ===\n")

for cat in categories:
    reference_row = data[data['category'] == cat].iloc[0:1].copy()
    base_price = base_prices[cat]
    # Search between 50% and 150% of the category base price.
    lower_bound, upper_bound = base_price * 0.5, base_price * 1.5

    # Revenue first, then profit (same call order as the printed report).
    optimum = {
        goal: find_optimal_price(
            tuned_model, reference_row,
            lower_bound, upper_bound,
            objective=goal,
        )
        for goal in ('revenue', 'profit')
    }
    opt_price_rev, max_revenue = optimum['revenue']
    opt_price_profit, max_profit = optimum['profit']

    print(f"{cat} 카테고리:")
    print(f"  기준 가격: ${base_price}")
    print(f"  수익 최대화 가격: ${opt_price_rev:.0f} (수익: ${max_revenue:,.0f})")
    print(f"  이익 최대화 가격: ${opt_price_profit:.0f} (이익: ${max_profit:,.0f})")
    print()

가격 시나리오 분석

def scenario_analysis(model, base_data, scenarios):
    """Predict demand and revenue under a set of per-category price changes.

    Args:
        model: Fitted model accepted by ``predict_model``.
        base_data: DataFrame with ``category``, ``price`` and
            ``competitor_price`` columns plus the model's features. It is
            copied for each scenario and never modified.
        scenarios: Mapping of scenario name to ``{category: change}``,
            where ``change`` is a relative price change (``0.1`` = +10%).

    Returns:
        DataFrame with one row per scenario: ``Scenario``,
        ``Total Revenue``, ``Avg Demand`` and the predicted revenue of the
        Premium, Standard and Budget categories.
    """
    reported_categories = ('Premium', 'Standard', 'Budget')
    results = []

    for scenario_name, price_changes in scenarios.items():
        scenario_data = base_data.copy()

        for cat, change in price_changes.items():
            mask = scenario_data['category'] == cat
            scenario_data.loc[mask, 'price'] *= (1 + change)
            # Keep the derived feature in sync with the changed price.
            scenario_data.loc[mask, 'price_ratio'] = (
                scenario_data.loc[mask, 'price'] /
                scenario_data.loc[mask, 'competitor_price']
            )

        pred = predict_model(model, data=scenario_data, verbose=False)
        # Assign by position rather than by index label: base_data is often
        # a shuffled sample, and label alignment would silently produce NaN
        # if the prediction frame's index differs from the input's.
        scenario_data['predicted_demand'] = pred['prediction_label'].to_numpy()
        scenario_data['predicted_revenue'] = (
            scenario_data['price'] * scenario_data['predicted_demand']
        )

        row = {
            'Scenario': scenario_name,
            'Total Revenue': scenario_data['predicted_revenue'].sum(),
            'Avg Demand': scenario_data['predicted_demand'].mean(),
        }
        for cat in reported_categories:
            in_category = scenario_data['category'] == cat
            row[f'{cat} Revenue'] = scenario_data.loc[in_category, 'predicted_revenue'].sum()
        results.append(row)

    return pd.DataFrame(results)

# Scenario table: relative price change per (Premium, Standard, Budget).
_scenario_tiers = ('Premium', 'Standard', 'Budget')
_scenario_changes = {
    'Current': (0, 0, 0),
    'All +10%': (0.1, 0.1, 0.1),
    'All -10%': (-0.1, -0.1, -0.1),
    'Premium +20%': (0.2, 0, 0),
    'Budget -20%': (0, 0, -0.2),
    'Mixed Strategy': (0.15, -0.05, -0.1),
}
scenarios = {
    label: dict(zip(_scenario_tiers, deltas))
    for label, deltas in _scenario_changes.items()
}

# Evaluate every scenario on a fixed random sample of 500 rows.
test_data = data.sample(n=500, random_state=42)
scenario_results = scenario_analysis(tuned_model, test_data, scenarios)

print("\n=== 가격 시나리오 분석 ===\n")
print(scenario_results.to_string(index=False))

경쟁사 가격 대응 전략

def competitive_response(model, base_sample, our_price, competitor_prices):
    """Compute our best price for each hypothetical competitor price.

    For every value in ``competitor_prices`` a copy of ``base_sample`` gets
    that competitor price, and ``find_optimal_price`` (default objective)
    searches between 50% and 150% of ``our_price``.

    Returns:
        DataFrame with ``competitor_price``, ``optimal_price``,
        ``price_gap`` (ours minus theirs) and ``expected_revenue``.
    """
    search_low = our_price * 0.5
    search_high = our_price * 1.5

    def respond_to(rival_price):
        scenario = base_sample.copy()
        scenario['competitor_price'] = rival_price
        best_price, best_revenue = find_optimal_price(
            model, scenario, search_low, search_high
        )
        return {
            'competitor_price': rival_price,
            'optimal_price': best_price,
            'price_gap': best_price - rival_price,
            'expected_revenue': best_revenue,
        }

    return pd.DataFrame([respond_to(rival) for rival in competitor_prices])

# Premium category: our best response across competitor prices 300..700.
premium_rows = data[data['category'] == 'Premium']
sample = premium_rows.iloc[0:1].copy()
competitor_prices = np.linspace(300, 700, 9)

response = competitive_response(tuned_model, sample, 500, competitor_prices)

print("\n=== 경쟁사 가격 대응 전략 (Premium) ===\n")
print(response.to_string(index=False))

# Plot our optimal price against the competitor's, with a price-match diagonal.
match_line = [300, 700]
plt.figure(figsize=(10, 6))
plt.plot(
    response['competitor_price'],
    response['optimal_price'],
    'bo-',
    linewidth=2,
    markersize=8,
)
plt.plot(match_line, match_line, 'r--', label='Price Match')
plt.xlabel('Competitor Price ($)')
plt.ylabel('Our Optimal Price ($)')
plt.title('Competitive Pricing Response')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('competitive_response.png', dpi=150)

동적 가격 최적화

def dynamic_pricing(model, base_data, demand_threshold, max_adjustment=0.2):
    """Suggest a price adjustment per row from its predicted demand.

    Rows whose predicted demand exceeds 120% of ``demand_threshold`` get a
    price increase of ``min(0.1, max_adjustment)``; rows below 80% get a
    decrease of ``max(-0.15, -max_adjustment)``; all others are unchanged.

    Args:
        model: Fitted model accepted by ``predict_model``.
        base_data: DataFrame with ``price`` and ``category`` columns plus
            the model's features.
        demand_threshold: Reference demand level for the 80% / 120% bands.
        max_adjustment: Cap on the absolute relative price change.

    Returns:
        DataFrame with ``original_price``, ``current_demand``,
        ``adjustment`` (formatted such as ``"+10%"``), ``new_price`` and
        ``category``, one row per input row.
    """
    columns = ['original_price', 'current_demand', 'adjustment',
               'new_price', 'category']
    if len(base_data) == 0:
        return pd.DataFrame(columns=columns)

    # Predict all rows at once. Rebuilding one-row frames from iterrows()
    # would upcast mixed-type rows to object dtype and run the prediction
    # pipeline once per row.
    pred = predict_model(model, data=base_data, verbose=False)
    current_demand = pred['prediction_label'].to_numpy()

    price_increase = min(0.1, max_adjustment)
    price_decrease = max(-0.15, -max_adjustment)
    # np.select takes the first matching condition, like an if/elif chain.
    adjustment = np.select(
        [
            current_demand > demand_threshold * 1.2,   # high demand
            current_demand < demand_threshold * 0.8,   # low demand
        ],
        [price_increase, price_decrease],
        default=0.0,
    )

    original_price = base_data['price'].to_numpy()
    return pd.DataFrame({
        'original_price': original_price,
        'current_demand': current_demand,
        'adjustment': [f"{step:+.0%}" for step in adjustment],
        'new_price': original_price * (1 + adjustment),
        'category': base_data['category'].to_numpy(),
    }, columns=columns)

# Dynamic-pricing demo on ten randomly sampled rows.
sample_data = data.sample(n=10, random_state=42)
dynamic_results = dynamic_pricing(
    tuned_model,
    sample_data,
    demand_threshold=150,
)

print(
    "\n=== 동적 가격 조정 예시 ===\n",
    dynamic_results.to_string(index=False),
    sep="\n",
)

가격 최적화 대시보드 요약

def generate_pricing_summary(model, data, categories):
    """Print a per-category pricing recommendation report.

    For each category the first matching row of ``data`` is used as the
    reference scenario. The recommended price is searched between 50% and
    150% of the category's mean price, and the expected improvement compares
    the model's predicted revenue at the recommended price with its
    predicted revenue at the current mean price for that same row.

    Args:
        model: Fitted model accepted by ``predict_model``.
        data: DataFrame with ``category``, ``price`` and
            ``competitor_price`` columns plus the model's features.
        categories: Iterable of category labels to report on.

    Returns:
        None. The report is written to stdout.
    """
    print("=" * 60)
    print("          가격 최적화 요약 리포트")
    print("=" * 60)

    for cat in categories:
        cat_data = data[data['category'] == cat]
        if cat_data.empty:
            # Nothing to base a recommendation on; report it and move on.
            print(f"\n{cat} 카테고리: 데이터 없음")
            continue
        sample = cat_data.iloc[0:1].copy()

        current_price = cat_data['price'].mean()

        # Baseline: predicted revenue of the reference row at the current
        # price. Comparing against the category's mean observed revenue
        # would mix a single-row prediction with a multi-row average.
        baseline = sample.copy()
        baseline['price'] = current_price
        baseline['price_ratio'] = (
            current_price / baseline['competitor_price'].values[0]
        )
        baseline_pred = predict_model(model, data=baseline, verbose=False)
        current_revenue = (
            current_price * baseline_pred['prediction_label'].values[0]
        )

        # Recommended price for the same reference row.
        opt_price, expected_revenue = find_optimal_price(
            model, sample,
            current_price * 0.5, current_price * 1.5
        )

        improvement = (expected_revenue - current_revenue) / current_revenue * 100

        print(f"\n{cat} 카테고리:")
        print(f"  현재 평균 가격: ${current_price:.0f}")
        print(f"  권장 가격: ${opt_price:.0f}")
        print(f"  가격 변화: {(opt_price/current_price - 1)*100:+.1f}%")
        print(f"  예상 수익 개선: {improvement:+.1f}%")

    print("\n" + "=" * 60)

# Print the pricing summary for every category using the tuned demand model.
generate_pricing_summary(tuned_model, data, categories)

모델 저장

# Finalize the tuned model and save it under a single, shared name.
model_name = 'price_optimization_model'
final_model = finalize_model(tuned_model)

save_model(final_model, model_name)
print(f"\n모델 저장 완료: {model_name}.pkl")

실무 적용 팁

A/B 테스트: 최적 가격을 실제 적용 전 테스트
세분화: 고객 세그먼트별 다른 가격 전략
경쟁 모니터링: 실시간 경쟁사 가격 추적
제약 조건: 최저가 정책, 브랜드 포지셔닝 고려
시간 요소: 시간대/요일별 동적 가격 책정

정리

가격 최적화는 수요 예측 + 최적화의 조합
가격 탄력성이 핵심 개념
수익 최대화와 이익 최대화는 서로 다른 최적 가격점을 가짐
경쟁사 가격에 대한 대응 전략 필요
시나리오 분석으로 리스크 파악

다음 글 예고

다음 글에서는 클러스터링의 이해를 다룹니다.

PyCaret 머신러닝 마스터 시리즈 #055

개요

실습 환경

비즈니스 문제 정의

데이터 준비

탐색적 데이터 분석

수요 예측 모델

가격 탄력성 분석

최적 가격 찾기

가격 시나리오 분석

경쟁사 가격 대응 전략

동적 가격 최적화

가격 최적화 대시보드 요약

모델 저장

실무 적용 팁

정리

다음 글 예고

개요