# StockBackTester/test_compare_backtest_methods.py

import data_manager
import strategy
import pandas as pd
import numpy as np
import config
from typing import Optional, Dict, List, Any

# Number of tickers to include in the comparison (default: every ticker).
# The run can take a while. Earlier revisions capped this at a 50-ticker
# sample; the cap was lifted so the whole universe is compared.
MAX_TICKERS = None

# Build the ticker universe for whichever market(s) config.TARGET_MARKET selects.
tickers = {}
if config.TARGET_MARKET in ("KR", "BOTH"):
    tickers.update(
        data_manager.get_kr_tickers(
            config.KR_MARKET_CAP_START,
            config.KR_MARKET_CAP_END,
        )
    )
if config.TARGET_MARKET in ("US", "BOTH"):
    tickers.update(
        data_manager.get_us_tickers(
            config.US_MARKET_CAP_START,
            config.US_MARKET_CAP_END,
        )
    )

# Apply the optional cap; slicing with None keeps the full list.
selected = list(tickers)[:MAX_TICKERS]
print('Selected tickers:', len(selected))

# Helper: load data
def prepare_data(ticker: str) -> Optional[pd.DataFrame]:
    """Load price history for one ticker and attach the LAST_PEAK column.

    The requested window starts ``config.DATA_HISTORY_YEARS`` years before
    ``config.BACKTEST_START_DATE`` and ends at ``config.BACKTEST_END_DATE``.

    Args:
        ticker: Ticker code to load.

    Returns:
        The price frame with a ``LAST_PEAK`` column added and every row that
        contains a NaN dropped, or ``None`` when the loader returned no data
        or the peak series failed with ``ValueError``/``KeyError``.

    Raises:
        Exception: Any other error raised while computing the peak series is
            printed and then re-raised unchanged.
    """
    history_start = (
        pd.to_datetime(config.BACKTEST_START_DATE)
        - pd.DateOffset(years=config.DATA_HISTORY_YEARS)
    )
    df = data_manager.get_stock_data(
        ticker,
        history_start.strftime('%Y-%m-%d'),
        config.BACKTEST_END_DATE,
    )
    # Treat a missing result (None) the same as an empty frame: no data.
    if df is None or df.empty:
        return None
    try:
        df['LAST_PEAK'] = strategy.compute_last_peak_series(df, config.STOPLOSS_PEAK_FIND_PERIOD)
    except (ValueError, KeyError) as e:
        # Expected failure mode (message says: insufficient data) -> skip ticker.
        print(f'LAST_PEAK fail {ticker} (데이터 부족): {e}')
        return None
    except Exception as e:
        # Unexpected failure: report which ticker triggered it, then propagate.
        print(f'LAST_PEAK 예상치 못한 오류 {ticker}: {type(e).__name__}: {e}')
        raise
    return df.dropna()

# Load price data for every selected ticker; keep only frames that loaded
# successfully and have more than 10 usable rows.
stock_data = {}
for symbol in selected:
    frame = prepare_data(symbol)
    if frame is None or len(frame) <= 10:
        continue
    stock_data[symbol] = frame

print('Loaded data for', len(stock_data), 'tickers')

# Method A: precompute the vectorized buy-signal frame for each loaded ticker.
buy_signals_history = {
    symbol: strategy.compute_buy_signals_vectorized(frame)
    for symbol, frame in stock_data.items()
}

# Simple backtest runner used for both methods
from datetime import datetime

def _decide_exit(position: Dict[str, Any], today_row, prev_row, columns, force_expiry: bool) -> tuple:
    """Pick the exit rule that fires for one held position on one bar.

    Rules are checked in priority order: forced expiry, technical stop-loss,
    fixed-percentage stop-loss, partial profit-take, moving-average dead cross.

    Args:
        position: Portfolio entry (reads buy_price, stop_loss_price, partial_sold).
        today_row: Today's bar for the ticker.
        prev_row: The previous bar for the ticker.
        columns: Columns available on the ticker's frame (used to check that
            the moving-average columns exist before reading them).
        force_expiry: True on the forced-liquidation day.

    Returns:
        ``(exit_reason, sell_ratio)``; ``(None, 0.0)`` when nothing fires.
    """
    if force_expiry:
        # Forced liquidation overrides every other rule.
        return 'EXPIRED', 1.0

    buy_price = position['buy_price']

    if config.USE_TECHNICAL_STOPLOSS and today_row['Low'] <= position['stop_loss_price']:
        return 'STOP_LOSS_TECH', 1.0
    if config.USE_FIXED_PCT_STOPLOSS:
        fixed_sl = buy_price * (1 - config.FIXED_STOPLOSS_PCT / 100)
        if today_row['Low'] <= fixed_sl:
            return 'STOP_LOSS_FIXED', 1.0

    # Profit-taking is a one-time partial exit per position.
    if not position['partial_sold']:
        profit_target = buy_price * (1 + config.PROFIT_TAKE_PCT)
        if today_row['High'] >= profit_target:
            return 'PROFIT_TAKE', config.PROFIT_TAKE_SELL_RATIO

    if config.USE_TREND_EXIT_STRATEGY:
        ma_short = f'MA_{config.TREND_EXIT_MA_SHORT}'
        ma_long = f'MA_{config.TREND_EXIT_MA_LONG}'
        if ma_short in columns and ma_long in columns:
            # Dead cross: short MA was above the long MA yesterday and is below it today.
            was_above = prev_row[ma_short] > prev_row[ma_long]
            is_below = today_row[ma_short] < today_row[ma_long]
            if was_above and is_below:
                return 'TREND_EXIT_DC', 1.0

    return None, 0.0


def run_sim(use_vectorized: bool = True, *, expiry_date: str = '2023-12-29') -> Dict[str, Any]:
    """Run the shared backtest loop with one of the two buy-signal sources.

    Walks every calendar day between ``config.BACKTEST_START_DATE`` and
    ``config.BACKTEST_END_DATE``, processing sells first, then buys, then
    recording the day's portfolio equity. All fills use the day's Close.

    Args:
        use_vectorized: When True, buy signals are read from the precomputed
            ``buy_signals_history`` frames; when False, they are computed per
            day with ``strategy.check_buy_signal``.
        expiry_date: ``'YYYY-MM-DD'`` day on which every open position is
            force-sold with reason ``'EXPIRED'`` and no new buys are made.

    Returns:
        Dict with ``total_return`` (final equity / starting capital - 1),
        ``trades`` (number of sell records) and ``trade_log`` (list of sell
        records).
    """
    capital = config.BACKTEST_CAPITAL
    portfolio = {}
    trade_log = []
    daily_portfolio_value = []

    # Position size is a fixed fraction of the *starting* capital, so it does
    # not change during the run.
    investment_per_stock = config.BACKTEST_CAPITAL * config.INVESTMENT_PER_STOCK_PCT

    for today_date in pd.date_range(config.BACKTEST_START_DATE, config.BACKTEST_END_DATE):
        is_expiry_day = today_date.strftime('%Y-%m-%d') == expiry_date

        # ---- sells ----
        # Iterate over a snapshot because closed positions are deleted in-loop.
        for ticker in list(portfolio):
            df = stock_data[ticker]
            try:
                pos = df.index.get_loc(today_date)
            except KeyError:
                continue  # no bar for this ticker today
            if pos == 0:
                continue  # the rules need a previous bar
            today_data = df.iloc[pos]
            position = portfolio[ticker]
            position['highest_price'] = max(position['highest_price'], today_data['High'])

            sell_signal, sell_ratio = _decide_exit(
                position, today_data, df.iloc[pos - 1], df.columns, is_expiry_day
            )
            if not sell_signal:
                continue

            sell_price = today_data['Close']
            shares_to_sell = position['shares'] * sell_ratio
            if sell_ratio < 1.0:
                # Partial exits sell whole shares only.
                shares_to_sell = np.floor(shares_to_sell)
            if shares_to_sell <= 0:
                continue
            capital += shares_to_sell * sell_price * (1 - config.TRANSACTION_FEE_PCT)
            position['shares'] -= shares_to_sell
            profit_pct = (sell_price - position['buy_price']) / position['buy_price']
            trade_log.append({
                'ticker': ticker,
                'entry_date': position['buy_date'],
                'exit_date': today_date,
                'entry_price': position['buy_price'],
                'exit_price': sell_price,
                'shares_sold': shares_to_sell,
                'profit_pct': profit_pct,
                'exit_reason': sell_signal,
                'signal_type': position.get('signal_type'),
            })
            if sell_signal == 'PROFIT_TAKE':
                position['partial_sold'] = True
            if position['shares'] < 0.01:
                del portfolio[ticker]

        # ---- buys ----
        # No entries on the expiry day, so signals are not even evaluated then.
        can_buy = (
            not is_expiry_day
            and len(portfolio) < config.MAX_PORTFOLIO_SIZE
            and capital > 0
            and capital >= investment_per_stock
        )
        if can_buy:
            for ticker, df in stock_data.items():
                if len(portfolio) >= config.MAX_PORTFOLIO_SIZE:
                    break
                if ticker in portfolio:
                    continue
                try:
                    pos = df.index.get_loc(today_date)
                except KeyError:
                    continue
                if pos == 0:
                    continue

                if use_vectorized:
                    signals_df = buy_signals_history.get(ticker)
                    if signals_df is None:
                        continue
                    try:
                        row = signals_df.loc[today_date]
                    except KeyError:
                        continue
                    buy_signal = row['BUY_SIGNAL']
                    stop_loss = row['STOP_LOSS_PRICE']
                    signal_type = row['SIGNAL_TYPE']
                else:
                    # History passed to the strategy includes today's bar.
                    buy_signal, stop_loss, signal_type = strategy.check_buy_signal(
                        df.iloc[pos], df.iloc[pos - 1], df.iloc[:pos + 1]
                    )

                if not buy_signal:
                    continue
                buy_price = df.iloc[pos]['Close']
                # A stop at or above the entry price would trigger immediately.
                if stop_loss is None or stop_loss >= buy_price:
                    continue
                shares_to_buy = np.floor(
                    (investment_per_stock * (1 - config.TRANSACTION_FEE_PCT)) / buy_price
                )
                if shares_to_buy <= 0:
                    continue
                buy_value = shares_to_buy * buy_price * (1 + config.TRANSACTION_FEE_PCT)
                if capital < buy_value:
                    continue
                capital -= buy_value
                portfolio[ticker] = {
                    'buy_price': buy_price,
                    'shares': shares_to_buy,
                    'stop_loss_price': stop_loss,
                    'highest_price': buy_price,
                    'partial_sold': False,
                    'buy_date': today_date,
                    'signal_type': signal_type,
                    # Most recent close seen for this position; used to value
                    # it on calendar days that have no price bar.
                    'last_close': buy_price,
                }

        # ---- daily equity ----
        # The calendar includes days without bars (pd.date_range is daily), so
        # positions are marked at their last known close instead of dropping
        # out of the total on those days.
        equity = capital
        for held, held_pos in portfolio.items():
            bars = stock_data[held]
            if today_date in bars.index:
                held_pos['last_close'] = bars.loc[today_date]['Close']
            equity += held_pos['shares'] * held_pos['last_close']
        daily_portfolio_value.append((today_date, equity))

    # ---- summary stats ----
    if daily_portfolio_value:
        df_eq = pd.DataFrame(daily_portfolio_value, columns=['date', 'equity']).set_index('date')
        total_return = (df_eq['equity'].iloc[-1] / config.BACKTEST_CAPITAL) - 1
    else:
        # Empty date range: nothing was simulated.
        total_return = 0.0
    return {'total_return': total_return, 'trades': len(trade_log), 'trade_log': trade_log}

def _trade_keys(trade_log):
    """Reduce a trade log to a set of (ticker, entry 'YYYY-MM-DD', exit 'YYYY-MM-DD') keys."""
    return {
        (
            trade['ticker'],
            trade['entry_date'].strftime('%Y-%m-%d'),
            trade['exit_date'].strftime('%Y-%m-%d'),
        )
        for trade in trade_log
    }


print('Running vectorized method...')
res_vec = run_sim(use_vectorized=True)
print('Vectorized:', res_vec['total_return'], 'trades=', res_vec['trades'])

print('Running iterative method...')
res_iter = run_sim(use_vectorized=False)
print('Iterative:', res_iter['total_return'], 'trades=', res_iter['trades'])

# Compare trade lists (entry/exit pairs)
vec_set = _trade_keys(res_vec['trade_log'])
iter_set = _trade_keys(res_iter['trade_log'])

only_vec = vec_set - iter_set
only_iter = iter_set - vec_set

# Print a sorted sample so the output is reproducible across runs
# (set iteration order is arbitrary).
print('Only in vectorized trades:', len(only_vec))
for x in sorted(only_vec)[:10]:
    print(' ', x)
print('Only in iterative trades:', len(only_iter))
for x in sorted(only_iter)[:10]:
    print(' ', x)