-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmomentum.py
More file actions
345 lines (267 loc) · 11.7 KB
/
momentum.py
File metadata and controls
345 lines (267 loc) · 11.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import yfinance as yf
import pandas as pd
import numpy as np
import ta
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import os
# ---------------------------------------------------------------------------
# Strategy configuration
# ---------------------------------------------------------------------------

# Trading universe: Yahoo Finance symbols (all carry the ".NS" suffix).
STOCKS = [
    'RELIANCE.NS',
    'TCS.NS',
    'INFY.NS',
    'HDFCBANK.NS',
    'ICICIBANK.NS',
    'BHARTIARTL.NS',
    'BAJFINANCE.NS',
    'HINDUNILVR.NS',
    'HCLTECH.NS',
    'MARUTI.NS',
]

# Full download window passed to the data fetcher.
START_DATE = '2015-01-01'
END_DATE = '2025-12-31'

# Chronological train/test split: rows dated up to TRAIN_END_DATE are used
# for fitting, rows dated from TEST_START_DATE onward for the backtest.
TRAIN_END_DATE = '2022-12-31'
TEST_START_DATE = '2023-01-01'

# Cost charged per side of a trade: 10 bps (0.1%).
TRANSACTION_COST = 0.001
def fetch_data(tickers, start, end):
    """Download price history for ``tickers`` via ``yfinance``.

    Args:
        tickers: Symbol or list of symbols forwarded to ``yf.download``.
        start: Start of the download window (forwarded as ``start``).
        end: End of the download window (forwarded as ``end``).

    Returns:
        Whatever ``yf.download`` returns when called with
        ``group_by='ticker'`` and ``auto_adjust=True``.
        # NOTE(review): presumably a DataFrame whose columns are grouped per
        # ticker -- confirm against the installed yfinance version.
    """
    print("Fetching data...")
    return yf.download(
        tickers,
        start=start,
        end=end,
        group_by='ticker',
        auto_adjust=True,
    )
def calculate_technical_indicators(df):
    """Return a copy of ``df`` with technical-indicator columns appended.

    The input is not modified. If no ``'Close'`` column is present, every
    column name is passed through ``str.capitalize`` first (e.g. ``'close'``
    becomes ``'Close'``). Missing values are forward-filled and then
    back-filled before any indicator is computed.

    Columns added, in order: ``RSI``, ``MACD``, ``MACD_Signal``, ``BB_High``,
    ``BB_Low``, ``ATR``, ``SMA_50``, ``EMA_20``, ``Return_1W``,
    ``Return_1M``, ``Volatility_1M``.

    Args:
        df: Daily price frame; the code reads its ``Close``, ``High`` and
            ``Low`` columns.

    Returns:
        The enriched DataFrame.
    """
    out = df.copy()
    if 'Close' not in out.columns:
        out.columns = [name.capitalize() for name in out.columns]

    # Fill gaps before computing indicators.
    # NOTE(review): the trailing bfill() copies later values backwards into
    # leading gaps, which is a (small) look-ahead -- confirm this is intended.
    out = out.ffill().bfill()

    close = out['Close']

    # Momentum: 14-period RSI.
    out['RSI'] = ta.momentum.RSIIndicator(close, window=14).rsi()

    # Trend: MACD line and its signal line (library default windows).
    macd_indicator = ta.trend.MACD(close)
    out['MACD'] = macd_indicator.macd()
    out['MACD_Signal'] = macd_indicator.macd_signal()

    # Bollinger upper/lower bands (library default parameters).
    bands = ta.volatility.BollingerBands(close)
    out['BB_High'] = bands.bollinger_hband()
    out['BB_Low'] = bands.bollinger_lband()

    # Average True Range from high/low/close (library default window).
    atr_indicator = ta.volatility.AverageTrueRange(out['High'], out['Low'], close)
    out['ATR'] = atr_indicator.average_true_range()

    # Moving averages.
    out['SMA_50'] = ta.trend.SMAIndicator(close, window=50).sma_indicator()
    out['EMA_20'] = ta.trend.EMAIndicator(close, window=20).ema_indicator()

    # Trailing returns over 5 and 21 rows.
    out['Return_1W'] = out['Close'].pct_change(5)
    out['Return_1M'] = out['Close'].pct_change(21)

    # 21-row rolling standard deviation of the closing *price* (not of
    # returns), so the value scales with the price level of the stock.
    out['Volatility_1M'] = out['Close'].rolling(window=21).std()

    return out
def prepare_weekly_data(data):
    """Build one feature/target row per ticker per trading week.

    For every ticker in ``STOCKS`` the daily frame is enriched with
    ``calculate_technical_indicators`` and then sampled weekly:

    * Rebalance day: the first trading day of each Monday-Sunday week.
    * Features: indicator values from the trading day immediately before
      the rebalance day, so nothing from the rebalance day itself is used.
    * ``Weekly_Return``: close-to-close return from this rebalance day to
      the next one. ``Target`` is 1 when that return is positive, else 0.

    Weeks are keyed with ``to_period('W')`` rather than
    ``strftime('%Y-%U')``: ``%U`` restarts at ``00`` on 1 January, so a week
    straddling New Year was split into two groups, producing a spurious
    mid-week rebalance and a shortened holding period.

    Args:
        data: Downloaded price data. ``data[ticker]`` must yield that
            ticker's daily frame; when ``STOCKS`` holds a single ticker and
            the lookup fails, ``data`` itself is used as the frame.
            Each frame is assumed to have a chronologically sorted
            ``DatetimeIndex``.

    Returns:
        DataFrame with the eleven feature columns plus ``Date`` (rebalance
        day), ``Ticker``, ``Target`` and ``Weekly_Return``; rows containing
        any NaN are dropped.

    Raises:
        ValueError: If no ticker yields any price rows.
    """
    print("Preparing weekly aligned data...")

    feature_cols = ['RSI', 'MACD', 'MACD_Signal', 'BB_High', 'BB_Low', 'ATR',
                    'SMA_50', 'EMA_20', 'Return_1W', 'Return_1M', 'Volatility_1M']

    weekly_frames = []
    for ticker in STOCKS:
        try:
            df = data[ticker].copy()
        except KeyError:
            # A single-ticker download may not be grouped by ticker at all.
            if len(STOCKS) == 1:
                df = data.copy()
            else:
                continue

        df = df.dropna(how='all')
        if df.empty:
            # Nothing was downloaded for this ticker; skip it.
            continue
        df = calculate_technical_indicators(df)

        # Row position of the first trading day in every Monday-Sunday week.
        week_of_row = df.index.to_period('W')
        first_positions = (
            pd.Series(np.arange(len(df)), index=week_of_row)
            .groupby(level=0)
            .min()
            .to_numpy()
        )

        closes = df['Close'].to_numpy()
        rows = []
        # Pair each rebalance day with the following one; the final week has
        # no exit price yet and is therefore never emitted.
        for pos, next_pos in zip(first_positions[:-1], first_positions[1:]):
            if pos == 0:
                # No prior trading day to take features from.
                continue

            # Buy at the close of this rebalance day, sell at the next one.
            weekly_return = (closes[next_pos] / closes[pos]) - 1

            row = df.iloc[pos - 1][feature_cols].to_dict()
            row['Date'] = df.index[pos]
            row['Ticker'] = ticker
            row['Target'] = 1 if weekly_return > 0 else 0
            row['Weekly_Return'] = weekly_return
            rows.append(row)

        weekly_frames.append(pd.DataFrame(rows))

    if not weekly_frames:
        raise ValueError("No price data available for any configured ticker")

    # Indicator warm-up periods leave NaNs in the early rows; drop them.
    return pd.concat(weekly_frames).dropna()
def train_model(train_df):
    """Fit a soft-voting ensemble on the training rows.

    The ensemble combines a random forest, a logistic regression and a
    gradient-boosting classifier, all seeded with ``random_state=42``.
    In-sample accuracy is printed after fitting.

    Args:
        train_df: Frame containing the eleven feature columns listed below
            and a ``Target`` label column.

    Returns:
        Tuple ``(fitted_classifier, feature_cols)`` where ``feature_cols``
        is the list of column names the model was trained on.
    """
    print("Training model...")

    feature_cols = ['RSI', 'MACD', 'MACD_Signal', 'BB_High', 'BB_Low', 'ATR',
                    'SMA_50', 'EMA_20', 'Return_1W', 'Return_1M', 'Volatility_1M']
    features, labels = train_df[feature_cols], train_df['Target']

    # Members of the voting ensemble, keyed by a short name.
    members = [
        ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
        ('lr', LogisticRegression(random_state=42, max_iter=1000)),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=42)),
    ]
    ensemble = VotingClassifier(estimators=members, voting='soft')
    ensemble.fit(features, labels)

    # Accuracy on the training data itself (not an out-of-sample estimate).
    print("Training Accuracy:", ensemble.score(features, labels))
    return ensemble, feature_cols
def backtest(model, test_df, feature_cols, top_n=2, initial_capital=10000,
             transaction_cost=None):
    """Simulate a weekly, equal-weight, top-N strategy on ``test_df``.

    For every distinct ``Date`` the rows are ranked by the model's
    probability of the positive class, the ``top_n`` highest are held with
    equal weight for the week, and a round-trip cost (entry + exit) is
    subtracted from the average ``Weekly_Return`` of the picks.

    ``test_df`` is not modified: scoring happens on an internal copy, so a
    caller passing a slice of a larger frame neither sees a new column nor
    triggers pandas' chained-assignment warning.

    Args:
        model: Object exposing ``predict_proba(X)`` returning an array whose
            column 1 is the positive-class probability.
        test_df: Frame with ``Date``, ``Ticker``, ``Weekly_Return`` and the
            columns named in ``feature_cols``.
        feature_cols: Column names fed to ``model.predict_proba``.
        top_n: Number of stocks held each week (default 2).
        initial_capital: Starting portfolio value (default 10000).
        transaction_cost: Cost per side as a fraction; ``None`` uses the
            module-level ``TRANSACTION_COST``.

    Returns:
        Tuple ``(results_df, picks_df)``. ``results_df`` has one row per
        date with ``Date``, ``Weekly_Return`` (net) and ``Portfolio_Value``.
        ``picks_df`` has one row per held stock with ``Date``, ``Ticker``,
        ``Prob``, ``Weight`` and ``Return`` (gross).

    Raises:
        ValueError: If ``top_n`` is less than 1.
    """
    print("Running backtest...")

    if top_n < 1:
        raise ValueError("top_n must be at least 1")
    if transaction_cost is None:
        transaction_cost = TRANSACTION_COST

    result_columns = ['Date', 'Weekly_Return', 'Portfolio_Value']
    pick_columns = ['Date', 'Ticker', 'Prob', 'Weight', 'Return']
    if len(test_df) == 0:
        return pd.DataFrame(columns=result_columns), pd.DataFrame(columns=pick_columns)

    scored = test_df.copy()
    scored['Prob_Positive'] = model.predict_proba(scored[feature_cols])[:, 1]

    # One buy and one sell per holding period.
    round_trip_cost = transaction_cost * 2

    portfolio_value = initial_capital
    result_rows = []
    records = []
    # groupby(sort=True) walks the rebalance dates in chronological order and
    # never yields an empty group, so returns and values stay aligned.
    for date, weekly_slice in scored.groupby('Date', sort=True):
        ranked = weekly_slice.sort_values(by='Prob_Positive', ascending=False)
        top_picks = ranked.head(top_n)

        # Equal weighting: portfolio return is the mean of the picks' returns.
        net_return = top_picks['Weekly_Return'].mean() - round_trip_cost
        portfolio_value = portfolio_value * (1 + net_return)
        result_rows.append({
            'Date': date,
            'Weekly_Return': net_return,
            'Portfolio_Value': portfolio_value,
        })

        # Fewer than top_n candidates may exist in a given week, so derive
        # the weight from the number actually held.
        weight = 1 / len(top_picks)
        for ticker, prob, gross_return in zip(top_picks['Ticker'],
                                              top_picks['Prob_Positive'],
                                              top_picks['Weekly_Return']):
            records.append({
                'Date': date,
                'Ticker': ticker,
                'Prob': prob,
                'Weight': weight,
                'Return': gross_return,
            })

    results_df = pd.DataFrame(result_rows, columns=result_columns)
    picks_df = pd.DataFrame(records, columns=pick_columns)
    return results_df, picks_df
def calculate_metrics(results_df, initial_capital=10000):
    """Summarise a weekly backtest into headline performance metrics.

    Args:
        results_df: Frame with a ``Weekly_Return`` column (net return per
            week) and a ``Portfolio_Value`` column (value after each week).
        initial_capital: Portfolio value before the first week; used to turn
            the final ``Portfolio_Value`` into a total return (default 10000).

    Returns:
        Dict with ``Total Return``, ``Annualized Return``,
        ``Annualized Volatility``, ``Sharpe Ratio`` (risk-free rate taken as
        0) and ``Max Drawdown`` (a non-positive fraction). When
        ``results_df`` has no rows every metric is ``0.0``.
    """
    metric_names = ('Total Return', 'Annualized Return', 'Annualized Volatility',
                    'Sharpe Ratio', 'Max Drawdown')
    n_weeks = len(results_df)
    if n_weeks == 0:
        # Nothing was traded; avoid indexing an empty frame / dividing by 0.
        return dict.fromkeys(metric_names, 0.0)

    returns = results_df['Weekly_Return']

    # Compound growth rescaled to a 52-week year.
    total_return = (results_df['Portfolio_Value'].iloc[-1] / initial_capital) - 1
    annualized_return = (1 + total_return) ** (52 / n_weeks) - 1

    # Weekly standard deviation scaled by sqrt(52).
    annualized_vol = returns.std() * np.sqrt(52)

    # std() is NaN for a single observation, and NaN != 0 is True, so test
    # for a usable positive volatility explicitly.
    if np.isfinite(annualized_vol) and annualized_vol > 0:
        sharpe_ratio = annualized_return / annualized_vol
    else:
        sharpe_ratio = 0

    # Drawdown is measured against the running peak of the equity curve.
    # The starting capital (1.0 in growth terms) counts as the first peak;
    # otherwise a loss in the very first week would never register.
    cum_returns = (1 + returns).cumprod()
    peak = cum_returns.cummax().clip(lower=1.0)
    drawdown = (cum_returns - peak) / peak
    max_drawdown = drawdown.min()

    return {
        'Total Return': total_return,
        'Annualized Return': annualized_return,
        'Annualized Volatility': annualized_vol,
        'Sharpe Ratio': sharpe_ratio,
        'Max Drawdown': max_drawdown,
    }
def main():
    """Run the pipeline end to end: fetch, prepare, train, backtest, report.

    Writes ``portfolio_performance.csv``, ``weekly_picks.csv`` and
    ``equity_curve.png`` into an ``outputs/`` directory (created if absent)
    and prints the performance metrics.

    Raises:
        SystemExit: If either the training or the test split has no rows.
    """
    # 1. Fetch data
    raw_data = fetch_data(STOCKS, START_DATE, END_DATE)

    # 2. Prepare weekly data
    full_df = prepare_weekly_data(raw_data)

    # 3. Chronological train/test split. Each split is copied so later
    #    column writes act on an independent frame rather than on a slice
    #    of full_df.
    train_df = full_df[full_df['Date'] <= TRAIN_END_DATE].copy()
    test_df = full_df[full_df['Date'] >= TEST_START_DATE].copy()
    print(f"Train size: {len(train_df)}, Test size: {len(test_df)}")
    if train_df.empty or test_df.empty:
        # Fail with a clear message instead of an error from deep inside
        # model fitting or prediction.
        raise SystemExit("Not enough data: the train or test split is empty.")

    # 4. Train model
    model, features = train_model(train_df)

    # 5. Backtest
    results, picks = backtest(model, test_df, features)

    # 6. Metrics
    metrics = calculate_metrics(results)
    print("Performance Metrics:")
    for name, value in metrics.items():
        print(f"{name}: {value:.4f}")

    # 7. Save outputs
    os.makedirs("outputs", exist_ok=True)
    results.to_csv("outputs/portfolio_performance.csv", index=False)
    picks.to_csv("outputs/weekly_picks.csv", index=False)

    # 8. Equity-curve plot
    fig = plt.figure(figsize=(12, 6))
    plt.plot(results['Date'], results['Portfolio_Value'])
    plt.title('Portfolio Equity Curve (2023-2025)')
    plt.xlabel('Date')
    plt.ylabel('Value')
    plt.grid(True)
    plt.savefig("outputs/equity_curve.png")
    # Release the figure once it is on disk.
    plt.close(fig)

    print("Outputs saved to 'outputs/' directory.")


if __name__ == "__main__":
    main()