Superparameter Tuning for Max Intra-day Trading Returns on Real Stock Data

--

If you are set on a specific trading strategy that relies on forecasted values and want to know how many lags to look back, how large a gap to leave between the lags and the target, the offset and other time-series factors, as well as the model hyperparameters, I recommend the methodology below.

import pandas as pd
# Load the minute-by-minute price history; the preview below shows
# "Datetime" and "Close" columns.
df = pd.read_csv(filepath_or_buffer="/content/stock_min_by_min.csv")
df.head()  # notebook preview of the first rows

Datetime Close
0 2024-01-02 09:30:00-05:00 491.709991
1 2024-01-02 09:31:00-05:00 491.453400
2 2024-01-02 09:32:00-05:00 488.750000
3 2024-01-02 09:33:00-05:00 486.135010
4 2024-01-02 09:34:00-05:00 483.345001
# Feature frame: only the close price, renamed to "target" so the lag
# features can be derived from it later.
X = df[["Close"]].rename(columns={"Close": "target"})
# Labels: the raw close prices as a NumPy array.
y = df["Close"].to_numpy()
import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit, cross_val_score
from scipy.stats import randint

class TimeSeriesFeatureCreator(BaseEstimator, TransformerMixin):
    """Add lagged copies of the ``target`` column as feature columns.

    Parameters
    ----------
    num_lags : int, default=1
        Number of lag columns to create (``lag_1`` .. ``lag_<num_lags>``).
    gap : int, default=0
        Extra rows skipped between the target and the first lag; column
        ``lag_k`` holds ``target`` shifted by ``k + gap`` rows.
    """

    def __init__(self, num_lags=1, gap=0):
        self.num_lags = num_lags
        self.gap = gap
        self.lags = []

    def fit(self, X, y=None):
        """Record the lag column names in ``self.lags`` and return ``self``."""
        self.lags = ['lag_' + str(k) for k in range(1, self.num_lags + 1)]
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the lag columns appended.

        ``X`` must contain a ``target`` column. Rows holding a NaN in any
        column (which includes the leading rows left empty by shifting)
        are removed from the result.
        """
        lag_columns = {
            f'lag_{k}': X['target'].shift(k + self.gap)
            for k in range(1, self.num_lags + 1)
        }
        # ``assign`` works on a copy, so the caller's frame is left untouched.
        return X.assign(**lag_columns).dropna()

class CustomTimeSeriesModel(BaseEstimator):
    """Regressor that builds lag features from ``target`` before fitting.

    Parameters
    ----------
    model : estimator
        Regressor that is cloned and trained on the lag features.
    feature_creator : transformer
        Object whose ``transform`` returns a frame holding a ``target``
        column plus the feature columns.
    n_splits : int, default=5
        Number of ``TimeSeriesSplit`` folds used by ``score``.
    """

    # NOTE: the default estimator instances are created once, at class
    # definition time, and are therefore shared by every model built without
    # explicit arguments. They are kept as real objects (not ``None``) because
    # nested ``set_params`` keys such as ``feature_creator__num_lags`` need an
    # estimator to delegate to.
    def __init__(self, model=RandomForestRegressor(), feature_creator=TimeSeriesFeatureCreator(), n_splits=5):
        self.model = model
        self.feature_creator = feature_creator
        self.n_splits = n_splits

    def _features_and_labels(self, X, y=None):
        """Return the lag features (``target`` removed) and the aligned labels.

        The feature creator drops rows, so the labels are taken from the tail
        of ``y`` to match the surviving row count. ``labels`` is ``None`` when
        ``y`` is not given.
        """
        features = self.feature_creator.transform(X).drop("target", axis=1)
        if y is None:
            return features, None
        # ``y[-0:]`` would return all of ``y``; an explicit start index yields
        # an empty tail when no rows survive the transform.
        start = max(len(y) - len(features), 0)
        return features, y[start:]

    def fit(self, X, y):
        """Fit the feature creator, then a fresh clone of ``model``."""
        # Prepare features
        self.feature_creator.fit(X, y)
        features, labels = self._features_and_labels(X, y)

        # Train the model
        self.model_ = clone(self.model)
        self.model_.fit(features, labels)
        return self

    def predict(self, X):
        """Predict one value per row that survives the lag transform."""
        features, _ = self._features_and_labels(X)
        return self.model_.predict(features)

    def score(self, X, y):
        """Return the mean time-series cross-validated score of ``model``.

        The scorer is mean squared error with ``greater_is_better=False``.
        Cross-validation works on the unfitted ``self.model``, not on the
        ``model_`` trained by ``fit``.
        """
        # The ``target`` column is removed here exactly as in fit/predict;
        # leaving it in would hand the label to the model as a feature.
        features, labels = self._features_and_labels(X, y)
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        scores = cross_val_score(
            self.model,
            features,
            labels,
            cv=tscv,
            scoring=make_scorer(mean_squared_error, greater_is_better=False),
        )
        return np.mean(scores)

# Random-search space. Keys use scikit-learn's ``<component>__<param>``
# routing; ``n_splits`` is sampled like any other hyperparameter.
param_distributions = dict(
    feature_creator__num_lags=randint(1, 10),
    feature_creator__gap=randint(0, 3),
    model__n_estimators=randint(100, 500),
    model__max_depth=randint(3, 10),
    n_splits=randint(2, 10),
)
def automatic_trading(df, initial_capital=10000):
    """Simulate an all-in / all-out strategy driven by price predictions.

    For each row, if ``close_future_prediction`` exceeds ``close_lag`` all
    available capital is converted to shares at ``close_lag``; otherwise all
    held shares are converted back to capital at ``close_lag``.

    Args:
        df: DataFrame with ``close_future_prediction`` and ``close_lag``
            columns, processed in row order.
        initial_capital: Starting cash balance.

    Returns:
        A tuple ``(final_value, capital_list, shares_list)``. ``final_value``
        is the remaining capital plus the shares valued at the last
        ``close_lag``. The two lists hold the balance after every row, each
        preceded by the initial state. For an empty ``df`` nothing is traded
        and ``final_value`` equals ``initial_capital``.
    """
    # Initialize investment account
    capital = initial_capital
    shares = 0

    capital_list = [initial_capital]
    shares_list = [0]

    # Only two columns are needed, so iterate their arrays instead of
    # building a Series per row with ``iterrows``.
    predicted_prices = df['close_future_prediction'].to_numpy()
    reference_prices = df['close_lag'].to_numpy()

    for predicted, price in zip(predicted_prices, reference_prices):
        if predicted > price:
            # Buy condition: invest all capital into shares
            if capital > 0:
                shares += capital / price
                capital = 0
        elif shares > 0:
            # Sell condition: convert all shares back to capital
            capital += shares * price
            shares = 0
        shares_list.append(shares)
        capital_list.append(capital)

    # No rows means no last price to value shares at (and no shares held).
    if len(reference_prices) == 0:
        return capital, capital_list, shares_list

    # Final value: remaining capital plus the value of any shares still held
    final_value = capital + shares * reference_prices[-1]

    return final_value, capital_list, shares_list
from sklearn.model_selection import ParameterSampler, cross_val_predict
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

n_iter = 50

# Create a ParameterSampler to sample parameter combinations
parameter_sampler = ParameterSampler(param_distributions, n_iter=n_iter, random_state=42)

# Per-combination results; the lists are index-aligned with each other.
predictions = []
scores = []
final_values = []
# Sampled parameters annotated with their "final_value", kept so the best
# configuration can be looked up after the search.
sampled_parameters = []

model = CustomTimeSeriesModel()
for parameters in parameter_sampler:
    print (parameters)
    print (parameters ["feature_creator__gap"])
    model.set_params(**parameters)
    model.fit(X, y)

    # NOTE(review): the model is fit on X and then predicts on the same X,
    # so the simulated returns below are in-sample. Confirm this is intended
    # or evaluate on a held-out period.
    df1 = model.feature_creator.transform(X)
    print (df1.head())
    y_pred = model.predict(X)

    score = model.score(X, y)
    print ("prediction score:" , score )
    print (df1.head())

    print (y_pred)

    # Build the trading frame in one chain. It is a new object, so adding the
    # prediction column never writes into a slice of the transformed frame.
    df1 = (
        df1[["target", "lag_1"]]
        .rename(columns={"lag_1": "close_lag"})
        .assign(close_future_prediction=y_pred)
    )

    final_value, capital_list, shares_list = automatic_trading(df1, initial_capital=10000)
    final_values.append(final_value)
    parameters["final_value"] = final_value
    sampled_parameters.append(dict(parameters))
    predictions.append(df1)
    print (final_value)

    scores.append(score)
{'feature_creator__gap': 2, 'feature_creator__num_lags': 4, 'model__max_depth': 7, 'model__n_estimators': 370, 'n_splits': 4}
target lag_1 lag_2 lag_3 lag_4
6 482.589996 486.135010 488.750000 491.453400 491.709991
7 483.170013 483.345001 486.135010 488.750000 491.453400
8 482.864990 484.839996 483.345001 486.135010 488.750000
9 481.690002 482.589996 484.839996 483.345001 486.135010
10 481.589996 483.170013 482.589996 484.839996 483.345001
[482.61790504 483.187212 482.83141683 ... 522.0712037 522.17596797
522.38500054]

Final_Value ($) : 13615.996061434647



{'feature_creator__gap': 0, 'feature_creator__num_lags': 5, 'model__max_depth': 9, 'model__n_estimators': 221, 'n_splits': 4}
target lag_1 lag_2 lag_3 lag_4 lag_5
5 484.839996 483.345001 486.135010 488.750000 491.453400 491.709991
6 482.589996 484.839996 483.345001 486.135010 488.750000 491.453400
7 483.170013 482.589996 484.839996 483.345001 486.135010 488.750000
8 482.864990 483.170013 482.589996 484.839996 483.345001 486.135010
9 481.690002 482.864990 483.170013 482.589996 484.839996 483.345001
[484.77945843 482.59792548 483.15686104 ... 522.03398457 522.15665184
522.39178218]

Final_Value ($): 13689.111506966563
sorted (final_values)

[11900.318454974984,
11932.61410255757,
.
.
.
14138.894921651507,
14151.962477070538,
14719.504022890886,
14729.886061307443,
15390.447992924986,
16458.42512150118,
17931.075553618284]

We have a winning strategy on this stock, totaling $17931.075553618284 from $10K of initial capital, using these superparameters:

{'feature_creator__gap': 0, 'feature_creator__num_lags': 1, 'model__max_depth': 6, 'model__n_estimators': 413, 'n_splits': 7}

--

--

Emad Ezzeldin ,Sr. DataScientist@UnitedHealthGroup

Data scientist with 5 years of experience and an MSc in Data Analytics from George Mason University. I enjoy experimenting with data science tools. emad.ezzeldin4@gmail.com