Superparameter Tuning for Max Intra-day Trading Returns on Real Stock Data

Emad Ezzeldin ,Sr. DataScientist@UnitedHealthGroup

3 min read · Feb 7, 2024

If you are set on a specific trading strategy that relies on forecasted values and want to know how many lags to look back, the gap between the lags and the target, the offset and other time-series factors, and the model hyperparameters, I recommend the methodology below.

import pandas as pd
# Read the minute-by-minute price history, then preview its first five rows.
df = pd.read_csv(filepath_or_buffer="/content/stock_min_by_min.csv")
df.head(5)

Datetime Close
0 2024-01-02 09:30:00-05:00 491.709991
1 2024-01-02 09:31:00-05:00 491.453400
2 2024-01-02 09:32:00-05:00 488.750000
3 2024-01-02 09:33:00-05:00 486.135010
4 2024-01-02 09:34:00-05:00 483.345001

# Feature frame: a single column, "target", holding the closing prices.
X = df[["Close"]].rename(columns={"Close": "target"})
# Label vector: the same closing prices as a plain array.
y = df.loc[:, "Close"].values

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, make_scorer
from sklearn.model_selection import RandomizedSearchCV, TimeSeriesSplit, cross_val_score
from scipy.stats import randint

class TimeSeriesFeatureCreator(BaseEstimator, TransformerMixin):
    """Add lagged copies of the ``target`` column as feature columns.

    Parameters
    ----------
    num_lags : int
        How many lag columns (``lag_1`` .. ``lag_<num_lags>``) to create.
    gap : int
        Extra offset added to every lag, so ``lag_k`` is the target shifted
        by ``k + gap`` rows.
    """

    def __init__(self, num_lags=1, gap=0):
        self.num_lags = num_lags
        self.gap = gap
        self.lags = []

    def fit(self, X, y=None):
        """Store the lag column names in ``self.lags`` and return ``self``."""
        self.lags = ['lag_' + str(step) for step in range(1, self.num_lags + 1)]
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the lag columns added.

        Rows containing any NaN (including those introduced by shifting)
        are removed from the result; ``X`` itself is left untouched.
        """
        frame = X.copy()
        for step in range(1, self.num_lags + 1):
            offset = step + self.gap
            frame[f'lag_{step}'] = frame['target'].shift(offset)
        # Shifting leaves NaN in the first rows; discard every incomplete row.
        return frame.dropna()

class CustomTimeSeriesModel(BaseEstimator):
    """Regressor wrapper that builds lag features before fitting and predicting.

    Parameters
    ----------
    model : estimator
        Template regressor. ``fit`` trains a clone of it (``model_``);
        ``score`` hands the template itself to ``cross_val_score``.
    feature_creator : transformer
        Object whose ``transform(X)`` returns a frame with a ``target``
        column plus the feature columns.
    n_splits : int
        Number of ``TimeSeriesSplit`` folds used by ``score``.
    """

    # NOTE: the default ``model`` and ``feature_creator`` objects are created
    # once, when this class is defined, so every instance built without
    # arguments shares them, and nested ``set_params`` calls mutate those
    # shared objects. Pass explicit instances when several models must be
    # configured independently.
    def __init__(self, model=RandomForestRegressor(), feature_creator=TimeSeriesFeatureCreator(), n_splits=5):
        self.model = model
        self.feature_creator = feature_creator
        self.n_splits = n_splits

    def _features_and_target(self, X, y):
        """Return the lag-feature matrix (without ``target``) and the aligned ``y``."""
        frame = self.feature_creator.transform(X)
        features = frame.drop("target", axis=1)
        # The feature creator is expected to drop rows only from the start of
        # the series, so the labels that remain are the trailing len(frame)
        # entries of y. An explicit start index is used because y[-0:] would
        # return all of y when no rows survive.
        target = y[len(y) - len(frame):]
        return features, target

    def fit(self, X, y):
        """Fit the feature creator, then train a fresh clone of ``model``."""
        self.feature_creator.fit(X, y)
        features, target = self._features_and_target(X, y)

        self.model_ = clone(self.model)
        self.model_.fit(features, target)
        return self

    def predict(self, X):
        """Predict from the lag features derived from ``X``.

        ``X`` must contain the ``target`` column the feature creator shifts;
        one prediction is returned per row that survives ``transform``.
        """
        frame = self.feature_creator.transform(X)
        return self.model_.predict(frame.drop("target", axis=1))

    def score(self, X, y):
        """Return the mean negated MSE over ``n_splits`` time-series folds.

        The ``target`` column is excluded from the features, exactly as in
        ``fit`` and ``predict``; leaving it in would hand the label to the
        model and make the cross-validated error meaningless.
        """
        features, target = self._features_and_target(X, y)
        tscv = TimeSeriesSplit(n_splits=self.n_splits)
        scores = cross_val_score(
            self.model,
            features,
            target,
            cv=tscv,
            scoring=make_scorer(mean_squared_error, greater_is_better=False),
        )
        return np.mean(scores)

# Search space for the random search. scipy's randint(low, high) draws
# integers from low up to, but not including, high.
param_distributions = dict(
    feature_creator__num_lags=randint(1, 10),
    feature_creator__gap=randint(0, 3),
    model__n_estimators=randint(100, 500),
    model__max_depth=randint(3, 10),
    n_splits=randint(2, 10),  # the CV fold count is tuned like any other parameter
)

def automatic_trading(df, initial_capital=10000):
    """Simulate an all-in / all-out strategy driven by price forecasts.

    Rows are processed in order. When ``close_future_prediction`` is greater
    than ``close_lag`` and cash is available, all cash is converted to shares
    at the ``close_lag`` price. Otherwise, any shares held are sold at the
    ``close_lag`` price. Proceeds are always fully reinvested, so gains
    compound from trade to trade.

    Args:
        df: DataFrame with ``close_future_prediction`` and ``close_lag``
            columns.
        initial_capital: Cash available before the first row.

    Returns:
        A tuple ``(final_value, capital_list, shares_list)``.
        ``final_value`` is the remaining cash plus the shares valued at the
        last ``close_lag``. The two lists hold the cash and share balances,
        starting with the initial state and followed by one entry per row.
    """
    capital = initial_capital
    shares = 0

    capital_list = [initial_capital]
    shares_list = [0]

    # Nothing to trade on: the account is unchanged. This also avoids
    # indexing the last row of an empty frame.
    if len(df) == 0:
        return capital, capital_list, shares_list

    forecasts = df['close_future_prediction']
    prices = df['close_lag']

    # Walking the two columns directly avoids building a Series per row,
    # which is what iterrows() does.
    for forecast, price in zip(forecasts, prices):
        if forecast > price:
            # Buy signal: move all available cash into shares.
            if capital > 0:
                shares += capital / price
                capital = 0
        elif shares > 0:
            # Sell signal: convert every share back to cash.
            capital += shares * price
            shares = 0
        shares_list.append(shares)
        capital_list.append(capital)

    # Value any open position at the last available price.
    final_value = capital + shares * prices.iloc[-1]

    return final_value, capital_list, shares_list

from sklearn.model_selection import ParameterSampler, cross_val_predict
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

n_iter = 50

# Create a ParameterSampler to sample parameter combinations
parameter_sampler = ParameterSampler(param_distributions, n_iter=n_iter, random_state=42)

# Per-trial results; the lists are index-aligned (entry i belongs to trial i).
predictions = []
scores = []
final_values = []
trials = []  # sampled parameters of each trial, plus its "final_value"

model = CustomTimeSeriesModel()
for parameters in parameter_sampler:
    print(parameters)
    print(parameters["feature_creator__gap"])
    model.set_params(**parameters)
    # NOTE(review): the model is fit on the full series and then predicts
    # that same series, so the simulated returns below are in-sample.
    model.fit(X, y)

    # Lag-feature frame for this trial; its rows line up with y_pred.
    df1 = model.feature_creator.transform(X)
    print(df1.head())
    y_pred = model.predict(X)

    score = model.score(X, y)
    print("prediction score:", score)
    print(df1.head())

    # Explicit copy, so that adding the prediction column does not write
    # into a column-subset of the feature frame.
    df1 = df1[["target", "lag_1"]].copy()

    print(y_pred)

    df1["y_pred"] = y_pred
    df1 = df1.rename({"lag_1": "close_lag", "y_pred": "close_future_prediction"}, axis=1)

    final_value, capital_list, shares_list = automatic_trading(df1, initial_capital=10000)
    final_values.append(final_value)
    parameters["final_value"] = final_value
    # Keep the parameters next to their outcome so the best configuration can
    # be looked up afterwards, e.g. max(trials, key=lambda t: t["final_value"]).
    trials.append(parameters)
    predictions.append(df1)
    print(final_value)

    scores.append(score)

{'feature_creator__gap': 2, 'feature_creator__num_lags': 4, 'model__max_depth': 7, 'model__n_estimators': 370, 'n_splits': 4}
        target       lag_1       lag_2       lag_3       lag_4
6   482.589996  486.135010  488.750000  491.453400  491.709991
7   483.170013  483.345001  486.135010  488.750000  491.453400
8   482.864990  484.839996  483.345001  486.135010  488.750000
9   481.690002  482.589996  484.839996  483.345001  486.135010
10  481.589996  483.170013  482.589996  484.839996  483.345001
[482.61790504 483.187212   482.83141683 ... 522.0712037  522.17596797
 522.38500054]

Final_Value ($) : 13615.996061434647



{'feature_creator__gap': 0, 'feature_creator__num_lags': 5, 'model__max_depth': 9, 'model__n_estimators': 221, 'n_splits': 4}
       target       lag_1       lag_2       lag_3       lag_4       lag_5
5  484.839996  483.345001  486.135010  488.750000  491.453400  491.709991
6  482.589996  484.839996  483.345001  486.135010  488.750000  491.453400
7  483.170013  482.589996  484.839996  483.345001  486.135010  488.750000
8  482.864990  483.170013  482.589996  484.839996  483.345001  486.135010
9  481.690002  482.864990  483.170013  482.589996  484.839996  483.345001
[484.77945843 482.59792548 483.15686104 ... 522.03398457 522.15665184
 522.39178218]

Final_Value ($): 13689.111506966563

# Show the simulated final portfolio values in ascending order.
sorted(final_values, reverse=False)

[11900.318454974984,
 11932.61410255757,
 .
 .
 .
 14138.894921651507,
 14151.962477070538,
 14719.504022890886,
 14729.886061307443,
 15390.447992924986,
 16458.42512150118,
 17931.075553618284]

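We have a winning strategy on this stock, totaling $17931.075553618284 from $10K of initial capital, using the superparameters below. Note that each model is fit and then evaluated on the same price series, so this figure is an in-sample result.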

{'feature_creator__gap': 0, 'feature_creator__num_lags': 1, 'model__max_depth': 6, 'model__n_estimators': 413, 'n_splits': 7}

Superparameter Tuning for Max Intra-day Trading Returns on Real Stock Data

Written by Emad Ezzeldin ,Sr. DataScientist@UnitedHealthGroup