Prediction, Continuous Values
It is often necessary to predict the future value of an ostensibly random variable or data series. Machine learning techniques can be used to identify patterns in the data that are not at first apparent from the associated plots.
Comparative Prediction Tasks
- Predict how many times a customer will call customer service in the next year.
- Corporate valuation.
- Salary prediction and recommendation.
- Predict the success of a product at launch.
- Predict the probabilistic distribution of hourly rain using polarimetric radar measurements.
- Predict the sale price at auction.
- Predict census return rates.
- Predict customer value.
- Predict the severity of claims/final cost.
- Predict how many clicks or how much interest something will receive based on its characteristics.
- House price valuations.
- Predict the duration of a process.
- Predict prescription volume.
Data Type | Description | Examples
---|---|---
Categorical | Data that can be discretely classified. | Country, exchange, currency, dummy variable, state, industry.
Continuous | Data that changes incrementally in value. | Past asset price, interest rate, competitor's price.
Stepped | Similar to continuous but changes infrequently. | P/E ratio, quarterly revenue.
Transformed category | A different data type converted to categorical. | Traded inside one standard deviation (yes/no); P/E above 10 (yes/no).
Models | The predictions of other models used as inputs. | ARIMA, AR, MA.
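To make the table concrete, here is a small hypothetical example of preparing each input type with pandas (the column names are invented for illustration):

```python
import pandas as pd

df = pd.DataFrame({
    "industry": ["tech", "energy", "tech"],  # categorical
    "past_price": [101.2, 98.7, 103.5],      # continuous
    "pe_ratio": [12.0, 8.5, 15.3],           # stepped
})

# Transformed category: a continuous column becomes a yes/no flag.
df["pe_above_10"] = (df["pe_ratio"] > 10).astype(int)

# Categorical columns become dummy variables for numeric models.
df = pd.get_dummies(df, columns=["industry"])
```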
premodel
```python
# Load data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")

# Explore for insights:
import matplotlib.pyplot as plt
plt.plot(mean_group)  # mean_group: an aggregate series computed during exploration
plt.show()

# Split the data into three sets (holdout, train, test):
from sklearn.model_selection import train_test_split
holdout_size = int(len(X) * 0.1)  # e.g. reserve 10% as a final holdout set
X_holdout, y_holdout = X.iloc[:holdout_size], y.iloc[:holdout_size]
X_rest, y_rest = X.iloc[holdout_size:], y.iloc[holdout_size:]
X_train, X_test, y_train, y_test = train_test_split(
    X_rest, y_rest, test_size=0.3, random_state=0)

# Add additional features:
mean = X_train[col].mean()
```
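The last step above only hints at feature engineering. A minimal sketch, assuming a shared numeric column `col`, with statistics computed on the training split only to avoid leakage:

```python
mean = X_train[col].mean()
std = X_train[col].std()
# Apply train-split statistics to both splits so the test set stays unseen.
for frame in (X_train, X_test):
    frame[f"{col}_zscore"] = (frame[col] - mean) / std
```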
model
```python
import lightgbm as lgbm

learning_rate = 0.8
num_leaves = 128
min_data_in_leaf = 1000
feature_fraction = 0.5
bagging_freq = 1000
num_boost_round = 1000

params = {"objective": "regression",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "min_data_in_leaf": min_data_in_leaf,
          "feature_fraction": feature_fraction,
          "bagging_freq": bagging_freq,
          "verbosity": 0,
          "metric": "l2_root",  # RMSE
          "nthread": 4,
          "subsample": 0.9}

dtrain = lgbm.Dataset(X_train, y_train)
dvalid = lgbm.Dataset(X_test, y_test, reference=dtrain)
bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=[dvalid],
                 verbose_eval=100, early_stopping_rounds=100)
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
```
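With early stopping, the booster records its best round. A quick readback of the validation score (a sketch, assuming the training call above, where LightGBM reports `l2_root` under the name `rmse`):

```python
# best_iteration is set by early stopping; best_score holds the validation metrics.
print("best iteration:", bst.best_iteration)
print("validation RMSE:", bst.best_score["valid_0"]["rmse"])
```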
An XGBoost regressor is a drop-in alternative:

```python
import xgboost as xgb

model = xgb.XGBRegressor(
    learning_rate=0.037, max_depth=5,
    min_child_weight=20, n_estimators=180,
    reg_lambda=0.8, booster='gbtree',
    subsample=0.9, n_jobs=-1)

# feature_names and target come from the loaded training data.
model.fit(train[feature_names], target)
pred = model.predict(test[feature_names])
```
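A common follow-up is to check which inputs drive the model. A short sketch using the fitted regressor above:

```python
import pandas as pd

# feature_importances_ is exposed by the scikit-learn style XGBRegressor.
importances = pd.Series(model.feature_importances_, index=feature_names)
print(importances.sort_values(ascending=False).head(10))
```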
A convolutional network can also serve as the regressor. Two Keras variants of the same idea follow, first with 2D convolutions:

```python
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED: the feature array, shaped (samples, 1, steps, channels).
    conv.add(Conv2D(20, (1, 4), input_shape=PRED.shape[1:4], activation='relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    # A sigmoid output assumes the target has been scaled to [0, 1].
    conv.add(Dense(1, activation='sigmoid'))
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd)
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
```
And with 1D convolutions:

```python
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED here is shaped (samples, steps, channels).
    conv.add(Conv1D(20, 4, input_shape=PRED.shape[1:3], activation='relu'))
    conv.add(MaxPooling1D(2))
    conv.add(Dense(50, activation='relu'))
    conv.add(Flatten())
    conv.add(Dense(1, activation='sigmoid'))  # target scaled to [0, 1]
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd)
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
```
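Both networks read from an array `PRED` that the snippets never define. A hypothetical reshape of the tabular features into the expected input shapes:

```python
import numpy as np

# Treat each row of features as a one-channel sequence.
PRED = X_train.values.reshape(len(X_train), 1, X_train.shape[1], 1)  # Conv2D: (samples, 1, steps, 1)
# PRED = X_train.values.reshape(len(X_train), X_train.shape[1], 1)   # Conv1D: (samples, steps, 1)
```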
postmodel
```python
# Predict:
y_pred = regressor.predict(X_test)     # regressor: whichever fitted model is being assessed
y_pred = sc.inverse_transform(y_pred)  # sc: the scaler applied to the target, if one was used
```

Assess the success of the prediction with regression metrics such as RMSE, MAE, and R² (see the sketch below). Then tweak parameters to optimise the metrics, select a new model, and repeat the process.
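A minimal sketch of the assessment step, using scikit-learn's metrics on the test split (assuming y_test is on the same scale as the inverse-transformed y_pred):

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # penalises large errors
mae = mean_absolute_error(y_test, y_pred)           # average absolute error
r2 = r2_score(y_test, y_pred)                       # variance explained
print(f"RMSE: {rmse:.4f}  MAE: {mae:.4f}  R2: {r2:.4f}")
```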
Final Showdown
Measure the performance of all models against the holdout set, and pick the final model.
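A sketch of this final comparison, assuming the holdout split from the premodel step; `xgb_model` and `keras_model` are stand-in names here, since the snippets above reuse the name `model`:

```python
import numpy as np
from sklearn.metrics import mean_squared_error

# Hypothetical line-up; add whichever candidates were trained above.
candidates = {"lightgbm": bst, "xgboost": xgb_model, "cnn": keras_model}
for name, candidate in candidates.items():
    holdout_rmse = np.sqrt(mean_squared_error(y_holdout, candidate.predict(X_holdout)))
    print(f"{name}: holdout RMSE = {holdout_rmse:.4f}")
```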