Prediction, Continuous Values
It is often necessary to predict the future value of an ostensibly random variable or data series. Machine learning techniques can be used to identify patterns in the data that are not at first apparent from the associated plots.
Comparative Prediction Tasks
- Predict how many times a customer will call customer service in the next year.
- Corporate valuation.
- Salary prediction and recommendation.
- Predict the success of a product at launch.
- Predict the probabilistic distribution of hourly rain using polarimetric radar measurements.
- Predict the sale price at auction.
- Predict census return rates.
- Predict customer value.
- Predict the severity of claims/final cost.
- Predict how many clicks or how much interest something will receive based on its characteristics.
- House price valuations.
- Predict the duration of a process.
- Predict prescription volume.
Data Type | Description | Examples
---|---|---
Categorical | Data that can be discretely classified. | Country, exchange, currency, dummy variable, state, industry.
Continuous | Data that changes incrementally in value. | Past asset price, interest rate, competitor's price.
Stepped | Similar to continuous but changes infrequently. | P/E ratio, quarterly revenue.
Transformed category | A different data type converted to categorical. | Traded inside one standard deviation (yes/no); P/E above 10 (yes/no).
Models | The predictions of other models used as inputs. | ARIMA, AR, MA.
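To make the table concrete, here is a small hypothetical example of preparing each input type with pandas (the column names are invented for illustration):

```python
import pandas as pd

df = pd.DataFrame({
    "industry": ["tech", "energy", "tech"],  # categorical
    "past_price": [101.2, 98.7, 103.5],      # continuous
    "pe_ratio": [12.0, 8.5, 15.3],           # stepped
})

# Transformed category: a continuous column becomes a yes/no flag.
df["pe_above_10"] = (df["pe_ratio"] > 10).astype(int)

# Categorical columns become dummy variables for numeric models.
df = pd.get_dummies(df, columns=["industry"])
```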
premodel
```python
# Load data:
import pandas as pd
train = pd.read_csv("../input/train_1.csv")

# Explore for insights:
import matplotlib.pyplot as plt
plt.plot(mean_group)  # mean_group: an aggregate series computed during exploration
plt.show()

# Split the data into three sets (holdout, train, test):
from sklearn.model_selection import train_test_split
holdout_size = int(len(X) * 0.1)  # e.g. reserve 10% as a final holdout set
X_holdout, y_holdout = X.iloc[:holdout_size], y.iloc[:holdout_size]
X_rest, y_rest = X.iloc[holdout_size:], y.iloc[holdout_size:]
X_train, X_test, y_train, y_test = train_test_split(
    X_rest, y_rest, test_size=0.3, random_state=0)

# Add additional features:
mean = X_train[col].mean()
```
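The last step above only hints at feature engineering. A minimal sketch, assuming a shared numeric column `col`, with statistics computed on the training split only to avoid leakage:

```python
mean = X_train[col].mean()
std = X_train[col].std()
# Apply train-split statistics to both splits so the test set stays unseen.
for frame in (X_train, X_test):
    frame[f"{col}_zscore"] = (frame[col] - mean) / std
```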
model
```python
import lightgbm as lgbm

learning_rate = 0.8
num_leaves = 128
min_data_in_leaf = 1000
feature_fraction = 0.5
bagging_freq = 1000
num_boost_round = 1000

params = {"objective": "regression",
          "boosting_type": "gbdt",
          "learning_rate": learning_rate,
          "num_leaves": num_leaves,
          "min_data_in_leaf": min_data_in_leaf,
          "feature_fraction": feature_fraction,
          "bagging_freq": bagging_freq,
          "verbosity": 0,
          "metric": "l2_root",  # RMSE
          "nthread": 4,
          "subsample": 0.9}

dtrain = lgbm.Dataset(X_train, y_train)
dvalid = lgbm.Dataset(X_test, y_test, reference=dtrain)
bst = lgbm.train(params, dtrain, num_boost_round, valid_sets=[dvalid],
                 verbose_eval=100, early_stopping_rounds=100)
y_pred = bst.predict(X_test, num_iteration=bst.best_iteration)
```
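With early stopping, the booster records its best round. A quick readback of the validation score (a sketch, assuming the training call above, where LightGBM reports `l2_root` under the name `rmse`):

```python
# best_iteration is set by early stopping; best_score holds the validation metrics.
print("best iteration:", bst.best_iteration)
print("validation RMSE:", bst.best_score["valid_0"]["rmse"])
```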
An XGBoost regressor is a drop-in alternative:

```python
import xgboost as xgb

model = xgb.XGBRegressor(
    learning_rate=0.037, max_depth=5,
    min_child_weight=20, n_estimators=180,
    reg_lambda=0.8, booster='gbtree',
    subsample=0.9, n_jobs=-1)

# feature_names and target come from the loaded training data.
model.fit(train[feature_names], target)
pred = model.predict(test[feature_names])
```
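A common follow-up is to check which inputs drive the model. A short sketch using the fitted regressor above:

```python
import pandas as pd

# feature_importances_ is exposed by the scikit-learn style XGBRegressor.
importances = pd.Series(model.feature_importances_, index=feature_names)
print(importances.sort_values(ascending=False).head(10))
```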
A convolutional network can also serve as the regressor. Two Keras variants of the same idea follow, first with 2D convolutions:

```python
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED: the feature array, shaped (samples, 1, steps, channels).
    conv.add(Conv2D(20, (1, 4), input_shape=PRED.shape[1:4], activation='relu'))
    conv.add(MaxPooling2D((1, 2)))
    conv.add(Flatten())
    # A sigmoid output assumes the target has been scaled to [0, 1].
    conv.add(Dense(1, activation='sigmoid'))
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd)
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
```
And with 1D convolutions:

```python
from keras.layers.convolutional import Conv1D, MaxPooling1D
from keras.optimizers import SGD
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.wrappers.scikit_learn import KerasRegressor

def create_model():
    conv = Sequential()
    # PRED here is shaped (samples, steps, channels).
    conv.add(Conv1D(20, 4, input_shape=PRED.shape[1:3], activation='relu'))
    conv.add(MaxPooling1D(2))
    conv.add(Dense(50, activation='relu'))
    conv.add(Flatten())
    conv.add(Dense(1, activation='sigmoid'))  # target scaled to [0, 1]
    sgd = SGD(lr=0.1, momentum=0.9, decay=0, nesterov=False)
    conv.compile(loss='mse', optimizer=sgd)
    return conv

model = KerasRegressor(build_fn=create_model, batch_size=500, epochs=20, verbose=1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
```
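Both networks read from an array `PRED` that the snippets never define. A hypothetical reshape of the tabular features into the expected input shapes:

```python
import numpy as np

# Treat each row of features as a one-channel sequence.
PRED = X_train.values.reshape(len(X_train), 1, X_train.shape[1], 1)  # Conv2D: (samples, 1, steps, 1)
# PRED = X_train.values.reshape(len(X_train), X_train.shape[1], 1)   # Conv1D: (samples, steps, 1)
```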
postmodel
```python
# Predict:
y_pred = regressor.predict(X_test)     # regressor: whichever fitted model is being assessed
y_pred = sc.inverse_transform(y_pred)  # sc: the scaler applied to the target, if one was used
```

Assess the success of the prediction with regression metrics such as RMSE, MAE, and R² (see the sketch below). Then tweak parameters to optimise the metrics, select a new model, and repeat the process.
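A minimal sketch of the assessment step, using scikit-learn's metrics on the test split (assuming y_test is on the same scale as the inverse-transformed y_pred):

```python
import numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

rmse = np.sqrt(mean_squared_error(y_test, y_pred))  # penalises large errors
mae = mean_absolute_error(y_test, y_pred)           # average absolute error
r2 = r2_score(y_test, y_pred)                       # variance explained
print(f"RMSE: {rmse:.4f}  MAE: {mae:.4f}  R2: {r2:.4f}")
```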
Final Showdown
Measure the performance of all models against the holdout set, and pick the final model.
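A sketch of this final comparison, assuming the holdout split from the premodel step; `xgb_model` and `keras_model` are stand-in names here, since the snippets above reuse the name `model`:

```python
import numpy as np
from sklearn.metrics import mean_squared_error

# Hypothetical line-up; add whichever candidates were trained above.
candidates = {"lightgbm": bst, "xgboost": xgb_model, "cnn": keras_model}
for name, candidate in candidates.items():
    holdout_rmse = np.sqrt(mean_squared_error(y_holdout, candidate.predict(X_holdout)))
    print(f"{name}: holdout RMSE = {holdout_rmse:.4f}")
```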