import math
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from quantile_forest import RandomForestQuantileRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_squared_log_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
"display.max_columns", None)
pd.set_option(
= Path.cwd().parent.parent
PROJECT_ROOT
"figure.facecolor"] = (1, 1, 1, 0) # RGBA tuple with alpha=0
plt.rcParams["axes.facecolor"] = (1, 1, 1, 0) # RGBA tuple with alpha=0 plt.rcParams[
Decision Optimisation for Continuous Outcomes
The data that we will use comes from the Grupo Bimbo Inventory Demand Kaggle competition.
The goal is to predict the demand for a product in a given week, at a particular store (column Demanda_uni_equil). The dataset consists of 9 weeks of sales transactions in Mexico. Each transaction records sales and returns; returns are products that went unsold and expired. The demand for a product in a given week is defined as that week's sales minus the following week's returns.
= pd.read_csv(f"{PROJECT_ROOT}/data/grupo-bimbo-inventory-demand/train.csv", nrows=200000, low_memory=False)
data = pd.read_csv(f"{PROJECT_ROOT}/data/grupo-bimbo-inventory-demand/cliente_tabla.csv", low_memory=False)
clientes = pd.read_csv(f"{PROJECT_ROOT}/data/grupo-bimbo-inventory-demand/producto_tabla.csv", low_memory=False)
productos = pd.read_csv(f"{PROJECT_ROOT}/data/grupo-bimbo-inventory-demand/town_state.csv", low_memory=False)
town_state
= pd.merge(data, clientes, on="Cliente_ID", how="left")
data = pd.merge(data, productos, on="Producto_ID", how="left")
data = pd.merge(data, town_state, on="Agencia_ID", how="left") data
data
 | Semana | Agencia_ID | Canal_ID | Ruta_SAK | Cliente_ID | Producto_ID | Venta_uni_hoy | Venta_hoy | Dev_uni_proxima | Dev_proxima | Demanda_uni_equil | NombreCliente | NombreProducto | Town | State |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | 1110 | 7 | 3301 | 15766 | 1212 | 3 | 25.14 | 0 | 0.0 | 3 | PUESTO DE PERIODICOS LAZARO | Roles Canela 2p 120g BIM 1212 | 2008 AG. LAGO FILT | MÉXICO, D.F. |
1 | 3 | 1110 | 7 | 3301 | 15766 | 1216 | 4 | 33.52 | 0 | 0.0 | 4 | PUESTO DE PERIODICOS LAZARO | Roles Glass 2p 135g BIM 1216 | 2008 AG. LAGO FILT | MÉXICO, D.F. |
2 | 3 | 1110 | 7 | 3301 | 15766 | 1238 | 4 | 39.32 | 0 | 0.0 | 4 | PUESTO DE PERIODICOS LAZARO | Panquecito Gota Choc 2p 140g BIM 1238 | 2008 AG. LAGO FILT | MÉXICO, D.F. |
3 | 3 | 1110 | 7 | 3301 | 15766 | 1240 | 4 | 33.52 | 0 | 0.0 | 4 | PUESTO DE PERIODICOS LAZARO | Mantecadas Vainilla 4p 125g BIM 1240 | 2008 AG. LAGO FILT | MÉXICO, D.F. |
4 | 3 | 1110 | 7 | 3301 | 15766 | 1242 | 3 | 22.92 | 0 | 0.0 | 3 | PUESTO DE PERIODICOS LAZARO | Donitas Espolvoreadas 6p 105g BIM 1242 | 2008 AG. LAGO FILT | MÉXICO, D.F. |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
200735 | 3 | 1116 | 1 | 1466 | 2309869 | 1238 | 8 | 78.64 | 0 | 0.0 | 8 | UNION DEL VALLE 2 | Panquecito Gota Choc 2p 140g BIM 1238 | 2011 AG. SAN ANTONIO | MÉXICO, D.F. |
200736 | 3 | 1116 | 1 | 1466 | 2309869 | 1240 | 8 | 67.04 | 0 | 0.0 | 8 | UNION DEL VALLE 2 | Mantecadas Vainilla 4p 125g BIM 1240 | 2011 AG. SAN ANTONIO | MÉXICO, D.F. |
200737 | 3 | 1116 | 1 | 1466 | 2309869 | 1242 | 6 | 45.84 | 0 | 0.0 | 6 | UNION DEL VALLE 2 | Donitas Espolvoreadas 6p 105g BIM 1242 | 2011 AG. SAN ANTONIO | MÉXICO, D.F. |
200738 | 3 | 1116 | 1 | 1466 | 2309869 | 1250 | 27 | 206.28 | 0 | 0.0 | 27 | UNION DEL VALLE 2 | Donas Azucar 4p 105g BIM 1250 | 2011 AG. SAN ANTONIO | MÉXICO, D.F. |
200739 | 3 | 1116 | 1 | 1466 | 2309869 | 1278 | 13 | 58.50 | 0 | 0.0 | 13 | UNION DEL VALLE 2 | Nito 1p 62g BIM 1278 | 2011 AG. SAN ANTONIO | MÉXICO, D.F. |
200740 rows × 15 columns
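Before modelling, it's worth sanity-checking the demand definition above. The snippet below is our own check, not part of the original pipeline (the clip to zero is an assumption, based on the target being a non-negative adjusted demand); it recomputes the target from the sales and returns columns:

derived_demand = (data["Venta_uni_hoy"] - data["Dev_uni_proxima"]).clip(lower=0)
# Fraction of rows where the recomputed value matches Demanda_uni_equil:
print((derived_demand == data["Demanda_uni_equil"]).mean())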
= ["Agencia_ID", "Canal_ID", "Ruta_SAK", "Cliente_ID", "Producto_ID"]
categorical_cols
= {}
label_encoders for col in categorical_cols:
= LabelEncoder()
le
le.fit(data[col])= le.transform(data[col])
data[col] = le label_encoders[col]
= {col: data[col].nunique() for col in categorical_cols}
num_unique_vals = {col: min(50, num_unique_vals[col] // 2) for col in categorical_cols} embedding_sizes
num_unique_vals
{'Agencia_ID': 6,
'Canal_ID': 6,
'Ruta_SAK': 343,
'Cliente_ID': 10472,
'Producto_ID': 478}
embedding_sizes
{'Agencia_ID': 3,
'Canal_ID': 3,
'Ruta_SAK': 50,
'Cliente_ID': 50,
'Producto_ID': 50}
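For example, Cliente_ID has 10,472 unique values, so the heuristic gives min(50, 10472 // 2) = 50, while Agencia_ID has only 6, giving min(50, 6 // 2) = 3.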
X = data[categorical_cols].values
y = data["Demanda_uni_equil"].values
X
array([[ 0, 3, 293, 3, 43],
[ 0, 3, 293, 3, 44],
[ 0, 3, 293, 3, 48],
...,
[ 5, 0, 235, 7722, 50],
[ 5, 0, 235, 7722, 51],
[ 5, 0, 235, 7722, 53]])
y
array([ 3, 4, 4, ..., 6, 27, 13])
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=0)
class BimboDataset(Dataset):
    def __init__(self, X, y):
        # One long tensor per categorical column, plus a float target.
        self.X = [torch.tensor(X[:, i], dtype=torch.long) for i in range(X.shape[1])]
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        return [x[idx] for x in self.X], self.y[idx]
train_dataset = BimboDataset(X_train, y_train)
val_dataset = BimboDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
class SimpleModel(nn.Module):
    def __init__(self, embedding_sizes, hidden_size=128):
        super(SimpleModel, self).__init__()
        self.embeddings = nn.ModuleList(
            [nn.Embedding(num_unique_vals[col], embedding_sizes[col]) for col in categorical_cols]
        )
        self.fc1 = nn.Linear(sum(embedding_sizes.values()), hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = [embedding(x_i) for x_i, embedding in zip(x, self.embeddings)]
        x = torch.cat(x, dim=-1)
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x).squeeze(-1)
        return x
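As a quick illustrative check (the underscore-prefixed names are throwaway and not used later), we can push one batch through an untrained model to confirm the list-of-tensors input format and that we get one prediction per row:

_model = SimpleModel(embedding_sizes)
_inputs, _targets = next(iter(train_loader))
# Each element of _inputs is a [batch_size] tensor of category indices.
print(_model(_inputs).shape)  # expected: torch.Size([128])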
def train_model(loss_fn, num_epochs=5):
    model = SimpleModel(embedding_sizes)
    optimizer = optim.Adam(model.parameters(), lr=0.005)

    for epoch in range(num_epochs):
        # Training loop
        model.train()
        train_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs).squeeze()
            loss = loss_fn(outputs, targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)

        # Validation loop
        model.eval()
        val_loss = 0.0
        val_preds = []
        val_targets = []
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs).squeeze()
                loss = loss_fn(outputs, targets)
                val_loss += loss.item()
                val_preds.extend(outputs.tolist())
                val_targets.extend(targets.tolist())
        val_loss /= len(val_loader)
        r2 = r2_score(val_targets, val_preds)
        val_preds = np.clip(val_preds, 0, None)  # RMSLE requires non-negative predictions
        rmsle = np.sqrt(mean_squared_log_error(val_targets, val_preds))
        print(
            {
                "epoch": epoch,
                "train_loss": round(train_loss, 5),
                "val_loss": round(val_loss, 5),
                "r_squared": round(r2, 5),
                "rmsle": round(rmsle, 5),
            }
        )
    return model, np.array(val_preds), np.array(val_targets)
business_metrics = pd.DataFrame(
    columns=[
        "Model Name",
        "Understocked Fraction",
        "Understocked Amount",
        "Overstocked Fraction",
        "Overstocked Amount",
        "Utility",
        "MAE",
        "MSE",
        "R2",
        "RMSLE",
    ]
)
def log_business_metrics(model_name: str, stocking_decisions, actual_demand):
    frac_understocks = (stocking_decisions < actual_demand).mean()
    total_understocked_amt = (actual_demand - stocking_decisions).clip(0).sum()
    frac_overstocks = (stocking_decisions > actual_demand).mean()
    total_overstocked_amt = (stocking_decisions - actual_demand).clip(0).sum()
    # Understocking is three times as costly as overstocking.
    utility = -3 * total_understocked_amt - total_overstocked_amt
    mae = mean_absolute_error(actual_demand, stocking_decisions)
    mse = mean_squared_error(actual_demand, stocking_decisions)
    r2 = r2_score(actual_demand, stocking_decisions)
    rmsle = np.sqrt(mean_squared_log_error(actual_demand, stocking_decisions))

    df = pd.DataFrame(
        data={
            "Model Name": model_name,
            "Understocked Fraction": frac_understocks,
            "Understocked Amount": total_understocked_amt,
            "Overstocked Fraction": frac_overstocks,
            "Overstocked Amount": total_overstocked_amt,
            "Utility": utility,
            "MAE": mae,
            "MSE": mse,
            "R2": r2,
            "RMSLE": rmsle,
        },
        index=[0],
    )
    return df
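As a toy illustration of the utility calculation (made-up numbers, not from the dataset): if we stock [5, 5] and actual demand is [7, 4], we understock by 2 units on the first row and overstock by 1 on the second, so utility = -3 × 2 - 1 = -7.

# Hypothetical example: understocked by 2 (costing 6) plus overstocked by 1 (costing 1).
log_business_metrics("toy example", np.array([5, 5]), np.array([7, 4]))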
loss = nn.MSELoss()
mse_model, mse_val_preds, mse_val_targets = train_model(loss, num_epochs=5)
{'epoch': 0, 'train_loss': 376.08925, 'val_loss': 227.47458, 'r_squared': 0.63116, 'rmsle': 0.67018}
{'epoch': 1, 'train_loss': 246.72899, 'val_loss': 202.99381, 'r_squared': 0.67091, 'rmsle': 0.60655}
{'epoch': 2, 'train_loss': 188.01208, 'val_loss': 157.39294, 'r_squared': 0.74483, 'rmsle': 0.59883}
{'epoch': 3, 'train_loss': 154.99719, 'val_loss': 191.1642, 'r_squared': 0.69006, 'rmsle': 0.58823}
{'epoch': 4, 'train_loss': 139.73279, 'val_loss': 172.35903, 'r_squared': 0.72053, 'rmsle': 0.58192}
mse_val_stock = np.ceil(mse_val_preds)
bm1 = log_business_metrics("MSE model", mse_val_stock, mse_val_targets)
pd.concat([business_metrics, bm1], axis=0, ignore_index=True)
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.40799 | 173.166235 | 0.719482 | 0.627397 |
With the stocking rule above, we understock about 28% of the time. This seems bad, so let's try a different approach to making the stocking decision.
Below we take the existing predictions, multiply them by 1.5, and round up. In other words, we are going to stock 50% above the model's predictions and see what happens.
alternative_stocking_rule = np.ceil(1.5 * mse_val_preds)
bm2 = log_business_metrics("MSE model * 1.5", alternative_stocking_rule, mse_val_targets)
pd.concat([business_metrics, bm1, bm2], axis=0, ignore_index=True)
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.407990 | 173.166235 | 0.719482 | 0.627397 |
1 | MSE model * 1.5 | 0.135075 | 32762.0 | 0.802904 | 226306.0 | -324592.0 | 6.452825 | 322.720982 | 0.477213 | 0.775845 |
We now understock only ~14% of the time. We paid for this by increasing the fraction of the time we are overstocked from 63% to 80%.

Looking at the size of the overstocks, we can see that we went from 104,514 to 226,306 units. In other words, we've more than doubled how much unnecessary stock we are buying. This suggests we would be best off focusing on improvements to our model that bring this number down.
Using an alternative loss function (L1 loss)
Currently our model is attempting to minimise the mean squared error. This is a sensible choice for many problems but it is not the only choice. For example, we could instead try to minimise the mean absolute error. This is known as the L1 loss.
loss = nn.L1Loss()
mae_model, mae_val_preds, mae_val_targets = train_model(loss, num_epochs=5)
{'epoch': 0, 'train_loss': 4.86463, 'val_loss': 4.30155, 'r_squared': 0.46805, 'rmsle': 0.55212}
{'epoch': 1, 'train_loss': 4.01097, 'val_loss': 3.95936, 'r_squared': 0.61966, 'rmsle': 0.52564}
{'epoch': 2, 'train_loss': 3.72004, 'val_loss': 3.86857, 'r_squared': 0.63448, 'rmsle': 0.51612}
{'epoch': 3, 'train_loss': 3.52463, 'val_loss': 3.7652, 'r_squared': 0.6589, 'rmsle': 0.51475}
{'epoch': 4, 'train_loss': 3.37773, 'val_loss': 3.78491, 'r_squared': 0.69835, 'rmsle': 0.51039}
We can't really compare the training and validation losses of this model run to the previous one, because they use different loss functions, one of which squares its errors and one of which doesn't. However, we can look at the decisions each model would have led us to and compare the resulting business metrics.
mae_val_stock = np.ceil(mae_val_preds)
bm3 = log_business_metrics("MAE model", mae_val_stock, mae_val_targets)
pd.concat([business_metrics, bm1, bm2, bm3], axis=0, ignore_index=True)
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.407990 | 173.166235 | 0.719482 | 0.627397 |
1 | MSE model * 1.5 | 0.135075 | 32762.0 | 0.802904 | 226306.0 | -324592.0 | 6.452825 | 322.720982 | 0.477213 | 0.775845 |
2 | MAE model | 0.338921 | 81489.0 | 0.496687 | 71728.0 | -316195.0 | 3.816305 | 185.795133 | 0.699024 | 0.520921 |
Let’s also get the business metrics for when we stock 50% above the model’s predictions.
above_mae_stocking_rule = np.ceil(1.5 * mae_val_preds)
bm4 = log_business_metrics("MAE model * 1.5", above_mae_stocking_rule, mae_val_targets)
pd.concat([business_metrics, bm1, bm2, bm3, bm4], axis=0, ignore_index=True)
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.407990 | 173.166235 | 0.719482 | 0.627397 |
1 | MSE model * 1.5 | 0.135075 | 32762.0 | 0.802904 | 226306.0 | -324592.0 | 6.452825 | 322.720982 | 0.477213 | 0.775845 |
2 | MAE model | 0.338921 | 81489.0 | 0.496687 | 71728.0 | -316195.0 | 3.816305 | 185.795133 | 0.699024 | 0.520921 |
3 | MAE model * 1.5 | 0.156322 | 37625.0 | 0.758494 | 184350.0 | -297225.0 | 5.528918 | 333.072955 | 0.460443 | 0.655141 |
We can see here that the model trained with L1 loss overstocks less often and by a smaller amount, and it achieves a better Utility under both stocking rules, though it understocks somewhat more often. So swapping it in for MSE loss looks like an improvement on the business metrics we care about, if not a strict one across the board.
Going further with a custom loss function
We have a metric, Utility, which we are defining as \(-3 \times \text{Understocked Amount} - \text{Overstocked Amount}\). In the real world you would spend a lot of time deciding how to define a metric like this. Assuming that you have done that, you may decide to train your model to optimise it directly.
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, outputs, actual):
        diff = outputs - actual
        # Overstocking costs 1 per unit; understocking costs 3 per unit.
        loss = torch.where(outputs > actual, diff, -3 * diff)
        return loss.mean()
custom_model, custom_val_preds, custom_val_targets = train_model(CustomLoss(), num_epochs=5)
{'epoch': 0, 'train_loss': 9.87575, 'val_loss': 8.1583, 'r_squared': 0.58504, 'rmsle': 0.61193}
{'epoch': 1, 'train_loss': 7.53965, 'val_loss': 7.48924, 'r_squared': 0.63308, 'rmsle': 0.6155}
{'epoch': 2, 'train_loss': 6.85897, 'val_loss': 7.22076, 'r_squared': 0.60763, 'rmsle': 0.60818}
{'epoch': 3, 'train_loss': 6.44625, 'val_loss': 7.16378, 'r_squared': 0.6077, 'rmsle': 0.62336}
{'epoch': 4, 'train_loss': 6.10289, 'val_loss': 7.19856, 'r_squared': 0.58086, 'rmsle': 0.64997}
custom_val_stock = np.ceil(custom_val_preds)
bm5 = log_business_metrics("Custom loss", custom_val_stock, custom_val_targets)
pd.concat([business_metrics, bm1, bm2, bm3, bm4, bm5], axis=0, ignore_index=True)
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.407990 | 173.166235 | 0.719482 | 0.627397 |
1 | MSE model * 1.5 | 0.135075 | 32762.0 | 0.802904 | 226306.0 | -324592.0 | 6.452825 | 322.720982 | 0.477213 | 0.775845 |
2 | MAE model | 0.338921 | 81489.0 | 0.496687 | 71728.0 | -316195.0 | 3.816305 | 185.795133 | 0.699024 | 0.520921 |
3 | MAE model * 1.5 | 0.156322 | 37625.0 | 0.758494 | 184350.0 | -297225.0 | 5.528918 | 333.072955 | 0.460443 | 0.655141 |
4 | Custom loss | 0.138612 | 34970.0 | 0.788707 | 188466.0 | -293376.0 | 5.565308 | 262.383431 | 0.574956 | 0.695139 |
We end up with a better Utility score when optimising for it directly. The challenge then is knowing whether that was the correct thing to optimise for in the first place.
Predicting Full Distributions
In machine learning, it is common to train models to make point-estimate predictions and not give too much thought to statistical distributions. However, when actually making decisions it is useful to have more fine-grained control over what comes out when you call model.predict(). It is especially useful for dynamic optimisation, where the prediction you make affects the state you are in in the next period: for example, if our bread stays good for two weeks, overordering one week might mean we expect to order less the next. This is very important in reinforcement learning.
There are many ways to build models that return statistical distributions, including deep learning methods and Bayesian methods in libraries like PyMC. For this example, we'll use a Quantile Regression Forest.
A typical random forest is composed of decision trees. Each tree repeatedly splits the training data until it reaches a leaf. At inference time you take a row of data, run it through a tree, and get a prediction, which is the mean (or median) of the training targets in whichever leaf the row ends up in. The random forest's final prediction is then the mean of the individual trees' predictions.
In a quantile regression forest, each leaf instead keeps all of its training targets rather than collapsing them to a single number. If you want the median of the data in a leaf, you take the median of the values in that leaf; if you want the 90th percentile, you take the 90th percentile of those values.
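To make that concrete, here is a minimal sketch with an invented leaf (the target values are made up for illustration):

leaf_targets = np.array([1, 2, 2, 3, 3, 3, 5, 8, 20])  # hypothetical targets in one leaf
print(np.quantile(leaf_targets, 0.5))  # median prediction from this leaf
print(np.quantile(leaf_targets, 0.9))  # 90th-percentile prediction from this leaf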
The API for training the RandomForestQuantileRegressor is very similar to training a normal RandomForestRegressor in scikit-learn:
qrf = RandomForestQuantileRegressor(n_estimators=100, min_samples_leaf=50, random_state=0)
qrf.fit(X_train, y_train)
RandomForestQuantileRegressor(min_samples_leaf=50, random_state=0)
At prediction time, we need to choose which quantiles we want predictions for. Below we ask for the 5th, 10th, 15th, and so on up to the 95th percentile. In total that is 19 quantiles, so the array returned by the .predict() method has shape [n_rows, n_quantiles].
quantiles = [i / 100 for i in range(5, 100, 5)]
sample_preds = qrf.predict(X_val, quantiles=quantiles)
print(sample_preds.shape)
(40148, 19)
Let’s look at a single row of predictions. Say, the 6th:
one_demand_prediction = sample_preds[5]
one_demand_prediction
array([1., 2., 2., 2., 3., 3., 3., 3., 3., 3., 3., 4., 4., 5., 5., 6., 7.,
7., 9.])
What we are seeing here is that there is roughly a \(\frac{1}{19}\) chance that the actual amount of bread sold will be 1 unit, roughly a \(\frac{3}{19}\) chance that it will be 2 units, and so on.
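One rough way to see this (an approximation that treats each of the 19 quantile values as equally likely) is to tabulate the prediction array:

values, counts = np.unique(one_demand_prediction, return_counts=True)
for v, c in zip(values, counts):
    print(f"P(demand ~= {v:g}) ~= {c}/19")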
Let's now look at how we could turn this into an actual stocking rule.

Suppose we decided that we wanted to stock enough that we are understocked only 10% of the time. In that case we would take the second-to-last value from our prediction array (since that corresponds to the 90th percentile) and stock that amount.

It's possible that your distribution has some extreme jumps in it, e.g. going from predicting ~5 at the 80th percentile to ~50 at the 90th. To avoid taking too big a gamble on a particular quantile's value, you may want to add an outlier bound like the one below, which caps how much you will stock at 3 times the mean of the quantile values.
def rarely_run_out_rule(prediction):
    outlier_bound = 3 * np.mean(prediction)
    to_stock = math.ceil(min(prediction[-2], outlier_bound))
    return to_stock
rarely_run_out_rule(one_demand_prediction)
7
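Tracing through: the second-to-last quantile value is 7, and the cap is 3 × mean ≈ 3 × 3.95 ≈ 11.8, so the cap doesn't bind and we stock 7 units.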
We can see how often this cap actually gets applied by running the cell below:
all_stocking_decisions = np.apply_along_axis(rarely_run_out_rule, 1, sample_preds)
(all_stocking_decisions < sample_preds[:, -2]).mean()
0.009091361960745243
Looking at the business metrics this rule generates, we can see that we do indeed understock less than 10% of the time. We overstock a lot though, as this is quite an aggressive rule, which hurts the Utility score. There are various ways we could tune this, e.g. by adjusting which percentile or cap we use, as well as the other approaches discussed earlier; one tunable variant is sketched after the table below.
= log_business_metrics("Quantile regressor", all_stocking_decisions, y_val)
bm5 =0, ignore_index=True) pd.concat([business_metrics, bm1, bm2, bm3, bm4, bm5], axis
 | Model Name | Understocked Fraction | Understocked Amount | Overstocked Fraction | Overstocked Amount | Utility | MAE | MSE | R2 | RMSLE |
---|---|---|---|---|---|---|---|---|---|---|
0 | MSE model | 0.275381 | 72458.0 | 0.631015 | 104514.0 | -321888.0 | 4.407990 | 173.166235 | 0.719482 | 0.627397 |
1 | MSE model * 1.5 | 0.135075 | 32762.0 | 0.802904 | 226306.0 | -324592.0 | 6.452825 | 322.720982 | 0.477213 | 0.775845 |
2 | MAE model | 0.338921 | 81489.0 | 0.496687 | 71728.0 | -316195.0 | 3.816305 | 185.795133 | 0.699024 | 0.520921 |
3 | MAE model * 1.5 | 0.156322 | 37625.0 | 0.758494 | 184350.0 | -297225.0 | 5.528918 | 333.072955 | 0.460443 | 0.655141 |
4 | Quantile regressor | 0.084313 | 35904.0 | 0.878624 | 395286.0 | -502998.0 | 10.740012 | 835.487994 | -0.353437 | 1.009825 |
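For instance, one way to make the rule tunable (a sketch; the parameter names here are ours) is to expose the service-level quantile and the cap multiplier as arguments:

def stocking_rule(prediction, quantile_idx=-2, cap_multiplier=3.0):
    outlier_bound = cap_multiplier * np.mean(prediction)
    return math.ceil(min(prediction[quantile_idx], outlier_bound))

# e.g. a less aggressive rule based on the 75th percentile (index -5 of our 19 quantiles):
decisions_75 = np.apply_along_axis(lambda p: stocking_rule(p, quantile_idx=-5), 1, sample_preds)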
Finally, here are a few example rows showing the stocking decision next to the quantile predictions it was derived from:

for row in [0, 126, 295, 298, 557, 620, 882]:
    print(f"Stocked: {all_stocking_decisions[row]}\nInput: {sample_preds[row, :]}\n")
Stocked: 8
Input: [ 1. 1. 1. 1. 2. 2. 2. 2. 2. 3. 3. 3. 3. 4. 6. 6. 7. 8.
10.]
Stocked: 3
Input: [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.45 2. 2. 2.
3. 3. 3. 3. 4. ]
Stocked: 141
Input: [ 8.95 10. 12. 15.8 18. 29.4 33.3 36. 39.55 42.
47.25 57. 68.35 72. 78.5 84.2 138.3 141. 187.1 ]
Stocked: 25
Input: [ 1.95 2. 2. 2. 2.75 3.7 4.65 5. 5.55 6.5 8. 8.
9. 10. 13. 15. 19. 25. 30. ]
Stocked: 11
Input: [ 1. 2. 2. 2. 2. 3. 3. 3. 4. 4. 4. 4.
4.35 5. 5. 6.2 8. 10.4 20. ]
Stocked: 5
Input: [1. 1. 1. 1. 1.75 2. 2. 2. 2. 2. 2. 3. 3. 3.
4. 4.2 5. 5. 7.05]
Stocked: 6
Input: [1. 1. 1. 2. 2. 2. 2. 2. 2. 2. 2.45 3. 3. 3.
3. 4. 5. 5.1 7.05]