from helpers_new import *
from sklearn.datasets import load_diabetes
import matplotlib as mpl
from tqdm.notebook import tqdm_notebook as tq
import time
%matplotlib inline
plt.style.use('dark_blue_greens.mplstyle')
css_styling()
The SciKit-Learn Way |
Diabetes Dataset (documentation) |
---|
Ten baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline.
diabetes = load_diabetes()
input_data = diabetes['data']
target_data = diabetes['target']
input_df = pd.DataFrame(input_data, columns = diabetes['feature_names'])
target_series = pd.Series(target_data)
Inputs and Targets |
---|
head_tail_vert(input_df, 5, 'diabetes: input_data')
diabetes: input_data: head(5) |
age | sex | bmi | bp | s1 | s2 | s3 | s4 | s5 | s6 | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.04 | 0.05 | 0.06 | 0.02 | -0.04 | -0.03 | -0.04 | -0.00 | 0.02 | -0.02 |
1 | -0.00 | -0.04 | -0.05 | -0.03 | -0.01 | -0.02 | 0.07 | -0.04 | -0.07 | -0.09 |
2 | 0.09 | 0.05 | 0.04 | -0.01 | -0.05 | -0.03 | -0.03 | -0.00 | 0.00 | -0.03 |
3 | -0.09 | -0.04 | -0.01 | -0.04 | 0.01 | 0.02 | -0.04 | 0.03 | 0.02 | -0.01 |
4 | 0.01 | -0.04 | -0.04 | 0.02 | 0.00 | 0.02 | 0.01 | -0.00 | -0.03 | -0.05 |
diabetes: input_data: tail(5) |
age | sex | bmi | bp | s1 | s2 | s3 | s4 | s5 | s6 | |
---|---|---|---|---|---|---|---|---|---|---|
437 | 0.04 | 0.05 | 0.02 | 0.06 | -0.01 | -0.00 | -0.03 | -0.00 | 0.03 | 0.01 |
438 | -0.01 | 0.05 | -0.02 | -0.07 | 0.05 | 0.08 | -0.03 | 0.03 | -0.02 | 0.04 |
439 | 0.04 | 0.05 | -0.02 | 0.02 | -0.04 | -0.01 | -0.02 | -0.01 | -0.05 | 0.02 |
440 | -0.05 | -0.04 | 0.04 | 0.00 | 0.02 | 0.02 | -0.03 | 0.03 | 0.04 | -0.03 |
441 | -0.05 | -0.04 | -0.07 | -0.08 | 0.08 | 0.03 | 0.17 | -0.04 | -0.00 | 0.00 |
head_tail_horz(target_series, 5, "diabetes: target_data")
diabetes: target_data |
0 | |
---|---|
0 | 151.00 |
1 | 75.00 |
2 | 141.00 |
3 | 206.00 |
4 | 135.00 |
0 | |
---|---|
437 | 178.00 |
438 | 104.00 |
439 | 132.00 |
440 | 220.00 |
441 | 57.00 |
Initial Visualization |
---|
def plot_feature(df,
                 column,
                 color = None,
                 title = None,
                 xlabel = None,
                 ylabel = None):
    # fall back to the first style color when none is given
    color = color or 'C0'
    fig, ax = plt.subplots(1)
    ax.scatter(df[column], target_data, color = color)
    ax.set_title(title or f'{column} vs. {ylabel}')
    ax.set(
        xlabel = f'{xlabel}: {column}',
        ylabel = f'{ylabel}');
Visualization: age vs disease progression |
---|
plot_feature(input_df,
             'age',
             xlabel = 'baseline variable',
             ylabel = 'disease progression')
Visualization: bmi vs disease progression |
---|
plot_feature(input_df,
             'bmi',
             xlabel = 'baseline variable',
             ylabel = 'disease progression',
             color = 'C1')
Visualization: bp vs disease progression |
---|
plot_feature(input_df,
             'bp',
             xlabel = 'baseline variable',
             ylabel = 'disease progression',
             color = 'C2')
Workflow with SciKit-Learn |
---|
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
train_in, test_in, train_out, test_out = train_test_split(input_df, target_series)
pretty(f'train_in.shape: {train_in.shape}')
pretty(f'train_out.shape: {train_out.shape}')
pretty(f'test_in.shape: {test_in.shape}')
pretty(f'test_out.shape: {test_out.shape}')
train_in.shape: (331, 10) |
train_out.shape: (331,) |
test_in.shape: (111, 10) |
test_out.shape: (111,) |
Model |
---|
regressor = RandomForestRegressor()
regressor.fit(train_in, train_out)
RandomForestRegressor()
predictions = regressor.predict(test_in)
mean_squared_error(test_out, predictions)
3599.5111306306303
Modular Model Building & Pipelines with SciKit-Learn |
---|
from sklearn.neighbors import KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
pipeline = make_pipeline(StandardScaler(), KNeighborsRegressor())
pipeline
Pipeline(steps=[('standardscaler', StandardScaler()), ('kneighborsregressor', KNeighborsRegressor())])
pipeline.fit(train_in, train_out)
Pipeline(steps=[('standardscaler', StandardScaler()), ('kneighborsregressor', KNeighborsRegressor())])
predictions = pipeline.predict(test_in)
mean_squared_error(test_out, predictions)
3670.1308108108105
Summary |
---|
The SKTime Way |
Lynx Dataset (documentation) |
---|
The annual numbers of lynx trappings for 1821–1934 in Canada. This time-series records the number of skins of predators (lynx) that were collected over several years by the Hudson’s Bay Company. The dataset was taken from Brockwell & Davis (1991) and appears to be the series considered by Campbell & Walker (1977).
Dimensionality: univariate
Series length: 114
Frequency: Yearly
Number of cases: 1
This data shows aperiodic, cyclical patterns, as opposed to periodic, seasonal patterns.
from sktime.datasets import load_lynx
from sktime.utils.plotting import plot_series
lynx = load_lynx()
plot_series(lynx, title = 'Plotting a Univariate Time Series', colors = ['C4']);
Multivariate Data |
Longley Dataset (documentation) |
---|
This multivariate time series dataset contains various US macroeconomic variables from 1947 to 1962 that are known to be highly collinear.
Dimensionality: multivariate, 6
Series length: 16
Frequency: Yearly
Number of cases: 1
Variable description:
TOTEMP - Total employment
GNPDEFL - Gross national product deflator
GNP - Gross national product
UNEMP - Number of unemployed
ARMED - Size of armed forces
POP - Population
from sktime.datasets import load_longley
targets, inputs = load_longley()
head_tail_horz(inputs, 5, 'Inputs')
Inputs |
GNPDEFL | GNP | UNEMP | ARMED | POP | |
---|---|---|---|---|---|
Period | |||||
1947 | 83.00 | 234,289.00 | 2,356.00 | 1,590.00 | 107,608.00 |
1948 | 88.50 | 259,426.00 | 2,325.00 | 1,456.00 | 108,632.00 |
1949 | 88.20 | 258,054.00 | 3,682.00 | 1,616.00 | 109,773.00 |
1950 | 89.50 | 284,599.00 | 3,351.00 | 1,650.00 | 110,929.00 |
1951 | 96.20 | 328,975.00 | 2,099.00 | 3,099.00 | 112,075.00 |
GNPDEFL | GNP | UNEMP | ARMED | POP | |
---|---|---|---|---|---|
Period | |||||
1958 | 110.80 | 444,546.00 | 4,681.00 | 2,637.00 | 121,950.00 |
1959 | 112.60 | 482,704.00 | 3,813.00 | 2,552.00 | 123,366.00 |
1960 | 114.20 | 502,601.00 | 3,931.00 | 2,514.00 | 125,368.00 |
1961 | 115.70 | 518,173.00 | 4,806.00 | 2,572.00 | 127,852.00 |
1962 | 116.90 | 554,894.00 | 4,007.00 | 2,827.00 | 130,081.00 |
head_tail_horz(targets, 5, 'Targets')
Targets |
TOTEMP | |
---|---|
Period | |
1947 | 60,323.00 |
1948 | 61,122.00 |
1949 | 60,171.00 |
1950 | 61,187.00 |
1951 | 63,221.00 |
TOTEMP | |
---|---|
Period | |
1958 | 66,513.00 |
1959 | 68,655.00 |
1960 | 69,564.00 |
1961 | 69,331.00 |
1962 | 70,551.00 |
Visualization: multiple input series |
---|
plot_series(targets, colors = ["C0"])
for idx, column in enumerate(inputs.columns[:2]):
current = inputs[column]
plot_series(current, colors = ["C" + str(idx+1)])
Arrowhead Dataset (documentation) |
---|
Dimensionality: univariate
Series length: 251
Train cases: 36
Test cases: 175
Number of classes: 3
The arrowhead data consists of outlines of the images of arrowheads. The shapes of the projectile points are converted into a time series using the angle-based method. The classification of projectile points is an important topic in anthropology. The classes are based on shape distinctions such as the presence and location of a notch in the arrow. The problem in the repository is a length-normalised version of that used in Ye09shapelets. The three classes are called “Avonlea”, “Clovis” and “Mix”.
import matplotlib.pyplot as plt
from sktime.datasets import load_arrow_head
from sktime.datatypes import convert
inputs, targets = load_arrow_head(return_X_y = True)
head_tail_horz(inputs, 5, 'input data')
input data |
dim_0 | |
---|---|
0 | 0 -1.96 1 -1.96 2 -1.96 3 -1.94 4 -1.90 ... 246 -1.84 247 -1.88 248 -1.91 249 -1.92 250 -1.91 Length: 251, dtype: float64 |
1 | 0 -1.77 1 -1.77 2 -1.78 3 -1.73 4 -1.70 ... 246 -1.64 247 -1.68 248 -1.73 249 -1.78 250 -1.79 Length: 251, dtype: float64 |
2 | 0 -1.87 1 -1.84 2 -1.84 3 -1.81 4 -1.76 ... 246 -1.83 247 -1.88 248 -1.86 249 -1.86 250 -1.85 Length: 251, dtype: float64 |
3 | 0 -2.07 1 -2.07 2 -2.04 3 -2.04 4 -1.96 ... 246 -1.95 247 -2.01 248 -2.03 249 -2.07 250 -2.08 Length: 251, dtype: float64 |
4 | 0 -1.75 1 -1.74 2 -1.72 3 -1.70 4 -1.68 ... 246 -1.72 247 -1.74 248 -1.74 249 -1.76 250 -1.76 Length: 251, dtype: float64 |
dim_0 | |
---|---|
206 | 0 -1.63 1 -1.62 2 -1.63 3 -1.61 4 -1.57 ... 246 -1.57 247 -1.60 248 -1.62 249 -1.62 250 -1.62 Length: 251, dtype: float64 |
207 | 0 -1.66 1 -1.66 2 -1.63 3 -1.61 4 -1.59 ... 246 -1.68 247 -1.67 248 -1.67 249 -1.68 250 -1.68 Length: 251, dtype: float64 |
208 | 0 -1.60 1 -1.59 2 -1.58 3 -1.56 4 -1.53 ... 246 -1.58 247 -1.59 248 -1.60 249 -1.61 250 -1.61 Length: 251, dtype: float64 |
209 | 0 -1.74 1 -1.74 2 -1.73 3 -1.72 4 -1.70 ... 246 -1.64 247 -1.67 248 -1.70 249 -1.71 250 -1.73 Length: 251, dtype: float64 |
210 | 0 -1.63 1 -1.63 2 -1.62 3 -1.61 4 -1.58 ... 246 -1.51 247 -1.55 248 -1.58 249 -1.60 250 -1.62 Length: 251, dtype: float64 |
head_tail_horz(targets, 5, 'target data')
target data |
0 | |
---|---|
0 | 0 |
1 | 1 |
2 | 2 |
3 | 0 |
4 | 1 |
0 | |
---|---|
206 | 2 |
207 | 2 |
208 | 2 |
209 | 2 |
210 | 2 |
pretty(inputs.shape, 'inputs.shape before conversion')
inputs.shape before conversion |
(211, 1) |
inputs = convert(inputs, from_type = 'nested_univ', to_type = 'numpy3D')
pretty(inputs.shape, 'inputs.shape after conversion')
inputs.shape after conversion |
(211, 1, 251) |
labels, counts = np.unique(targets, return_counts = True)
Visualization: multiple input samples |
---|
fig, ax = plt.subplots(1, figsize = plt.figaspect(0.25))
for label in labels:
ax.plot(inputs[targets == label, 0, :][0], label = f'class {label}');
ax.set(ylabel = 'Scaled Distance from Midpoint', xlabel = 'Index');
ax.set_title('Panel Data: Each line represents a different, independent sample');
labels, counts = np.unique(targets, return_counts = True)
fig, ax = plt.subplots(1, figsize = plt.figaspect(0.25))
for label in labels:
for idx in range(3):
ax.plot(inputs[targets == label, 0, :][idx], label = f'class {label}');
ax.set(ylabel = 'Scaled Distance from Midpoint', xlabel = 'Index');
ax.set_title('Panel Data: Each line represents a different, independent sample');
SKLearn + SKTime: Multiple Learning Tasks |
Reduction: From one learning task to another |
---|
**Overview**
**Example: From forecasting to regression**
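Reduction means solving one learning task with algorithms for another. In forecasting-to-regression reduction, a sliding window turns the series into a table of lagged values, so any tabular regressor can be trained on it; this is exactly what sktime's make_reduction(), shown later, automates. Below is a minimal hand-rolled sketch; sliding_window_table is a hypothetical helper, not sktime's internal implementation.
import numpy as np
from sklearn.neighbors import KNeighborsRegressor

def sliding_window_table(y, window_length=10):
    # stack lagged windows of y into a (n_samples, window_length) table,
    # pairing each window with the next observation as regression target
    windows, next_values = [], []
    for i in range(window_length, len(y)):
        windows.append(y[i - window_length:i])
        next_values.append(y[i])
    return np.array(windows), np.array(next_values)

y = np.sin(np.linspace(0, 20, 200))           # toy series
X_tab, y_tab = sliding_window_table(y, 10)    # forecasting -> regression
regressor = KNeighborsRegressor().fit(X_tab, y_tab)
one_step_ahead = regressor.predict(y[-10:].reshape(1, -1))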
Creating a unified framework |
---|
What's a framework? |
---|
Check out our glossary of common terms:
A collection of related and reusable software design templates that practitioners can copy and fill in. Frameworks emphasize design reuse. They capture common software design decisions within a given application domain and distill them into reusable design templates. This reduces the number of design decisions practitioners must make, allowing them to focus on application specifics. Not only can practitioners write software faster as a result, but applications will have a similar structure. Frameworks often offer additional functionality like toolboxes. Compare with toolbox and application.
Check out our extension templates!
Univariate Forecasting |
Univariate forecasting |
---|
In forecasting, we're interested in using past data to make temporal forward predictions. sktime provides common statistical forecasting algorithms and tools for building composite machine learning models.
The basic workflow |
---|
from warnings import simplefilter
simplefilter(action="ignore", category=RuntimeWarning)
from sktime.datasets import load_shampoo_sales
from sktime.utils.plotting import plot_series
Data Specification |
---|
Shampoo Sales Dataset (documentation) |
---|
This dataset describes the monthly number of sales of shampoo over a 3 year period. The units are a sales count.
Dimensionality: univariate
Series length: 36
Frequency: Monthly
Number of cases: 1
shampoo_data = load_shampoo_sales()
plot_series(shampoo_data, title = "Shampoo Sales Time Series",
colors = ['C4']);
Task specification |
---|
Next, we will define a **forecasting task**.
The Forecasting Horizon |
---|
When we want to generate forecasts, we need to specify the forecasting horizon and pass that to our forecasting algorithm. We can specify the forecasting horizon as a numpy array of the steps ahead relative to the end of the training series:
ForecastingHorizon()
is_relative = False - used to specify exact dates. True (the default) means that each time point is interpreted relative to the last time point in the training data.
Init signature:
ForecastingHorizon(
values: Union[int, list, numpy.ndarray, pandas.core.indexes.base.Index] = None,
is_relative: bool = None,
freq=None,
)
Docstring:
Forecasting horizon.
Parameters
----------
values : pd.Index, pd.TimedeltaIndex, np.array, list, pd.Timedelta, or int
Values of forecasting horizon
is_relative : bool, optional (default=None)
- If True, a relative ForecastingHorizon is created:
values are relative to end of training series.
- If False, an absolute ForecastingHorizon is created:
values are absolute.
- if None, the flag is determined automatically:
relative, if values are of supported relative index type
absolute, if not relative and values of supported absolute index type
freq : str, pd.Index, pandas offset, or sktime forecaster, optional (default=None)
object carrying frequency information on values
ignored unless values is without inferrable freq
import numpy as np
horizon = np.arange(6) + 1
pretty(horizon, 'Forecasting Horizon: np.arange(6) + 1 -> Relative')
Forecasting Horizon: np.arange(6) + 1 -> Relative |
[1, 2, 3, 4, 5, 6] |
import pandas as pd
from sktime.forecasting.base import ForecastingHorizon
horizon = ForecastingHorizon(
pd.period_range("1993-07", periods=6, freq="M"), is_relative=False
)
pretty(horizon, 'Forecasting Horizon: pd.period_range("1993-07", periods=6, freq="M") -> Absolute')
Forecasting Horizon: pd.period_range("1993-07", periods=6, freq="M") -> Absolute |
ForecastingHorizon(['1993-07', '1993-08', '1993-09', '1993-10', '1993-11', '1993-12'], dtype='period[M]', is_relative=False) |
Converting between absolute and relative forecast horizons |
---|
to_relative() - the cutoff argument determines the time point from which an absolute horizon is converted to a relative one
Signature: ForecastingHorizon.to_relative(self, cutoff=None)
Docstring: Return forecasting horizon values relative to a cutoff.
Parameters
cutoff : pd.Period, pd.Timestamp, int, or pd.Index, optional (default=None)
Cutoff value required to convert a relative forecasting
horizon to an absolute one (and vice versa).
If pd.Index, last/latest value is considered the cutoff
Returns
fh : ForecastingHorizon
Relative representation of forecasting horizon.
cutoff = pd.Period("1993-06", freq="M")
pretty(horizon.to_relative(cutoff), 'cutoff = pd.Period("1993-06", freq="M") | horizon.to_relative(cutoff)')
cutoff = pd.Period("1993-06", freq="M") | horizon.to_relative(cutoff) |
ForecastingHorizon([1, 2, 3, 4, 5, 6], dtype='int64', is_relative=True) |
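The reverse conversion works the same way: to_absolute() turns a relative horizon into absolute time points, anchored at a cutoff. A quick sketch reusing the cutoff from above:
relative_horizon = ForecastingHorizon(np.arange(6) + 1, is_relative=True)
pretty(relative_horizon.to_absolute(cutoff),
       'cutoff = pd.Period("1993-06", freq="M") | horizon.to_absolute(cutoff)')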
Splitting: temporal_train_test_split() |
---|
Signature:
temporal_train_test_split(
y: Union[pandas.core.series.Series, pandas.core.frame.DataFrame, numpy.ndarray, pandas.core.indexes.base.Index],
X: Optional[pandas.core.frame.DataFrame] = None,
test_size: Union[int, float, NoneType] = None,
train_size: Union[int, float, NoneType] = None,
fh: Union[int, list, numpy.ndarray, pandas.core.indexes.base.Index, sktime.forecasting.base._fh.ForecastingHorizon, NoneType] = None,
) -> Union[Tuple[pandas.core.series.Series, pandas.core.series.Series], Tuple[pandas.core.series.Series, pandas.core.series.Series, pandas.core.frame.DataFrame, pandas.core.frame.DataFrame]]
Docstring:
Split arrays or matrices into sequential train and test subsets.
Creates train/test splits over endogenous arrays and optional exogenous
arrays.
This is a wrapper of scikit-learn's ``train_test_split`` that
does not shuffle the data.
Parameters
----------
y : pd.Series
Target series
X : pd.DataFrame, optional (default=None)
Exogenous data
test_size : float, int or None, optional (default=None)
If float, should be between 0.0 and 1.0 and represent the proportion
of the dataset to include in the test split. If int, represents the
relative number of test samples. If None, the value is set to the
complement of the train size. If ``train_size`` is also None, it will
be set to 0.25.
train_size : float, int, or None, (default=None)
If float, should be between 0.0 and 1.0 and represent the
proportion of the dataset to include in the train split. If
int, represents the relative number of train samples. If None,
the value is automatically set to the complement of the test size.
fh : ForecastingHorizon
Returns
-------
splitting : tuple, length=2 * len(arrays)
List containing train-test split of `y` and `X` if given.
from sktime.forecasting.model_selection import temporal_train_test_split
train_targets, test_targets = temporal_train_test_split(shampoo_data,
fh=horizon)
Visualization: forecast horizon for train-test split |
---|
plot_series(train_targets,
test_targets,
labels=["train_targets", "test_targets"],
colors = ['C4', 'C0'],
title = 'Using Forecast Horizon to Train-Test Split');
Model Specification: NaiveForecaster() |
---|
**NaiveForecaster** is a forecaster that makes forecasts using simple strategies. Two of the three strategies are robust against NaNs. The NaiveForecaster can also be used for multivariate data, in which case it internally applies the ColumnEnsembleForecaster so that each column is forecast with the same strategy.
Internally, this forecaster does the following:
To compute prediction quantiles, we first estimate the standard error of prediction residuals under the assumption of uncorrelated residuals. The forecast variance is then computed by multiplying the residual variance by a constant. This constant is a small-sample bias adjustment, and each method (mean, last, drift) has its own formula for computing it. These formulas can be found in the Forecasting: Principles and Practice textbook (Table 5.2) [1]_. Lastly, under the assumption that residuals follow a normal distribution, we use the forecast variance and z-scores of a normal distribution to estimate the prediction quantiles.
Parameters
----------
strategy : {"last", "mean", "drift"}, default="last"
Strategy used to make forecasts:
* "last": (robust against NaN values)
forecast the last value in the
training series when sp is 1.
When sp is not 1,
last value of each season
in the last window will be
forecasted for each season.
* "mean": (robust against NaN values)
forecast the mean of last window
of training series when sp is 1.
When sp is not 1, mean of all values
in a season from last window will be
forecasted for each season.
* "drift": (not robust against NaN values)
forecast by fitting a line between the
first and last point of the window and
extrapolating it into the future.
sp : int, or None, default=1
Seasonal periodicity to use in the seasonal forecasting. None=1.
window_length : int or None, default=None
Window length to use in the `mean` strategy. If None, entire training
series will be used.
from sktime.forecasting.naive import NaiveForecaster
forecaster = NaiveForecaster(strategy="drift", window_length=10)
Model Fitting: NaiveForecaster.fit() |
---|
y - always required
X - exogenous data. In an economic model, an exogenous variable is one whose measure is determined outside the model and is imposed on the model; an exogenous change is a change in an exogenous variable.
fh - the forecast horizon can be passed either when fitting or when predicting
Signature: forecaster.fit(y, X=None, fh=None)
Docstring:
Fit forecaster to training data.
State change:
Changes state to "fitted".
Writes to self:
Sets self._is_fitted flag to True.
Writes self._y and self._X with `y` and `X`, respectively.
Sets self.cutoff and self._cutoff to last index seen in `y`.
Sets fitted model attributes ending in "_".
Stores fh to self.fh if fh is passed.
Parameters
----------
y : time series in sktime compatible data container format
Time series to which to fit the forecaster.
y can be in one of the following formats:
Series scitype: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
for vanilla forecasting, one time series
Panel scitype: pd.DataFrame with 2-level row MultiIndex,
3D np.ndarray, list of Series pd.DataFrame, or nested pd.DataFrame
for global or panel forecasting
Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex
for hierarchical forecasting
Number of columns admissible depend on the "scitype:y" tag:
if self.get_tag("scitype:y")=="univariate":
y must have a single column/variable
if self.get_tag("scitype:y")=="multivariate":
y must have 2 or more columns
if self.get_tag("scitype:y")=="both": no restrictions on columns apply
For further details:
on usage, see forecasting tutorial examples/01_forecasting.ipynb
on specification of formats, examples/AA_datatypes_and_datasets.ipynb
fh : int, list, np.array or ForecastingHorizon, optional (default=None)
The forecasting horizon encoding the time stamps to forecast at.
if self.get_tag("requires-fh-in-fit"), must be passed, not optional
X : time series in sktime compatible format, optional (default=None)
Exogeneous time series to fit to
Should be of same scitype (Series, Panel, or Hierarchical) as y
if self.get_tag("X-y-must-have-same-index"), X.index must contain y.index
there are no restrictions on number of columns (unlike for y)
forecaster.fit(train_targets)
NaiveForecaster(strategy='drift', window_length=10)
Predicting: NaiveForecaster.predict() |
---|
Signature: NaiveForecaster.predict(self, fh=None, X=None)
Docstring:
Forecast time series at future horizon.
State required:
Requires state to be "fitted".
Accesses in self:
Fitted model attributes ending in "_".
self.cutoff, self._is_fitted
Writes to self:
Stores fh to self.fh if fh is passed and has not been passed previously.
Parameters
----------
fh : int, list, np.array or ForecastingHorizon, optional (default=None)
The forecasting horizon encoding the time stamps to forecast at.
if has not been passed in fit, must be passed, not optional
X : time series in sktime compatible format, optional (default=None)
Exogeneous time series to fit to
Should be of same scitype (Series, Panel, or Hierarchical) as y in fit
if self.get_tag("X-y-must-have-same-index"), X.index must contain fh.index
there are no restrictions on number of columns (unlike for y)
Returns
-------
y_pred : time series in sktime compatible data container format
Point forecasts at fh, with same index as fh
y_pred has same type as the y that has been passed most recently:
Series, Panel, Hierarchical scitype, same format (see above)
predictions = forecaster.predict(horizon)
plot_series(train_targets,
test_targets,
predictions,
labels=['train_targets', 'test_targets', 'predictions'],
colors = ['C4', 'C0', 'C3'],
title = 'Drift Strategy Prediction');
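The quantile machinery described in the NaiveForecaster docstring above also powers probabilistic outputs. A sketch, assuming your sktime version implements predict_interval() for NaiveForecaster:
# 90% prediction intervals around the point forecasts
intervals = forecaster.predict_interval(horizon, coverage=0.90)
intervals.head()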
Evaluation: mean_absolute_percentage_error() |
---|
from sktime.performance_metrics.forecasting import \
mean_absolute_percentage_error
mean_absolute_percentage_error(test_targets, predictions, symmetric=False)
0.16469764622516225
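Passing symmetric=True computes the symmetric MAPE (sMAPE) variant instead, which normalizes by the average of actual and predicted values:
mean_absolute_percentage_error(test_targets, predictions, symmetric=True)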
Another Example: AutoARIMA |
---|
Using AutoARIMA() rather than NaiveForecaster()
from sktime.forecasting.arima import AutoARIMA
data = load_shampoo_sales()
training, testing = temporal_train_test_split(data, fh=horizon)
forecaster = AutoARIMA(sp=12, suppress_warnings=True)
forecaster.fit(training)
AutoARIMA(sp=12, suppress_warnings=True)
predictions = forecaster.predict(horizon)
plot_series(training, testing, predictions,
labels=["y_train", "y_test", "y_pred"],
colors = ['C4', 'C0', 'C3'],
title = 'Example: using AutoARIMA as a Model');
Summary of basic workflow |
---|
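As a recap, here is the entire basic workflow in one place, a sketch reusing only the pieces introduced above:
data = load_shampoo_sales()                                   # 1. data
horizon = np.arange(6) + 1                                    # 2. task
train, test = temporal_train_test_split(data, fh=horizon)     # 3. split
forecaster = NaiveForecaster(strategy="drift")                # 4. model
forecaster.fit(train)                                         # 5. fit
predictions = forecaster.predict(horizon)                     # 6. predict
mean_absolute_percentage_error(test, predictions, symmetric=False)  # 7. evaluate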
Forecasters in sktime |
---|
Check out our online estimator overview at: https://www.sktime.org/en/stable/estimator_overview.html
**all_estimators()**
Signature:
all_estimators(
estimator_types=None,
filter_tags=None,
exclude_estimators=None,
return_names=True,
as_dataframe=False,
return_tags=None,
suppress_import_stdout=True,
)
Docstring:
Get a list of all estimators from sktime.
This function crawls the module and gets all classes that inherit
from sktime's and sklearn's base classes.
Not included are: the base classes themselves, classes defined in test
modules.
from sktime.registry import all_estimators
estimators = all_estimators("forecaster", as_dataframe=True)
head_tail_horz(estimators.name, 5, 'Forecaster Estimators (some options)')
Forecaster Estimators (some options) |
name | |
---|---|
0 | ARDL |
1 | ARIMA |
2 | AutoARIMA |
3 | AutoETS |
4 | AutoEnsembleForecaster |
name | |
---|---|
48 | UpdateEvery |
49 | UpdateRefitsEvery |
50 | VAR |
51 | VARMAX |
52 | VECM |
missing_data_estimators = all_estimators("forecaster",
filter_tags = {"handles-missing-data": True},
as_dataframe=True)
head_tail_horz(missing_data_estimators.name.sample(10), 5,
'Forecaster Estimators (using filters)')
Forecaster Estimators (using filters) |
name | |
---|---|
6 | DirectTimeSeriesRegressionForecaster |
17 | TransformedTargetForecaster |
9 | ForecastingPipeline |
12 | NaiveForecaster |
11 | MultioutputTimeSeriesRegressionForecaster |
name | |
---|---|
10 | MultioutputTabularRegressionForecaster |
4 | DirRecTimeSeriesRegressionForecaster |
16 | StackingForecaster |
0 | ARIMA |
8 | ForecastByLevel |
But can I not just use scikit-learn? |
---|
In principle, yes, but many pitfalls ...
See our previous tutorial from the PyData Amsterdam 2020 for more details: https://github.com/sktime/sktime-tutorial-pydata-amsterdam-2020
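One of those pitfalls in a nutshell: scikit-learn's train_test_split shuffles rows by default, which leaks future observations into the training set. A small sketch contrasting it with the temporal split:
from sklearn.model_selection import train_test_split
from sktime.datasets import load_airline
y = load_airline()
train_shuffled, _ = train_test_split(y, test_size=12)           # shuffled rows
train_temporal, _ = temporal_train_test_split(y, test_size=12)  # order kept
print(train_shuffled.index.is_monotonic_increasing)  # almost surely False
print(train_temporal.index.is_monotonic_increasing)  # True: past data only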
Better: Use scikit-learn with sktime!
sktime provides a meta-estimator for this approach: make_reduction(), introduced below.
Using a Scikit-Learn Model |
---|
Airline Passenger Dataset (documentation) |
---|
The classic Box & Jenkins airline data. Monthly totals of international airline passengers, 1949 to 1960.
Dimensionality: univariate
Series length: 144
Frequency: Monthly
Number of cases: 1
This data shows an increasing trend, non-constant (increasing) variance and periodic, seasonal patterns.
Visualization: airline passenger dataset input |
---|
from sktime.datasets import load_airline
from sktime.utils.plotting import plot_series
targets = load_airline()
plot_series(targets, colors = ['C6'], title = 'Airline Passenger Dataset');
train_targets, test_targets = temporal_train_test_split(targets, test_size=12)
horizon = ForecastingHorizon(test_targets.index, is_relative=False)
**`forecasting.compose.make_reduction()`**
Signature:
make_reduction(
estimator,
strategy='recursive',
window_length=10,
scitype='infer',
transformers=None,
pooling='local',
windows_identical=True,
)
Docstring:
Make forecaster based on reduction to tabular or time-series regression.
During fitting, a sliding-window approach is used to first transform the
time series into tabular or panel data, which is then used to fit a tabular or
time-series regression estimator. During prediction, the last available data is
used as input to the fitted regression estimator to generate forecasts.
Parameters
----------
estimator : an estimator instance
Either a tabular regressor from scikit-learn or a time series regressor from
sktime.
strategy : str, optional (default="recursive")
The strategy to generate forecasts. Must be one of "direct", "recursive" or
"multioutput".
window_length : int, optional (default=10)
Window length used in sliding window transformation.
scitype : str, optional (default="infer")
Legacy argument for downwards compatibility, should not be used.
`make_reduction` will automatically infer the correct type of `estimator`.
This internal inference can be force-overridden by the `scitype` argument.
Must be one of "infer", "tabular-regressor" or "time-series-regressor".
If the scitype cannot be inferred, this is a bug and should be reported.
transformers: list of transformers (default = None)
A suitable list of transformers that allows for using an en-bloc approach with
make_reduction. This means that instead of using the raw past observations of
y across the window length, suitable features will be generated directly from
the past raw observations. Currently only supports WindowSummarizer (or a list
of WindowSummarizers) to generate features e.g. the mean of the past 7
observations. Currently only works for RecursiveTimeSeriesRegressionForecaster.
pooling: str {"local", "global"}, optional
Specifies whether separate models will be fit at the level of each instance
(local) or if you wish to fit a single model to all instances ("global").
Currently only works for RecursiveTimeSeriesRegressionForecaster.
windows_identical: bool, (default = True)
Direct forecasting only.
Specifies whether all direct models use the same X windows from y (True: Number
of windows = total observations + 1 - window_length - maximum forecasting
horizon) or a different number of X windows depending on the forecasting horizon
(False: Number of windows = total observations + 1 - window_length
- forecasting horizon). See pictionary below for more information.
Visualization: SKLearn's KNeighbors with recursive strategy |
---|
from sklearn.neighbors import KNeighborsRegressor
from sktime.forecasting.compose import make_reduction
regressor = KNeighborsRegressor(n_neighbors=2)
forecaster = make_reduction(regressor, strategy="recursive", window_length=15)
forecaster.fit(train_targets, fh=horizon)
predictions = forecaster.predict()
plot_series(train_targets, test_targets, predictions,
labels=["train_targets", "test_targets", "predictions"],
colors = ['C0', 'C1', 'C2'],
title = 'KNeighborsRegressor | strategy = "recursive"');
Visualization: SKLearn's KNeighbors with multioutput strategy |
---|
regressor = KNeighborsRegressor(n_neighbors=1)
forecaster = make_reduction(regressor, strategy="multioutput", window_length=7)
forecaster.fit(train_targets, fh=horizon)
predictions = forecaster.predict()
plot_series(train_targets, test_targets, predictions,
labels=["train_targets", "test_targets", "predictions"],
colors = ['C0', 'C1', 'C2'],
title = 'KNeighborsRegressor | strategy = "multioutput"');
More Advanced workflow |
Data specification |
---|
from sktime.forecasting.ets import AutoETS
data = load_airline()
plot_series(data, colors = ['C6'], title = 'Airline Data');
Task specification |
---|
# specifying the forecasting horizon: one year ahead, all months
# 12 steps ahead
horizon = np.arange(1, 13)
pretty(horizon, 'Forecast Horizon | np.arange(1, 13)')
Forecast Horizon | np.arange(1, 13) |
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] |
train_data = data.loc[:"1957-08"]
observed_data = train_data.copy()
observed_data.tail()
1957-04    348.00
1957-05    355.00
1957-06    422.00
1957-07    465.00
1957-08    467.00
Freq: M, Name: Number of airline passengers, dtype: float64
Model specification |
---|
forecaster = AutoETS(auto=True, sp=12, n_jobs=-1)
Fitting |
---|
forecaster.fit(train_data)
AutoETS(auto=True, n_jobs=-1, sp=12)
Prediction |
---|
predictions = forecaster.predict(horizon)
plot_series(observed_data, predictions, colors = ['C2', 'C6'],
title = 'Predictions with AutoETS');
predictions
1957-09    413.46
1957-10    360.57
1957-11    314.50
1957-12    358.25
1958-01    363.38
1958-02    363.45
1958-03    417.74
1958-04    402.20
1958-05    398.85
1958-06    451.96
1958-07    498.86
1958-08    494.80
Freq: M, dtype: float64
Observe New Data |
---|
observed_data = data.loc[:"1957-09"]
new_data = data.loc[["1957-09"]]
new_data
1957-09    404.00
Freq: M, Name: Number of airline passengers, dtype: float64
Update |
---|
forecaster.update(new_data)
/Users/evancarr/opt/anaconda3/envs/time_series_projects/lib/python3.10/site-packages/sktime/forecasting/base/_base.py:1881: UserWarning: NotImplementedWarning: AutoETS does not have a custom `update` method implemented. AutoETS will be refit each time `update` is called with update_params=True. To refit less often, use the wrappers in the forecasting.stream module, e.g., UpdateEvery. warn(
AutoETS(auto=True, n_jobs=-1, sp=12)
Predict again |
---|
predictions = forecaster.predict(horizon)
plot_series(observed_data, predictions,
colors = ['C3', 'C4'],
title = 'Updated Model Predictions');
predictions
1957-10    354.74
1957-11    309.49
1957-12    352.61
1958-01    357.73
1958-02    357.87
1958-03    411.40
1958-04    396.16
1958-05    392.94
1958-06    445.34
1958-07    491.65
1958-08    487.74
1958-09    432.52
Freq: M, dtype: float64
Understanding update() |
---|
update_params = False - update only the cutoff and stored data, without refitting parameters (True by default)
forecaster.update?
Signature: forecaster.update(y, X=None, update_params=True)
Docstring:
Update cutoff value and, optionally, fitted parameters.
If no estimator-specific update method has been implemented,
default fall-back is as follows:
    update_params=True: fitting to all observed data so far
    update_params=False: updates cutoff and remembers data only
State required:
    Requires state to be "fitted".
Accesses in self:
    Fitted model attributes ending in "_".
    Pointers to seen data, self._y and self.X
    self.cutoff, self._is_fitted
    If update_params=True, model attributes ending in "_".
Writes to self:
    Update self._y and self._X with `y` and `X`, by appending rows.
    Updates self.cutoff and self._cutoff to last index seen in `y`.
    If update_params=True, updates fitted model attributes ending in "_".
Parameters
----------
y : time series in sktime compatible data container format
    Time series to which to fit the forecaster in the update.
    y can be in one of the following formats, must be same scitype as in fit:
    Series scitype: pd.Series, pd.DataFrame, or np.ndarray (1D or 2D)
        for vanilla forecasting, one time series
    Panel scitype: pd.DataFrame with 2-level row MultiIndex,
        3D np.ndarray, list of Series pd.DataFrame, or nested pd.DataFrame
        for global or panel forecasting
    Hierarchical scitype: pd.DataFrame with 3 or more level row MultiIndex
        for hierarchical forecasting
    Number of columns admissible depend on the "scitype:y" tag:
        if self.get_tag("scitype:y")=="univariate":
            y must have a single column/variable
        if self.get_tag("scitype:y")=="multivariate":
            y must have 2 or more columns
        if self.get_tag("scitype:y")=="both": no restrictions on columns apply
    For further details:
        on usage, see forecasting tutorial examples/01_forecasting.ipynb
        on specification of formats, examples/AA_datatypes_and_datasets.ipynb
X : time series in sktime compatible format, optional (default=None)
    Exogeneous time series to fit to
    Should be of same scitype (Series, Panel, or Hierarchical) as y
    if self.get_tag("X-y-must-have-same-index"), X.index must contain y.index
    there are no restrictions on number of columns (unlike for y)
update_params : bool, optional (default=True)
    whether model parameters should be updated
Returns
-------
self : reference to self
File: ~/opt/anaconda3/envs/time_series_projects/lib/python3.10/site-packages/sktime/forecasting/base/_base.py
Type: method
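A sketch of the lighter-weight variant: with update_params=False, the forecaster only advances its cutoff and stores the new observation, without refitting:
newer_data = data.loc[["1957-10"]]
forecaster.update(newer_data, update_params=False)  # no refit, cutoff advances
predictions = forecaster.predict(horizon)           # forecasts roll forward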
Automating the Process |
---|
from sktime.forecasting.model_evaluation import evaluate
from sktime.forecasting.model_selection import ExpandingWindowSplitter
from sktime.utils.plotting import plot_windows
data = load_airline()
horizon = ForecastingHorizon(np.arange(12) + 1)
train_data, test_data = temporal_train_test_split(data, fh = horizon)
Temporal Cross-Validation: ExpandingWindowSplitter() |
---|
step_length = 3 - every new window will be three steps ahead
[Cross-Validation Notebook](https://github.com/alan-turing-institute/sktime/blob/main/examples/window_splitters.ipynb) <- different window splitters and ways of doing temporal cross-validation
cv = ExpandingWindowSplitter(step_length = 3,
fh = horizon,
initial_window = 10)
plot_windows(cv, data.iloc[:50])
Backtesting: Evaluation using temporal cross-validation |
---|
forecaster = NaiveForecaster(strategy="last", sp=12)
cv = ExpandingWindowSplitter(step_length=12, fh=horizon, initial_window=72)
results = evaluate(forecaster=forecaster,
y=data,
cv=cv,
strategy="refit",
return_data=True)
results.iloc[:, :5].head()
test_MeanAbsolutePercentageError | fit_time | pred_time | len_train_window | cutoff | |
---|---|---|---|---|---|
0 | 0.16 | 0.00 | 0.00 | 72 | 1954-12 |
1 | 0.13 | 0.00 | 0.00 | 84 | 1955-12 |
2 | 0.11 | 0.00 | 0.00 | 96 | 1956-12 |
3 | 0.03 | 0.00 | 0.00 | 108 | 1957-12 |
4 | 0.11 | 0.00 | 0.00 | 120 | 1958-12 |
fig, ax = plot_series(
data,
results['y_pred'].iloc[0],
results['y_pred'].iloc[1],
results['y_pred'].iloc[2],
results['y_pred'].iloc[3],
results['y_pred'].iloc[4],
results['y_pred'].iloc[5],
labels=['data'] + ["preds (Backtest " + str(x) + ")" for x in range(6)],
colors = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6']
)
ax.legend();
Tuning |
---|
Advanced model building & composition |
---|
Tuning window_length and n_neighbors simultaneously:
window_length over [9, 12, 15]
n_neighbors over [1, 2, 3, 4, 5, 6, 7, 8, 9]
from sktime.forecasting.model_selection import (ForecastingGridSearchCV,
SlidingWindowSplitter)
param_grid = {"window_length": [9, 12, 15],
"estimator__n_neighbors": np.arange(1, 10)}
regressor = KNeighborsRegressor()
forecaster = make_reduction(regressor,
strategy="recursive")
SlidingWindowSplitter & ForecastingGridSearchCV |
---|
**ForecastingGridSearchCV()**
Perform grid-search cross-validation to find optimal model parameters.
The forecaster is fit on the initial window and then temporal
cross-validation is used to find the optimal parameter.
Grid-search cross-validation is performed based on a cross-validation
iterator encoding the cross-validation scheme, the parameter grid to
search over, and (optionally) the evaluation metric for comparing model
performance. As in scikit-learn, tuning works through the common
hyper-parameter interface which allows to repeatedly fit and evaluate
the same forecaster with different hyper-parameters.
Parameters
----------
forecaster : estimator object
The estimator should implement the sktime or scikit-learn estimator
interface. Either the estimator must contain a "score" function,
or a scoring function must be passed.
cv : cross-validation generator or an iterable
e.g. SlidingWindowSplitter()
strategy : {"refit", "update", "no-update_params"}, optional, default="refit"
data ingestion strategy in fitting cv, passed to `evaluate` internally
defines the ingestion mode when the forecaster sees new data when window expands
"refit" = forecaster is refitted to each training window
"update" = forecaster is updated with training window data, in sequence provided
"no-update_params" = fit to first training window, re-used without fit or update
update_behaviour: str, optional, default = "full_refit"
one of {"full_refit", "inner_only", "no_update"}
behaviour of the forecaster when calling update
"full_refit" = both tuning parameters and inner estimator refit on all data seen
"inner_only" = tuning parameters are not re-tuned, inner estimator is updated
"no_update" = neither tuning parameters nor inner estimator are updated
param_grid : dict or list of dictionaries
Model tuning parameters of the forecaster to evaluate
scoring: function, optional (default=None)
Function to score models for evaluation of optimal parameters
n_jobs: int, optional (default=None)
Number of jobs to run in parallel.
None means 1 unless in a joblib.parallel_backend context.
-1 means using all processors.
refit: bool, optional (default=True)
True = refit the forecaster with the best parameters on the entire data in fit
False = best forecaster remains fitted on the last fold in cv
verbose: int, optional (default=0)
return_n_best_forecasters: int, default=1
In case the n best forecaster should be returned, this value can be set
and the n best forecasters will be assigned to n_best_forecasters_
pre_dispatch: str, optional (default='2*n_jobs')
error_score: numeric value or the str 'raise', optional (default=np.nan)
The test score returned when a forecaster fails to be fitted.
return_train_score: bool, optional (default=False)
backend: str, optional (default="loky")
Specify the parallelisation backend implementation in joblib, where
"loky" is used by default.
error_score : "raise" or numeric, default=np.nan
Value to assign to the score if an exception occurs in estimator fitting. If set
to "raise", the exception is raised. If a numeric value is given,
FitFailedWarning is raised.
Attributes
----------
best_index_ : int
best_score_: float
Score of the best model
best_params_ : dict
Best parameter values across the parameter grid
best_forecaster_ : estimator
Fitted estimator with the best parameters
cv_results_ : dict
Results from grid search cross validation
n_splits_: int
Number of splits in the data for cross validation
refit_time_ : float
Time (seconds) to refit the best forecaster
scorer_ : function
Function used to score model
n_best_forecasters_: list of tuples ("rank", <forecaster>)
The "rank" is in relation to best_forecaster_
n_best_scores_: list of float
The scores of n_best_forecasters_ sorted from best to worst
score of forecasters
cv = SlidingWindowSplitter(window_length=60, fh=horizon)
gscv = ForecastingGridSearchCV(forecaster,
cv=cv,
param_grid=param_grid,
strategy="refit")
Grid search cross-validation: fit the training data with each set of parameters, predict over the forecasting horizon, evaluate the predictions generated from each combination of parameters, and keep the best-scoring combination |
---|
gscv.fit(train_data)
ForecastingGridSearchCV(cv=SlidingWindowSplitter(fh=ForecastingHorizon([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype='int64', is_relative=True), window_length=60), forecaster=RecursiveTabularRegressionForecaster(estimator=KNeighborsRegressor()), param_grid={'estimator__n_neighbors': array([1, 2, 3, 4, 5, 6, 7, 8, 9]), 'window_length': [9, 12, 15]})
predictions = gscv.predict(horizon)
plot_series(train_data, test_data, predictions,
labels = ['train_data', 'test_data', 'predictions'],
colors = ['C0', 'C1', 'C2'])
mean_absolute_percentage_error(test_data, predictions)
0.12411154270928935
ForecastingGridSearchCV.best_params_ - the best n_neighbors is 2 and the best window_length is 12 |
---|
gscv.best_params_
{'estimator__n_neighbors': 2, 'window_length': 12}
plot_windows(cv, data.iloc[:84])
Tuning and AutoML |
---|
MultiplexForecaster
- specify the selection of models / algorithms to be tested and compared
"ets" - ExponentialSmoothing
trend - from this article: Exponential Smoothing Methods are a family of forecasting models. They use weighted averages of past observations to forecast new values. The idea is to give more importance to recent values in the series. Thus, as observations get older in time, the importance of these values gets exponentially smaller.
from sktime.forecasting.compose import MultiplexForecaster
from sktime.forecasting.exp_smoothing import ExponentialSmoothing
from sktime.forecasting.naive import NaiveForecaster
forecaster = MultiplexForecaster(
forecasters=[
("naive", NaiveForecaster(strategy="last")),
("ets", ExponentialSmoothing(trend="add", sp=12)),],)
Exponential Smoothing wins |
---|
forecaster_param_grid = {"selected_forecaster": ["ets", "naive"]}
gscv = ForecastingGridSearchCV(forecaster, cv=cv, param_grid=forecaster_param_grid)
gscv.fit(train_data)
gscv.best_params_
{'selected_forecaster': 'ets'}
Pipelining |
---|
1. Transform the data
2. Fit the forecaster on the transformed data
3. During prediction, generate the forecast
4. Inverse transform the forecast to the original dimensions of the data
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.forecasting.trend import PolynomialTrendForecaster
from sktime.transformations.series.detrend import Deseasonalizer, Detrender
regressor = KNeighborsRegressor()
forecaster = make_reduction(regressor, strategy="recursive")
Deseasonalizing, then detrending, then fitting the forecaster to the transformed data |
---|
forecaster = TransformedTargetForecaster(
[("deseasonalize", Deseasonalizer(sp=12)),
("detrend", Detrender()),
("forecast", forecaster),])
The pipeline then operates like any other forecaster: it can fit and predict |
---|
forecaster.fit(train_data)
predictions = forecaster.predict(horizon)
predictions
1960-01 397.97 1960-02 385.26 1960-03 428.56 1960-04 427.64 1960-05 443.90 1960-06 488.83 1960-07 526.62 1960-08 522.14 1960-09 486.26 1960-10 456.32 1960-11 409.21 1960-12 423.23 Freq: M, dtype: float64
Forecasting with Exogenous Variables |
This can be useful when one variable is easier to predict into the future and can then help forecast other, more complicated variables.
Basic Workflow |
---|
from sktime.datasets import load_longley
targets, inputs = load_longley()
targets.head()
Period
1947    60,323.00
1948    61,122.00
1949    60,171.00
1950    61,187.00
1951    63,221.00
Freq: A-DEC, Name: TOTEMP, dtype: float64
inputs.head()
GNPDEFL | GNP | UNEMP | ARMED | POP | |
---|---|---|---|---|---|
Period | |||||
1947 | 83.00 | 234,289.00 | 2,356.00 | 1,590.00 | 107,608.00 |
1948 | 88.50 | 259,426.00 | 2,325.00 | 1,456.00 | 108,632.00 |
1949 | 88.20 | 258,054.00 | 3,682.00 | 1,616.00 | 109,773.00 |
1950 | 89.50 | 284,599.00 | 3,351.00 | 1,650.00 | 110,929.00 |
1951 | 96.20 | 328,975.00 | 2,099.00 | 3,099.00 | 112,075.00 |
horizon = np.arange(5) + 1
train_targets, test_targets, train_inputs, test_inputs = temporal_train_test_split(targets, inputs, fh=horizon)
- Exogenous data is passed as training data to the fit method - in predict, X is the exogenous data corresponding to the forecasting horizon, used to generate the prediction |
---|
forecaster = AutoARIMA()
forecaster.fit(train_targets, train_inputs)
predictions = forecaster.predict(horizon, X=test_inputs)
plot_series(train_targets, test_targets, predictions,
labels=["train_targets", "test_targets", "predictions"],
colors = ['C1', 'C2', 'C3']);
Pipelining with Exogenous Data |
---|
from sklearn.preprocessing import MinMaxScaler, PowerTransformer
from sktime.datasets import load_macroeconomic
from sktime.forecasting.compose import ForecastingPipeline
from sktime.transformations.series.adapt import TabularToSeriesAdaptor
from sktime.transformations.series.impute import Imputer
Macroeconomic Dataset (documentation) |
---|
US Macroeconomic Data for 1959Q1 - 2009Q3.
Dimensionality: multivariate, 14
Series length: 203
Frequency: Quarterly
Number of cases: 1
This data is kindly wrapped via statsmodels.datasets.macrodata.
data = load_macroeconomic()
targets = data["unemp"]
inputs = data.drop(columns=["unemp"])
train_targets, test_targets, train_inputs, test_inputs = temporal_train_test_split(targets, inputs)
horizon = ForecastingHorizon(test_targets.index, is_relative=False)
Processes in the pipeline: impute missing data, scale the data, then forecast with the model. NOTE: transformers from many libraries can be used, as long as they are wrapped in the TabularToSeriesAdaptor wrapper. When fit and predict are called, these transformations are applied to the exogenous data rather than to the target data |
---|
forecaster = ForecastingPipeline(
steps=[("imputer", Imputer(method="mean")),
("scale", TabularToSeriesAdaptor(MinMaxScaler(feature_range=(1, 2)))),
("boxcox", TabularToSeriesAdaptor(PowerTransformer(method="box-cox"))),
("forecaster", AutoARIMA(suppress_warnings=True)),])
forecaster.fit(y = train_targets, X = train_inputs)
predictions = forecaster.predict(fh = horizon, X = test_inputs)
plot_series(train_targets, predictions, test_targets,
labels=["train_targets", "predictions", "test_targets"],
colors = ['C4', 'C2', 'C1'])
(<Figure size 1600x400 with 1 Axes>, <AxesSubplot: ylabel='unemp'>)
Multivariate Forecasting: Multiple Target Series |
---|
all_estimators(
"forecaster",
filter_tags={"scitype:y": ["both", "multivariate"]},
return_names=False,
)
[sktime.forecasting.compose._column_ensemble.ColumnEnsembleForecaster, sktime.forecasting.dynamic_factor.DynamicFactor, sktime.forecasting.compose._ensemble.EnsembleForecaster, sktime.forecasting.compose._grouped.ForecastByLevel, sktime.forecasting.model_selection._tune.ForecastingGridSearchCV, sktime.forecasting.compose._pipeline.ForecastingPipeline, sktime.forecasting.model_selection._tune.ForecastingRandomizedSearchCV, sktime.forecasting.compose._multiplexer.MultiplexForecaster, sktime.forecasting.compose._pipeline.Permute, sktime.param_est.plugin.PluginParamsForecaster, sktime.forecasting.compose._pipeline.TransformedTargetForecaster, sktime.forecasting.var.VAR, sktime.forecasting.varmax.VARMAX, sktime.forecasting.vecm.VECM]
_, data = load_longley()
data = data.iloc[:, 2:4]
horizon = np.arange(3) + 1
training, testing = temporal_train_test_split(data, fh = horizon)
The task is to predict both features at the same time |
---|
training.head()
UNEMP | ARMED | |
---|---|---|
Period | ||
1947 | 2,356.00 | 1,590.00 |
1948 | 2,325.00 | 1,456.00 |
1949 | 3,682.00 | 1,616.00 |
1950 | 3,351.00 | 1,650.00 |
1951 | 2,099.00 | 3,099.00 |
By-Variable Ensembling |
---|
from sktime.forecasting.compose import ColumnEnsembleForecaster
forecasters = [("trend", PolynomialTrendForecaster(), 0),
("ses", ExponentialSmoothing(), 1),]
forecaster = ColumnEnsembleForecaster(forecasters = forecasters)
forecaster.fit(training)
predictions = forecaster.predict(horizon)
predictions.head()
UNEMP | ARMED | |
---|---|---|
1960 | 3,688.65 | 2,552.43 |
1961 | 3,794.19 | 2,552.43 |
1962 | 3,899.72 | 2,552.43 |
Bespoke multivariate models |
---|
from sktime.forecasting.var import VAR
forecaster = VAR()
forecaster.fit(training)
predictions = forecaster.predict(horizon)
predictions.head()
UNEMP | ARMED | |
---|---|---|
1960 | 3,322.41 | 2,611.27 |
1961 | 3,153.43 | 2,673.11 |
1962 | 3,095.84 | 2,725.06 |
Building a Forecaster |
Check out our [forecasting extension template](https://github.com/alan-turing-institute/sktime/blob/main/extension_templates/forecasting.py)!
This is a Python file with to-do code blocks that allow you to implement your own, sktime-compatible forecasting algorithm.
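For orientation, here is a minimal sketch of the pattern the template walks through: subclass BaseForecaster and fill in the private _fit / _predict hooks. MeanForecaster below is hypothetical and illustrative only; the exact tags and hook details are spelled out in the template.
import pandas as pd
from sktime.forecasting.base import BaseForecaster

class MeanForecaster(BaseForecaster):
    """Forecast every horizon step with the training mean (illustrative only)."""

    _tags = {"requires-fh-in-fit": False}  # fh may be supplied at predict time

    def _fit(self, y, X=None, fh=None):
        self.mean_ = y.mean()  # fitted attributes end in "_"
        return self

    def _predict(self, fh, X=None):
        # convert the horizon to absolute time points anchored at the cutoff
        index = fh.to_absolute(self.cutoff).to_pandas()
        return pd.Series(self.mean_, index=index)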
Summary |
---|
Useful Resources |
---|
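A few pointers already referenced in this notebook:
- Estimator overview: https://www.sktime.org/en/stable/estimator_overview.html
- Extension templates: https://github.com/alan-turing-institute/sktime/blob/main/extension_templates/forecasting.py
- PyData Amsterdam 2020 tutorial: https://github.com/sktime/sktime-tutorial-pydata-amsterdam-2020
- Window splitters notebook: https://github.com/alan-turing-institute/sktime/blob/main/examples/window_splitters.ipynb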
Time Series Classification |
Univariate time series classification |
---|
In univariate time series classification, we have a single time series variable and an associated label for multiple instances. The goal is to find a classifier that can learn the relationship between the time series and labels, and accurately predict the label of a new, unlabelled series. sktime provides time series classification algorithms and tools for building composite machine learning models.
The basic workflow |
---|
Data |
---|
To find other datasets, go to: http://timeseriesclassification.com/dataset.php
from warnings import simplefilter
simplefilter(action="ignore", category=FutureWarning)
UCR UEA Dataset (documentation) |
---|
Load dataset from UCR UEA time series archive.
Downloads and extracts dataset if not already downloaded. Data is assumed to be in the standard .ts format: each row is a (possibly multivariate) time series. Each dimension is separated by a colon, each value in a series is comma separated. For examples see sktime.datasets.data.tsc. ArrowHead is an example of a univariate equal length problem, BasicMotions an equal length multivariate problem.
import matplotlib.pyplot as plt
import numpy as np
from sktime.datasets import load_UCR_UEA_dataset
from sktime.datatypes import convert
inputs, targets = load_UCR_UEA_dataset("ItalyPowerDemand", return_X_y=True)
see(inputs.head(3), 'Inputs before converting to NumPy 3D')
Inputs before converting to NumPy 3D |
dim_0 | |
---|---|
0 | 0 -0.71 1 -1.18 2 -1.37 3 -1.59 4 -1.47 5 -1.37 6 -1.09 7 0.05 8 0.93 9 1.09 10 1.28 11 0.96 12 0.61 13 0.01 14 -0.65 15 -0.27 16 -0.21 17 0.61 18 1.37 19 1.46 20 1.05 21 0.58 22 0.17 23 -0.27 dtype: float64 |
1 | 0 -0.99 1 -1.43 2 -1.58 3 -1.61 4 -1.63 5 -1.38 6 -1.02 7 -0.36 8 0.72 9 1.20 10 1.12 11 1.05 12 0.79 13 0.46 14 0.49 15 0.56 16 0.61 17 0.31 18 0.26 19 1.10 20 1.05 21 0.69 22 -0.05 23 -0.38 dtype: float64 |
2 | 0 1.32 1 0.57 2 0.20 3 -0.09 4 -0.18 5 -0.27 6 -0.09 7 -1.40 8 -1.12 9 -0.74 10 0.01 11 -0.09 12 0.01 13 -0.46 14 -0.55 15 -0.74 16 -0.74 17 -0.74 18 -1.12 19 -0.46 20 0.48 21 2.35 22 2.26 23 1.60 dtype: float64 |
inputs = convert(inputs, from_type="nested_univ", to_type="numpy3D")
This data contains 1,096 different individuals' records, with 1 variable measured at 24 time points |
---|
pretty(inputs.shape, 'inputs.shape')
inputs.shape |
(1096, 1, 24) |
# binary target variable
pretty(np.unique(targets), 'np.unique(targets)')
np.unique(targets) |
['1', '2'] |
labels, counts = np.unique(targets, return_counts=True)
fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))
for label in labels:
ax.plot(inputs[targets == label, 0, :][0], label=f"class {label}")
ax.set(ylabel="Scaled distance from midpoint", xlabel="Index");
Train-test split |
---|
from sklearn.model_selection import train_test_split
train_in, test_in, train_out, test_out = train_test_split(inputs, targets)
Model specification |
---|
Find out more about ROCKET in the paper “ROCKET: Exceptionally fast and accurate time series classification using random convolutional kernels” (Dempster et al., 2020).
from sktime.classification.kernel_based import RocketClassifier
classifier = RocketClassifier()
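ROCKET transforms each series with a large number of random convolutional kernels and feeds the resulting tabular features to a fast linear classifier. Conceptually, RocketClassifier behaves like the sketch below (an equivalent pipeline built from public sktime/sklearn pieces, not its literal source):

import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import Rocket

# random kernel features -> ridge classifier with built-in alpha selection
rocket_sketch = make_pipeline(Rocket(),
                              RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)))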
Fitting |
---|
%%time
classifier.fit(train_in, train_out)
RocketClassifier()
The X here is panel data: time-indexed series for multiple instances and features. All SKTime classifier fit methods expect this format for their X input. |
---|
classifier.fit?
Signature: classifier.fit(X, y)
Docstring:
Fit time series classifier to training data.

Parameters
----------
X : 3D np.array (any number of dimensions, equal length series)
      of shape [n_instances, n_dimensions, series_length]
    or 2D np.array (univariate, equal length series)
      of shape [n_instances, series_length]
    or pd.DataFrame with each column a dimension, each cell a pd.Series
      (any number of dimensions, equal or unequal length series)
    or of any other supported Panel mtype
      for list of mtypes, see datatypes.SCITYPE_REGISTER
      for specifications, see examples/AA_datatypes_and_datasets.ipynb
y : 1D np.array of int, of shape [n_instances] - class labels for fitting
    indices correspond to instance indices in X

Returns
-------
self : Reference to self.

Notes
-----
Changes state by creating a fitted model that updates attributes ending
in "_" and sets is_fitted flag to True.

File: ~/opt/anaconda3/envs/time_series_projects/lib/python3.10/site-packages/sktime/classification/base.py
Type: method
Prediction |
---|
predictions = classifier.predict(test_in)
Evaluation |
---|
from sklearn.metrics import accuracy_score
accuracy_score(test_out, predictions)
0.9708029197080292
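Accuracy alone can hide which class is being misclassified; as an optional extra step, a confusion matrix breaks the errors down per class:

from sklearn.metrics import confusion_matrix
confusion_matrix(test_out, predictions)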
Classifiers in SKTime |
---|
from sktime.registry import all_estimators
all_estimators("classifier", return_names=False)
[sktime.classification.kernel_based._arsenal.Arsenal,
 sktime.classification.dictionary_based._boss.BOSSEnsemble,
 sktime.classification.deep_learning.cnn.CNNClassifier,
 sktime.classification.interval_based._cif.CanonicalIntervalForest,
 sktime.classification.feature_based._catch22_classifier.Catch22Classifier,
 sktime.classification.compose._pipeline.ClassifierPipeline,
 sktime.classification.compose._column_ensemble.ColumnEnsembleClassifier,
 sktime.classification.compose._ensemble.ComposableTimeSeriesForestClassifier,
 sktime.classification.dictionary_based._cboss.ContractableBOSS,
 sktime.classification.interval_based._drcif.DrCIF,
 sktime.classification.dummy._dummy.DummyClassifier,
 sktime.classification.distance_based._elastic_ensemble.ElasticEnsemble,
 sktime.classification.deep_learning.fcn.FCNClassifier,
 sktime.classification.feature_based._fresh_prince.FreshPRINCE,
 sktime.classification.hybrid._hivecote_v1.HIVECOTEV1,
 sktime.classification.hybrid._hivecote_v2.HIVECOTEV2,
 sktime.classification.dictionary_based._boss.IndividualBOSS,
 sktime.classification.dictionary_based._tde.IndividualTDE,
 sktime.classification.distance_based._time_series_neighbors.KNeighborsTimeSeriesClassifier,
 sktime.classification.deep_learning.lstmfcn.LSTMFCNClassifier,
 sktime.classification.deep_learning.mlp.MLPClassifier,
 sktime.classification.dictionary_based._muse.MUSE,
 sktime.classification.feature_based._matrix_profile_classifier.MatrixProfileClassifier,
 sktime.classification.early_classification._probability_threshold.ProbabilityThresholdEarlyClassifier,
 sktime.classification.distance_based._proximity_forest.ProximityForest,
 sktime.classification.distance_based._proximity_forest.ProximityStump,
 sktime.classification.distance_based._proximity_forest.ProximityTree,
 sktime.classification.feature_based._random_interval_classifier.RandomIntervalClassifier,
 sktime.classification.interval_based._rise.RandomIntervalSpectralEnsemble,
 sktime.classification.deep_learning.resnet.ResNetClassifier,
 sktime.classification.kernel_based._rocket_classifier.RocketClassifier,
 sktime.classification.distance_based._shape_dtw.ShapeDTW,
 sktime.classification.shapelet_based._stc.ShapeletTransformClassifier,
 sktime.classification.feature_based._signature_classifier.SignatureClassifier,
 sktime.classification.compose._pipeline.SklearnClassifierPipeline,
 sktime.classification.feature_based._summary_classifier.SummaryClassifier,
 sktime.classification.interval_based._stsf.SupervisedTimeSeriesForest,
 sktime.classification.feature_based._tsfresh_classifier.TSFreshClassifier,
 sktime.classification.deep_learning.tapnet.TapNetClassifier,
 sktime.classification.dictionary_based._tde.TemporalDictionaryEnsemble,
 sktime.classification.interval_based._tsf.TimeSeriesForestClassifier,
 sktime.classification.kernel_based._svc.TimeSeriesSVC,
 sktime.classification.dictionary_based._weasel.WEASEL,
 sktime.classification.compose._ensemble.WeightedEnsembleClassifier]
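all_estimators can also filter this registry by capability tags. For example, the call below uses the capability:multivariate tag from the sktime registry to list only classifiers that handle multivariate series:

all_estimators("classifier",
               filter_tags={"capability:multivariate": True},
               return_names=False)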
But can I not just use scikit-learn? |
---|
In principle, yes, but generic tabular classifiers may not be as powerful as dedicated time series classification algorithms ...
See our previous tutorial from the PyData Amsterdam 2020 for more details: https://github.com/sktime/sktime-tutorial-pydata-amsterdam-2020
Compare algorithms from sktime and scikit-learn!
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.reduce import Tabularizer
Tabularizer() converts panel data to cross-sectional (tabular) data. This makes it possible to use generic classifiers, such as SKLearn models, with data that was previously formatted for SKTime specifically. The difference is that SKTime's classifiers are dedicated time series classifiers rather than generic classification models. |
---|
classifier = make_pipeline(Tabularizer(),
KNeighborsClassifier(n_neighbors=1,
metric="euclidean"))
classifier.fit(train_in, train_out)
Pipeline(steps=[('tabularizer', Tabularizer()),
                ('kneighborsclassifier',
                 KNeighborsClassifier(metric='euclidean', n_neighbors=1))])
predictions = classifier.predict(test_in)
accuracy_score(test_out, predictions)
0.9817518248175182
Advanced Model Building and Composition |
---|
Pipelining |
---|
Check out the tsfresh package for automatic feature extraction: https://tsfresh.readthedocs.io/en/latest/
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.tsfresh import TSFreshFeatureExtractor
# The cell below is left commented out, presumably because full TSFresh
# feature extraction is slow on this dataset.
# %%time
# classifier = make_pipeline(TSFreshFeatureExtractor(disable_progressbar=True,
#                                                    show_warnings=False),
#                            RandomForestClassifier())
# classifier.fit(train_in, train_out)
# predictions = classifier.predict(test_in)
# accuracy_score(test_out, predictions)
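A faster variant worth trying (a sketch using TSFreshFeatureExtractor's built-in "minimal" feature set, which computes far fewer features than the default and so typically fits much faster):

fast_classifier = make_pipeline(
    TSFreshFeatureExtractor(default_fc_parameters="minimal",
                            disable_progressbar=True,
                            show_warnings=False),
    RandomForestClassifier())
fast_classifier.fit(train_in, train_out)
accuracy_score(test_out, fast_classifier.predict(test_in))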
Time Series Regression |
---|
The basic workflow |
---|
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sktime.datasets import load_UCR_UEA_dataset
Find out more about the dataset here: https://zenodo.org/record/3902673#.YXqxNy8w3UI
X, y = load_UCR_UEA_dataset(name="ChlorineConcentration", return_X_y=True)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y)
fig, ax = plt.subplots(1)
ax.hist(y)
ax.set(xlabel="target variable (bins)", ylabel="frequency");
# fig, ax = plt.subplots(1, figsize=plt.figaspect(0.25))
# for i in range(5):
#     ax.plot(X_train.iloc[i, 0])  # each cell of the nested frame is a pd.Series
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.pipeline import make_pipeline
from sktime.transformations.panel.rocket import Rocket
regressor = make_pipeline(Rocket(), RandomForestRegressor())
%%time
regressor.fit(X_train, y_train)
Pipeline(steps=[('rocket', Rocket()),
                ('randomforestregressor', RandomForestRegressor())])
# predictions = regressor.predict(X_test)
# mean_squared_error(y_test, predictions)
Reducing forecasting to time series regression |
---|
import numpy as np
from sktime.datasets import load_airline
from sktime.forecasting.compose import make_reduction
from sktime.forecasting.model_selection import temporal_train_test_split
from sktime.utils.plotting import plot_series
data = load_airline()
horizon = np.arange(12) + 1
training, testing = temporal_train_test_split(data, fh=horizon)
We must specify that this is a time-series regressor rather than a tabular regressor, i.e., its scitype |
---|
forecaster = make_reduction(
regressor, scitype="time-series-regressor", window_length=12)
Look up the term "scitype" in our glossary: |
---|
https://www.sktime.org/en/stable/glossary.html#term-Scientific-type
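Conceptually, the reduction turns forecasting into a supervised regression problem by sliding a window over the series: each window of window_length past values becomes one input, and the value that follows it becomes the target. The helper below is an illustrative sketch of that idea, not sktime's internal code:

def sliding_windows(y, window_length):
    """Build (window -> next value) training pairs from a 1D series."""
    values = np.asarray(y)
    # one row per window of past values
    X = np.stack([values[i:i + window_length]
                  for i in range(len(values) - window_length)])
    # the value immediately after each window
    targets = values[window_length:]
    return X, targets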
from sktime.forecasting.compose import TransformedTargetForecaster
from sktime.transformations.series.detrend import Detrender
pipe = TransformedTargetForecaster([("detrend", Detrender()),
                                    ("forecast", forecaster)])
%%time
pipe.fit(training)
TransformedTargetForecaster(steps=[('detrend', Detrender()), ('forecast', RecursiveTimeSeriesRegressionForecaster(estimator=Pipeline(steps=[('rocket', Rocket()), ('randomforestregressor', RandomForestRegressor())]), window_length=12))])
predictions = pipe.predict(horizon)
plot_series(training, testing, predictions,
labels=["y_train", "y_test", "y_pred"],
colors = ['C0', 'C1', 'C2']);
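To put a number on the plot above, we can score the forecast; mean_absolute_percentage_error from sktime's forecasting metrics is one common choice:

from sktime.performance_metrics.forecasting import mean_absolute_percentage_error
mean_absolute_percentage_error(testing, predictions)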
Multivariate Time Series Classification |
---|
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sktime.classification.compose import ColumnEnsembleClassifier
from sktime.classification.dictionary_based import BOSSEnsemble
from sktime.classification.interval_based import TimeSeriesForestClassifier
from sktime.transformations.panel.compose import ColumnConcatenator
Loading Multivariate Time Series/Panel Data |
---|
The data set we use in this notebook was generated as part of a student project in which four students performed four activities while wearing a smart watch. The watch collected 3D accelerometer and 3D gyroscope data. The data set comprises four classes: walking, resting (labelled 'standing' in the data), running, and badminton. Participants recorded each motion a total of five times, and the data were sampled once every tenth of a second for a ten-second period.
input_data, target_data = load_UCR_UEA_dataset("BasicMotions", return_X_y=True)
input_data = convert(input_data, from_type="nested_univ", to_type="numpy3D")
train_in, test_in, train_out, test_out = train_test_split(input_data, target_data)
pretty(train_in.shape, 'train_in.shape')
pretty(train_out.shape, 'train_out.shape')
pretty(test_in.shape, 'test_in.shape')
pretty(test_out.shape, 'test_out.shape')
train_in.shape |
(60, 6, 100) |
train_out.shape |
(60,) |
test_in.shape |
(20, 6, 100) |
test_out.shape |
(20,) |
# multi-class target variable
np.unique(train_out)
array(['badminton', 'running', 'standing', 'walking'], dtype='<U9')
Multivariate Classification |
---|
sktime offers three main ways of solving multivariate time series classification problems:
- concatenating the time series columns into a single long time series via ColumnConcatenator and applying a classifier to the concatenated data,
- column-wise ensembling via ColumnEnsembleClassifier, in which one classifier is fitted for each time series column and their predictions are aggregated,
- bespoke, classifier-specific algorithms for multivariate series, such as HIVE-COTE (see "Bespoke classification algorithms" below).
Time Series Concatenation |
---|
We can concatenate multivariate time series/panel data into a long univariate time series/panel and then apply a classifier to the univariate data.
steps = [
("concatenate", ColumnConcatenator()),
("classify", TimeSeriesForestClassifier(n_estimators=100)),]
classifier = Pipeline(steps)
classifier.fit(train_in, train_out)
classifier.score(test_in, test_out)
1.0
Column Ensembling |
---|
We can also fit one classifier for each time series column and then aggregate their predictions. The interface is similar to the familiar ColumnTransformer from sklearn.
classifier = ColumnEnsembleClassifier(
estimators=[
("TSF0", TimeSeriesForestClassifier(n_estimators=10), [0]),
("BOSSEnsemble3", BOSSEnsemble(max_ensemble_size=5), [3]),])
# fitting is commented out in this notebook, likely because BOSSEnsemble is slow to fit;
# uncomment both lines to run
# classifier.fit(train_in, train_out)
# classifier.score(test_in, test_out)
Bespoke classification algorithms |
---|
Another approach is to use bespoke (or classifier-specific) methods for multivariate time series data. Here, we try out the HIVE-COTE (version 2) algorithm in multidimensional space.
Check out the research paper: https://link.springer.com/article/10.1007%2Fs10994-021-06057-9
from sktime.classification.hybrid import HIVECOTEV2
X_train, y_train = load_UCR_UEA_dataset("BasicMotions", split="train", return_X_y=True)
X_test, y_test = load_UCR_UEA_dataset("BasicMotions", split="test", return_X_y=True)
HIVE-COTE v2 is the current state of the art for time series classification |
---|
classifier = HIVECOTEV2(
stc_params={"n_shapelet_samples": 1000},
drcif_params={"n_estimators": 25},
arsenal_params={"n_estimators": 10},
tde_params={"n_parameter_samples": 100},
verbose=0,)
%%time
classifier.fit(X_train, y_train)
HIVECOTEV2(arsenal_params={'n_estimators': 10}, drcif_params={'n_estimators': 25}, stc_params={'n_shapelet_samples': 1000}, tde_params={'n_parameter_samples': 100})
predictions = classifier.predict(X_test)
accuracy_score(y_test, predictions)
0.95
Building a Classifier |
---|
Check out our classifier extension template!
This is a Python file with to-do code blocks that allow you to implement your own sktime-compatible classification algorithm.
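As with the forecaster template, here is a minimal sketch of the shape such a classifier might take: a toy nearest-centroid classifier over equal-length series. The class name, tag, and details are illustrative assumptions; the extension template documents the exact hooks required.

import numpy as np
from sktime.classification.base import BaseClassifier

class NearestCentroidTSC(BaseClassifier):
    """Toy classifier: predict the class whose mean series is closest."""

    _tags = {"X_inner_mtype": "numpy3D"}  # receive X as (n_instances, n_dims, length)

    def _fit(self, X, y):
        # one centroid (mean series) per class
        self._centroids = {label: X[y == label].mean(axis=0)
                           for label in np.unique(y)}
        return self

    def _predict(self, X):
        labels = list(self._centroids)
        # Euclidean distance of every instance to every class centroid
        dists = np.stack([np.linalg.norm(X - self._centroids[c], axis=(1, 2))
                          for c in labels], axis=1)
        return np.asarray(labels)[dists.argmin(axis=1)]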
Summary |
---|
Useful Resources |
---|
Deep Learning |
---|