Source code for prep.timeseries

"""Manipulation timestep dataframe."""

import numpy as np
import pandas as pd


def _timestep_single(array, index=0, timesteps=2, keep_first=True):
    """Build lagged copies of one column of an array.

    Parameters
    ----------
    array : array
        One-dimensional array, or two-dimensional array from which a single
        column is taken.
    index : int, optional
        Position of the column to lag, by default 0
    timesteps : int, optional
        Number of lags to generate, by default 2
    keep_first : bool, optional
        Keep the lag-0 (unshifted) column if True, by default True

    Returns
    -------
    array
        2D array whose columns are ordered lag 0, lag 1, ..., lag
        ``timesteps`` (lag 0 omitted when ``keep_first`` is falsy). The
        first ``timesteps`` rows of the input have no complete history and
        are not represented.
    """
    # Promote a flat vector to a single-column matrix so it can be indexed
    # as ``[rows, column]`` below.
    data = array if array.ndim != 1 else array.reshape(-1, 1)

    # Lag 0 is the column starting at row ``timesteps``; every further lag
    # slides the same-length window one row earlier. ``None`` as the stop
    # is needed for lag 0 because ``-0`` would produce an empty slice.
    lagged = [
        data[timesteps - lag:(-lag if lag > 0 else None), index]
        for lag in range(timesteps + 1)
    ]

    if not keep_first:
        # The unshifted column is always the first entry.
        del lagged[0]

    return np.array(lagged).transpose()
def _timestep_multi(array, index=None, timesteps=2, keep_first=True):
    """Build lagged copies for several columns of a 2D array.

    Parameters
    ----------
    array : array
        Two-dimensional numeric array.
    index : list of int, optional
        Positions of the columns to lag; every column is lagged when None,
        by default None
    timesteps : int, optional
        Number of lags to generate, by default 2
    keep_first : bool, optional
        Keep the lag-0 (unshifted) column of each lagged column if True,
        by default True

    Returns
    -------
    array
        2D array in which each selected column is replaced, in place, by its
        block of lagged columns. Columns that are not selected are kept as a
        single column with their first ``timesteps`` rows dropped so that
        they line up with the lagged blocks.
    """
    _, n_columns = array.shape

    pieces = []
    for position in range(n_columns):
        column = array[:, position].reshape(-1, 1)

        if index is not None and position not in index:
            # Not selected: only trim the leading rows.
            pieces.append(column[timesteps:])
            continue

        pieces.append(
            _timestep_single(column, timesteps=timesteps, keep_first=keep_first)
        )

    return np.concatenate(pieces, axis=1)
def timestep_table(dataframe, columns=None, timesteps=2, keep_first=True,
                   template="{column}_tmin{i}"):
    """Generate timesteps directly from DataFrame.

    Parameters
    ----------
    dataframe : DataFrame
        Dataframe consisting of numeric columns only.
    columns : list of str, optional
        Names of the columns to expand into timesteps; every column is
        expanded when None, by default None
    timesteps : int, optional
        Number of timesteps, by default 2
    keep_first : bool, optional
        Include the lag-0 column (``i=0`` in `template`) if True,
        by default True
    template : str
        Format string for the generated column names; it is formatted with
        the keywords ``column`` and ``i``, by default "{column}_tmin{i}"

    Returns
    -------
    DataFrame
        DataFrame indexed by ``dataframe.index[timesteps:]`` in which each
        selected column is replaced by its timestep columns. Columns that
        are not selected keep their original name.

    Raises
    ------
    ValueError
        If a name in `columns` is not a column of `dataframe`.
    """
    columns_name = list(dataframe.columns)
    new_index = dataframe.index[timesteps:]

    # When the lag-0 column is dropped, never generate its name at all.
    # Deleting names afterwards by a fixed "_tmin0" suffix would ignore a
    # custom `template` and could also delete an untouched column whose own
    # name happens to end with that suffix.
    first_step = 0 if keep_first else 1

    new_columns_name = []
    for column in columns_name:
        if columns is None or column in columns:
            new_columns_name.extend(
                template.format(column=column, i=i)
                for i in range(first_step, timesteps + 1)
            )
        else:
            new_columns_name.append(column)

    # The array helper selects columns by position, not by name.
    if columns is None:
        index_columns = None
    else:
        index_columns = [columns_name.index(column) for column in columns]

    columns_values = _timestep_multi(
        dataframe.values,
        index=index_columns,
        keep_first=keep_first,
        timesteps=timesteps,
    )

    return pd.DataFrame(
        data=columns_values, index=new_index, columns=new_columns_name
    )