1. Why Use Neural Networks for Time Series Forecasting?
Traditional methods such as ARIMA and exponential smoothing work well when the series is approximately linear and stationary (or can be made stationary, e.g. by differencing). Neural networks, however, especially recurrent neural networks (RNNs) and Long Short-Term Memory (LSTM) networks, can:
- Capture non-linear relationships
- Handle multiple input and output variables
- Model complex temporal dependencies
2. Neural Network Architectures
Popular architectures for time series forecasting:
- Feedforward Neural Network (Dense/MLP): Good for tabular and lagged features.
- Recurrent Neural Network (RNN): Remembers previous steps; can model sequential data.
- LSTM/GRU: Advanced RNNs that handle long-term dependencies better.
3. Sample Problem
We'll show how to forecast the next value in a univariate time series using past observations.
4. Data Preparation
To train a supervised model, we slice the series into overlapping fixed-size input windows, each paired with the value that immediately follows it as the target.
# Example: Using sliding window to prepare data
def create_sequences(data, window_size):
    """Turn a series into sliding-window inputs and next-step targets.

    Sample ``i`` pairs the window ``data[i:i + window_size]`` with the
    target ``data[i + window_size]``.

    Args:
        data: Sequence or array of observations, indexed along the first axis.
        window_size: Number of consecutive observations per input window.
            Must be at least 1.

    Returns:
        A tuple ``(xs, ys)`` of NumPy arrays. ``xs`` has shape
        ``(n_samples, window_size, ...)`` and ``ys`` has shape
        ``(n_samples, ...)``, where ``n_samples = len(data) - window_size``.
        When the series is too short for a single window, both arrays are
        empty but keep those shapes (``n_samples == 0``).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        # A negative size would silently wrap around via negative indexing.
        raise ValueError("window_size must be at least 1")

    values = np.asarray(data)
    n_samples = len(values) - window_size
    if n_samples <= 0:
        # Keep the window axis so callers can still reshape or stack the result.
        tail_shape = values.shape[1:]
        return (
            np.empty((0, window_size) + tail_shape, dtype=values.dtype),
            np.empty((0,) + tail_shape, dtype=values.dtype),
        )

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1].
    index_grid = np.arange(n_samples)[:, None] + np.arange(window_size)
    xs = values[index_grid]
    # Copy so the targets never alias the caller's array.
    ys = values[window_size:].copy()
    return xs, ys
5. TensorFlow (Keras) Example
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Synthetic signal: sin(t) sampled at 1000 evenly spaced points on [0, 100]
timesteps = np.linspace(0, 100, 1000)
series = np.sin(timesteps)

# Window the series into (past WINDOW_SIZE values -> next value) pairs
WINDOW_SIZE = 10
X, y = create_sequences(series, WINDOW_SIZE)

# Chronological 80/20 train/test split
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# The LSTM expects (samples, timesteps, features): add a feature axis of size 1
X_train_reshaped = X_train[:, :, np.newaxis]
X_test_reshaped = X_test[:, :, np.newaxis]

# One LSTM layer (32 units) feeding a single linear output unit
model = keras.Sequential()
model.add(keras.layers.Input(shape=(WINDOW_SIZE, 1)))
model.add(keras.layers.LSTM(32))
model.add(keras.layers.Dense(1))
model.compile(optimizer='adam', loss='mse')

# Fit for 10 epochs, holding out 20% of the training data for validation
model.fit(
    X_train_reshaped,
    y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)

# Report mean squared error on the held-out test windows
loss = model.evaluate(X_test_reshaped, y_test)
print("Test loss:", loss)
6. PyTorch Example
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
# Number of past observations fed to the model per sample
WINDOW_SIZE = 10

# Synthetic univariate signal: sin(t) at 1000 evenly spaced points on [0, 100]
timesteps = np.linspace(start=0, stop=100, num=1000)
series = np.sin(timesteps)
def create_sequences(data, window_size):
    """Turn a series into sliding-window inputs and next-step targets.

    Sample ``i`` pairs the window ``data[i:i + window_size]`` with the
    target ``data[i + window_size]``.

    Args:
        data: Sequence or array of observations, indexed along the first axis.
        window_size: Number of consecutive observations per input window.
            Must be at least 1.

    Returns:
        A tuple ``(xs, ys)`` of NumPy arrays. ``xs`` has shape
        ``(n_samples, window_size, ...)`` and ``ys`` has shape
        ``(n_samples, ...)``, where ``n_samples = len(data) - window_size``.
        When the series is too short for a single window, both arrays are
        empty but keep those shapes (``n_samples == 0``).

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        # A negative size would silently wrap around via negative indexing.
        raise ValueError("window_size must be at least 1")

    values = np.asarray(data)
    n_samples = len(values) - window_size
    if n_samples <= 0:
        # Keep the window axis so callers can still reshape or stack the result.
        tail_shape = values.shape[1:]
        return (
            np.empty((0, window_size) + tail_shape, dtype=values.dtype),
            np.empty((0,) + tail_shape, dtype=values.dtype),
        )

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1].
    index_grid = np.arange(n_samples)[:, None] + np.arange(window_size)
    xs = values[index_grid]
    # Copy so the targets never alias the caller's array.
    ys = values[window_size:].copy()
    return xs, ys
# Build (window, next value) pairs and split them chronologically 80/20
X, y = create_sequences(series, WINDOW_SIZE)
split = int(0.8 * len(X))
X_train, y_train = X[:split], y[:split]
X_test, y_test = X[split:], y[split:]

# Convert to float32 tensors; inputs gain a trailing feature axis of size 1
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)[..., None]
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)[..., None]
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Shuffled mini-batches of 32 training samples
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
# LSTM Model
class LSTMModel(nn.Module):
    """LSTM regressor producing one scalar prediction per input sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, input_size=1, hidden_size=32, num_layers=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, timesteps, features).
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a (batch, timesteps, features) tensor to predictions of shape (batch,)."""
        out, _ = self.lstm(x)
        # Only the hidden state at the final time step feeds the output head.
        last_step = out[:, -1, :]
        out = self.fc(last_step)
        # Drop only the trailing size-1 output axis. A bare squeeze() would
        # also remove the batch axis when the batch holds a single sample,
        # producing a 0-d tensor that no longer matches the target's shape.
        return out.squeeze(-1)
model = LSTMModel()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train loop
for epoch in range(10):
    model.train()  # make sure train-mode behaviour is active
    running_loss = 0.0
    n_seen = 0
    for xb, yb in train_loader:
        pred = model(xb)
        loss = loss_fn(pred, yb)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Weight each batch by its size so a short final batch does not
        # skew the epoch average.
        batch_size = xb.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size
    # Report the mean loss over the whole epoch. The last mini-batch alone
    # is a noisy figure because the loader reshuffles every epoch.
    epoch_loss = running_loss / max(n_seen, 1)
    print(f"Epoch {epoch+1}: loss={epoch_loss:.4f}")

# Evaluate
model.eval()  # switch to inference behaviour before scoring held-out data
with torch.no_grad():
    pred = model(X_test_tensor)
    test_loss = loss_fn(pred, y_test_tensor)
print("Test loss:", test_loss.item())
7. Further Reading
Neural networks are powerful for time series forecasting, but they require careful architecture selection and data preparation. Experiment with different architectures (MLP, LSTM, CNN), feature engineering, and hyperparameters to get the best results.