def __init__(self, config):
    """Initialize training and visualization settings from a parsed YAML config.

    Args:
        config (dict): Parsed YAML configuration containing 'TRAINING' and
            'VISUALIZATION' sections.
    """
    # Filled in later by the activation/visualization steps
    self.activation_model = None
    self.layer_outputs = None
    # READING FROM A YAML CONFIG FILE
    training = config['TRAINING']
    visualization = config['VISUALIZATION']
    self.info = training['info']
    self.noEpochs = training['noEpochs']
    self.batch_size = training["batch_size"]
    self.nNeurons = training["nNeurons"]
    self.validation_split = training["validation_split"]
    # Location to save the trained model
    self.h5_file_loc = training["h5file_loc"]
    # Location to save the Dimensionality Reduction results
    self.pacmap_file_loc = visualization["pacmap_file_loc"]
    self.pcache_file_loc = visualization["pcache_file_loc"]
    self.image_file_loc = visualization["image_file_loc"]
def load_data(features_csv: str, labels_csv: str, key_column: str, win_size: int) -> tuple[ndarray[Any, Any], Any]:
    """
    Loads features and labels from separate CSV files, merges them on a common key, reshapes the features
    into sequences, and one-hot encodes the labels. The function also saves the label encoding to a YAML file.

    Args:
        features_csv (str): Path to the CSV file containing the feature data.
        labels_csv (str): Path to the CSV file containing the label data.
        key_column (str): The column name used as the key for merging the features and labels.
        win_size (int): The window size for reshaping the data into sequences of time steps.

    Returns:
        Tuple:
            - X_reshaped: A 3D array of reshaped feature data with dimensions
              [number_of_sequences, win_size, number_of_features].
            - y_one_hot: A 2D array of one-hot encoded labels with dimensions
              [number_of_sequences, number_of_classes].

    Raises:
        ValueError: If there is an issue with the merging or reshaping of data.

    Steps:
        1. Loads the features and labels from the provided CSV files.
        2. Merges the two datasets based on the specified key column.
        3. Reshapes the features into sequences of `win_size` time steps.
        4. Extracts labels, encodes them using a label encoder, and one-hot encodes the labels.
        5. Saves the mapping of original label names to their encoded form in a `model_labels.yaml` file.
    """
    # Load features and labels
    features_df = pd.read_csv(features_csv)
    labels_df = pd.read_csv(labels_csv)
    # Merge features and labels on the key column
    combined_df = pd.merge(features_df, labels_df, on=key_column)
    # Drop the two trailing non-feature columns before building X.
    # NOTE(review): reconstructed from a truncated line — confirm exactly which
    # columns the original dropped. Adjust 'Labels' to your actual label column name.
    X = combined_df.drop(columns=[combined_df.columns[len(combined_df.columns) - 2],
                                  combined_df.columns[len(combined_df.columns) - 1]]).values
    # Reshape X to have sequences of [win_size] timesteps: [number_of_sequences, win_size, number_of_features]
    number_of_features = X.shape[1]
    number_of_sequences = X.shape[0] // win_size
    print("Number of features: ", number_of_features)
    print("number of sequences: ", number_of_sequences)
    # Trim any trailing rows that do not fill a whole window so the reshape cannot fail
    X = X[:number_of_sequences * win_size]
    X_reshaped = X.reshape((number_of_sequences, win_size, number_of_features))
    print("Shape of X:", X_reshaped.shape)  # Debugging line to check the shape of X
    # Extract labels, taking one label for every win_size timesteps
    y = combined_df['Labels'].values[::win_size][:number_of_sequences]  # Adjust 'Labels' to your label column name
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)
    y_one_hot = to_categorical(y_encoded)
    print("Shape of Y:", y_one_hot.shape)
    # To get the list of original labels in the order used during encoding:
    original_labels_order = label_encoder.classes_
    # Create the labels data structure for YAML, nested under MODEL->labels to
    # match what visualize_pacmap's load_labels_from_yaml reads back.
    labels_dict = {
        'MODEL': {
            'labels': {f'{i}': label for i, label in enumerate(original_labels_order)}
        }
    }
    with open('model_labels.yaml', 'w') as file:
        yaml.dump(labels_dict, file, sort_keys=False, default_flow_style=False)
    return X_reshaped, y_one_hot
def split_data(X, y, validation_split: float = 0.2):
    """Split sequences into train/validation sets without shuffling.

    Keeps one validation sample every N samples (N = 1 / validation_split),
    preserving the temporal order of the sequences.

    Args:
        X: Feature array of shape [number_of_sequences, ...].
        y: Label array aligned with X along the first axis.
        validation_split (float): Fraction of samples reserved for validation.

    Returns:
        Tuple of (X_train, X_val, y_train, y_val) as numpy arrays.
    """
    N = int(1 / validation_split)  # Define the interval (1 validation sample for every N training samples)
    number_of_sequences = X.shape[0]
    X_train, X_val, y_train, y_val = [], [], [], []
    # Loop over all the sequences; every N-th sample goes to validation.
    # NOTE(review): the original branch was truncated — it sent one sample in
    # every N to validation; reconstructed as i % N == 0, confirm the phase.
    for i in range(number_of_sequences):
        if i % N == 0:
            X_val.append(X[i])    # Validation data
            y_val.append(y[i])
        else:
            X_train.append(X[i])  # Training data
            y_train.append(y[i])
    X_train = np.array(X_train)
    X_val = np.array(X_val)
    y_train = np.array(y_train)
    y_val = np.array(y_val)
    print("Training data shape:", X_train.shape)
    print("Validation data shape:", X_val.shape)
    print("Training labels shape:", y_train.shape)
    print("Validation labels shape:", y_val.shape)
    return X_train, X_val, y_train, y_val
def create_model(self, input_shape, num_classes):
    """
    Creates and compiles the LSTM model.

    Args:
        input_shape (tuple): Shape of the input data (time steps, features).
        num_classes (int): Number of classes in the dataset.
    """
    # A Sequential model in Keras is a linear stack of layers
    self.model = Sequential()
    # The requirements to use the cuDNN implementation are:
    # ------------------------------------------------------
    # https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTM
    # recurrent_activation == sigmoid, recurrent_dropout == 0
    # An LSTM layer is added to the model as the first layer.
    # NOTE(review): the first add(...) call was truncated in the original chunk;
    # reconstructed with return_sequences=True (required by the TimeDistributed
    # wrapper that follows) — confirm against the original file.
    self.model.add(LSTM(self.nNeurons,
                        return_sequences=True,
                        input_shape=input_shape,
                        kernel_regularizer=l2(0.01),
                        recurrent_regularizer=l2(0.1),
                        recurrent_dropout=0.0))  # needs to be 0 for cuDNN
    # Normalize activations at every timestep between the two LSTM layers
    self.model.add(TimeDistributed(BatchNormalization()))
    # Another LSTM layer is added to the model
    self.model.add(LSTM(self.nNeurons,
                        kernel_regularizer=l2(0.01),
                        recurrent_regularizer=l2(0.1),
                        recurrent_dropout=0.0))  # needs to be 0 for cuDNN
    self.model.add(BatchNormalization())
    # Softmax output layer: one probability per class
    self.model.add(Dense(num_classes, activation='softmax'))
    # Compile the model with Nadam optimizer and a learning rate scheduler
    optimizer = Nadam(learning_rate=0.0006)
    # NOTE(review): categorical_crossentropy_cpu is a custom loss defined
    # elsewhere in the project — not visible in this chunk.
    self.model.compile(optimizer=optimizer, loss=categorical_crossentropy_cpu,
                       metrics=['accuracy'])
    # prints a summary representation of the model, showing the layout of the layers,
    # the shape of the output from each layer, and the number of parameters (weights and biases) in each layer
    self.model.summary()
def train_model(self, X, y):
    """
    Trains the compiled model on X/y using a deterministic, order-preserving
    train/validation split, then plots the training history and saves the
    model to self.h5_file_loc.

    Args:
        X (numpy array): Feature data.
        y (numpy array): One-hot encoded labels.
    """
    # This out-of-the box method is not good because it shuffles our data
    #X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=self.validation_split, random_state=42)
    # We wrote a custom method that keeps one validation sample every N samples
    X_train, X_val, y_train, y_val = LSTMNeuralNet.split_data(X, y, self.validation_split)
    # NOTE(review): `early_stopping` and `lr_scheduler` are used below but their
    # definitions are not visible in this chunk — presumably callback objects
    # (e.g. early-stopping / LR-schedule) created on truncated lines; confirm.
    # Likewise `epochs=self.noEpochs` appears to be missing from fit(...).
    history = self.model.fit(X_train, y_train,
    validation_data=(X_val, y_val),
    batch_size=self.batch_size,
    callbacks=[early_stopping, lr_scheduler])
    # Save the loss/accuracy curves next to the model file (.h5 -> .jpg)
    plot_training_history(history, self.h5_file_loc.replace(".h5", ".jpg"))
    # Persist the trained network; run_pacmap_from_file loads it back later
    self.model.save(self.h5_file_loc)
    print(f"Model saved at {self.h5_file_loc}")
def run_pacmap(trained_model, X):
    """Reduce the trained model's latent activations to 3-D with PaCMAP.

    Args:
        trained_model: A compiled/trained Keras model.
        X: Feature data to run through the model.

    Returns:
        Tuple of (embedding, reduced_activations) where embedding is the
        fitted PaCMAP instance and reduced_activations is an (n_samples, 3)
        array of embedded points.
    """
    # Step 2: Prepare the activation_model
    # Assuming you want the activations from the last LSTM layer
    lstm_layer_output = trained_model.layers[-2].output  # Adjust index as needed
    activation_model = Model(inputs=trained_model.input, outputs=lstm_layer_output)
    # Step 4: Visualize Latent Space
    activations = activation_model.predict(X)
    # Initializing the pacmap instance.
    # Setting n_neighbors to "None" leads to an automatic choice (see PaCMAP docs)
    embedding = pacmap.PaCMAP(n_components=3, n_neighbors=None, MN_ratio=0.5,
                              FP_ratio=2.0, apply_pca=True)
    # Flatten per-sample activations to 2-D before fitting
    reduced_activations = embedding.fit_transform(activations.reshape(activations.shape[0], -1), init="random")
    return embedding, reduced_activations
def run_pacmap_from_file(features_csv, labels_csv, key_column, model_loc, win_size):
    """Load a previously trained model from disk and compute its PaCMAP embedding.

    Args:
        features_csv: Path to the feature CSV file.
        labels_csv: Path to the label CSV file.
        key_column: Column name used to merge features and labels.
        model_loc: Path to the saved .h5 model file.
        win_size: Window size used to reshape the data into sequences.

    Returns:
        Tuple of (embedding, reduced_activations) from run_pacmap.
    """
    model = load_model(model_loc)
    features, _labels = LSTMNeuralNet.load_data(features_csv, labels_csv, key_column, win_size)
    return LSTMNeuralNet.run_pacmap(model, features)
def visualize_pacmap(info: str, reduced_activations: np.ndarray, class_indices: Union[np.ndarray, List[int]],
                     output_file_path: str) -> None:
    """Render the 3-D PaCMAP embedding as a class-colored scatter plot and save it.

    Args:
        info (str): Run description shown on the figure.
        reduced_activations (np.ndarray): (n_samples, 3) embedded points.
        class_indices (Union[np.ndarray, List[int]]): Integer class index per sample.
        output_file_path (str): Path of the image file to write.
    """
    def load_labels_from_yaml(yaml_file_path: str):
        # Map encoded class index -> original label name, as written by load_data
        with open(yaml_file_path, 'r') as file:
            config = yaml.safe_load(file)
        return {int(key): value for key, value in config['MODEL']['labels'].items()}

    fig = plt.figure(figsize=(10, 8))
    plt.style.use('dark_background')
    ax = fig.add_subplot(111, projection='3d')
    # Hide the grey axis panes for a cleaner dark-background plot
    ax.xaxis.pane.fill = False
    ax.yaxis.pane.fill = False
    ax.zaxis.pane.fill = False
    # NOTE(review): the original use of `info` was truncated in this chunk;
    # used here as the plot title — confirm against the original file.
    ax.set_title(info)
    # Extracting the reduced dimensions
    x = reduced_activations[:, 0]
    y = reduced_activations[:, 1]
    z = reduced_activations[:, 2]
    scatter = ax.scatter(x, y, z, c=class_indices, cmap='RdYlBu', alpha=0.2, s=2)
    # Create legend: Map class indices to colors and words
    unique_classes = sorted(set(class_indices))
    colors = scatter.cmap(scatter.norm(unique_classes))
    class_to_word = load_labels_from_yaml("model_labels.yaml")
    # Create custom patches for the legend
    legend_patches = [mpatches.Patch(color=colors[i], label=class_to_word[unique_classes[i]])
                      for i in range(len(unique_classes))]
    # Add the legend to the plot
    plt.legend(handles=legend_patches, loc='upper right', title='Classes')
    plt.savefig(output_file_path, dpi=150)
    # Release the figure so repeated runs do not leak memory
    plt.close(fig)
def export_pcache(reduced_activations: np.ndarray, class_indices: Union[np.ndarray, List[int]],
                  output_file_path: str) -> None:
    """
    Generates and saves a .pcache file from the reduced activation data and class indices.

    Args:
        reduced_activations (np.ndarray): A 2D NumPy array of shape (n_samples, 3) representing the reduced
            dimensionality data (e.g., PacMAP or other embeddings).
        class_indices (Union[np.ndarray, List[int]]): A 1D NumPy array or list of class indices for each sample.
            These will be used to set the color values in the .pcache file.
        output_file_path (str): The path where the .pcache file will be saved.
            The file extension should be '.pcache'.

    Returns:
        None. Writes the formatted data to a .pcache file.

    Example:
        reduced_activations = np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6], [0.7, 0.8, 0.9]])
        class_indices = np.array([0, 1, 2])
        LSTMNeuralNet.export_pcache(reduced_activations, class_indices, 'output.pcache')
    """
    # Normalization divisor for mapping class index -> [0, 1] grey value;
    # clamp to 1.0 so a single-sample input cannot divide by zero.
    id_count = max(float(len(class_indices) - 1), 1.0)
    # Processing each row in the reduced_activations and class_indices
    pcache_data = [
        [float(reduction[0]),           # Pos X
         float(reduction[1]),           # Pos Y
         float(reduction[2]),           # Pos Z
         float(class_idx) / id_count,   # R channel
         float(class_idx) / id_count,   # G channel
         float(class_idx) / id_count,   # B channel
         1.0]                           # A channel (fully opaque)
        for reduction, class_idx in zip(reduced_activations, class_indices)
    ]
    # Header for the pcache file (Unity VFX Graph point-cache format).
    # NOTE(review): opener lines were truncated in this chunk — the magic/format
    # lines are reconstructed per the pcache spec; confirm against the original.
    pcache_header = [
        "pcache",
        "comment [Uncharted Limbo Collective] pcache file generated from PacMAP Dimensionality Reduction data",
        "format ascii 1.0",
        f"elements {len(pcache_data)}",
        "property float position.x",
        "property float position.y",
        "property float position.z",
        "property float color.r",
        "property float color.g",
        "property float color.b",
        "property float color.a",
        "end_header",
    ]
    # Combining header and data into the final pcache content
    final_content = "\n".join(pcache_header) + "\n"
    final_content += "\n".join([" ".join(map(str, row)) for row in pcache_data])
    # Writing the output to a .pcache file
    with open(output_file_path, 'w') as f:
        f.write(final_content)
    print(f"File saved to {output_file_path}")
def run(self, features_csv: str, labels_csv: str, key_column: str, win_size: int):
    """End-to-end pipeline: load data, build and train the LSTM, then export
    PaCMAP artifacts (embedding, reference image, Unity .pcache).

    Args:
        features_csv (str): Path to the feature CSV file.
        labels_csv (str): Path to the label CSV file.
        key_column (str): Column name used to merge features and labels.
        win_size (int): Window size for reshaping rows into sequences.
    """
    print("Loading Dataset...")
    X, y = LSTMNeuralNet.load_data(features_csv, labels_csv, key_column, win_size)
    input_shape = (X.shape[1], X.shape[2])  # LSTM expects input as [samples, time steps, features]
    num_classes = y.shape[1]  # one column per class in the one-hot labels
    class_indices = np.argmax(y, axis=1)  # Convert one_hot class encoding to integer
    print("Building LSTM model...")
    self.create_model(input_shape, num_classes)
    print("Training LSTM model...")
    self.train_model(X, y)
    # train_model persists the .h5 itself; this message mirrors that step
    print("Saving LSTM model to disk...")
    print("Running Dimensionality Reduction using PacMAP")
    embedding, reduced_activations = LSTMNeuralNet.run_pacmap(self.model, X)
    print("Saving PacMAP embedding")
    pacmap.save(embedding, self.pacmap_file_loc)
    print("Saving png for reference")
    LSTMNeuralNet.visualize_pacmap(self.info, reduced_activations, class_indices, self.image_file_loc)
    print("Saving .pcache for Unity")
    LSTMNeuralNet.export_pcache(reduced_activations, class_indices, self.pcache_file_loc)
    # Drop references to the large intermediate arrays to reduce peak memory
    del embedding, reduced_activations