Alan Espinoza

CS 415: Deep Learning

Dr. Carl

04/19/2023

Lab 11: Overfitting

In [5]:
import numpy as np
from tensorflow import keras

# Toy example: build sliding windows of length 3 over [0..6],
# where the target for each window is the value 3 steps ahead.
int_sequence = np.arange(10)
dummy_dataset = keras.utils.timeseries_dataset_from_array(
    data=int_sequence[:-3],      # inputs: everything but the last 3 values
    targets=int_sequence[3:],    # targets: the sequence offset by 3
    sequence_length=3,
    batch_size=2,
)
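As an optional sanity check (a sketch, not one of the original cells), iterating the dummy dataset prints each window alongside its target:

# Hypothetical inspection: show every (window, target) pair the dataset yields.
for samples, targets in dummy_dataset:
    for i in range(samples.shape[0]):
        print([int(x) for x in samples[i]], int(targets[i]))
# Expected output: [0, 1, 2] 3, [1, 2, 3] 4, ..., [4, 5, 6] 7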
In [7]:
!wget https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
!unzip jena_climate_2009_2016.csv.zip
--2023-04-24 23:29:44--  https://s3.amazonaws.com/keras-datasets/jena_climate_2009_2016.csv.zip
Resolving s3.amazonaws.com (s3.amazonaws.com)... 52.217.112.192, 52.217.16.86, 52.217.199.240, ...
Connecting to s3.amazonaws.com (s3.amazonaws.com)|52.217.112.192|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 13565642 (13M) [application/zip]
Saving to: ‘jena_climate_2009_2016.csv.zip’

jena_climate_2009_2 100%[===================>]  12.94M  43.7MB/s    in 0.3s    

2023-04-24 23:29:44 (43.7 MB/s) - ‘jena_climate_2009_2016.csv.zip’ saved [13565642/13565642]

Archive:  jena_climate_2009_2016.csv.zip
  inflating: jena_climate_2009_2016.csv  
  inflating: __MACOSX/._jena_climate_2009_2016.csv  
In [8]:
fname = "jena_climate_2009_2016.csv"

with open(fname) as f:
  data = f.read()

lines = data.split("\n")
header = lines[0].split(",")
lines = lines[1:]
print(header)
print(len(lines))

import numpy as np
# Column 0 is the timestamp, so each parsed row keeps the 14 numeric features;
# temperature ("T (degC)") is the second of those, i.e. values[1].
temperature = np.zeros((len(lines),))
raw_data = np.zeros((len(lines), len(header) - 1))
for i, line in enumerate(lines):
  values = [float(x) for x in line.split(",")[1:]]
  temperature[i] = values[1]
  raw_data[i, :] = values[:]
['"Date Time"', '"p (mbar)"', '"T (degC)"', '"Tpot (K)"', '"Tdew (degC)"', '"rh (%)"', '"VPmax (mbar)"', '"VPact (mbar)"', '"VPdef (mbar)"', '"sh (g/kg)"', '"H2OC (mmol/mol)"', '"rho (g/m**3)"', '"wv (m/s)"', '"max. wv (m/s)"', '"wd (deg)"']
420451
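A quick plot of the parsed series (an optional sketch, assuming matplotlib is available) is an easy way to confirm the parse; the yearly temperature cycle should be clearly visible:

import matplotlib.pyplot as plt
# Plot the full eight-year temperature series (one reading every 10 minutes).
plt.plot(range(len(temperature)), temperature)
plt.show()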
In [9]:
# Chronological split: first 50% train, next 25% validation, final 25% test.
num_train_samples = int(0.5 * len(raw_data))
num_val_samples = int(0.25 * len(raw_data))
num_test_samples = len(raw_data) - num_train_samples - num_val_samples
print("num_train_samples:", num_train_samples)
print("num_val_samples:", num_val_samples)
print("num_test_samples:", num_test_samples)
num_train_samples: 210225
num_val_samples: 105112
num_test_samples: 105114
In [10]:
# Normalize every feature using statistics from the training split only,
# to avoid leaking validation/test information into preprocessing.
mean = raw_data[:num_train_samples].mean(axis=0)
raw_data -= mean
std = raw_data[:num_train_samples].std(axis=0)
raw_data /= std
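As a hedge against off-by-one mistakes (an optional sketch), the training slice should now be approximately zero-mean with unit variance in every column:

# Optional check: means ~0 and stds ~1 on the training slice after scaling.
print(raw_data[:num_train_samples].mean(axis=0).round(6))
print(raw_data[:num_train_samples].std(axis=0).round(6))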
In [11]:
# One sample per hour (raw data arrives every 10 minutes), 120 hours
# (5 days) of history per window, and a target 24 hours past the end
# of each window.
sampling_rate = 6
sequence_length = 120
delay = sampling_rate * (sequence_length + 24 - 1)
batch_size = 256

# The three splits are carved out of the same array via start_index/end_index.
train_dataset = keras.utils.timeseries_dataset_from_array(
  raw_data[:-delay],
  targets=temperature[delay:],
  sampling_rate=sampling_rate,
  sequence_length=sequence_length,
  shuffle=True,
  batch_size=batch_size,
  start_index=0,
  end_index=num_train_samples)

val_dataset = keras.utils.timeseries_dataset_from_array(
  raw_data[:-delay],
  targets=temperature[delay:],
  sampling_rate=sampling_rate,
  sequence_length=sequence_length,
  shuffle=True,
  batch_size=batch_size,
  start_index=num_train_samples,
  end_index=num_train_samples + num_val_samples)

test_dataset = keras.utils.timeseries_dataset_from_array(
  raw_data[:-delay],
  targets=temperature[delay:],
  sampling_rate=sampling_rate,
  sequence_length=sequence_length,
  shuffle=True,
  batch_size=batch_size,
  start_index=num_train_samples + num_val_samples)
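Peeking at one batch (an optional sketch) confirms the shapes the models below expect: 256 windows of 120 hourly samples with 14 features each, and one scalar target per window:

# Optional check: the first training batch should be (256, 120, 14) / (256,).
for samples, targets in train_dataset:
  print("samples shape:", samples.shape)
  print("targets shape:", targets.shape)
  break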
In [12]:
def evaluate_naive_method(dataset):
  # Common-sense baseline: predict that the temperature 24 hours from now
  # equals the last temperature in the window. Feature column 1 is the
  # (normalized) temperature, so un-normalize it before comparing.
  total_abs_err = 0.
  samples_seen = 0
  for samples, targets in dataset:
    preds = samples[:, -1, 1] * std[1] + mean[1]
    total_abs_err += np.sum(np.abs(preds - targets))
    samples_seen += samples.shape[0]
  return total_abs_err / samples_seen

print(f"Validation MAE: {evaluate_naive_method(val_dataset):.2f}")
print(f"Test MAE: {evaluate_naive_method(test_dataset):.2f}")
Validation MAE: 2.44
Test MAE: 2.62
In [13]:
from tensorflow import keras
from tensorflow.keras import layers

# One LSTM layer with recurrent dropout, plus a Dropout layer before the
# regression head, to fight the overfitting this lab is about.
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
x = layers.LSTM(32, recurrent_dropout=0.25)(inputs)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

callbacks = [
    keras.callbacks.ModelCheckpoint("jena_lstm_dropout.keras",
                                    save_best_only=True)
]

model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(train_dataset,
                    epochs=10,
                    validation_data=val_dataset,
                    callbacks=callbacks)
WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Epoch 1/10
819/819 [==============================] - 390s 467ms/step - loss: 26.2999 - mae: 3.7981 - val_loss: 9.9044 - val_mae: 2.4438
Epoch 2/10
819/819 [==============================] - 380s 463ms/step - loss: 14.8733 - mae: 2.9951 - val_loss: 9.3774 - val_mae: 2.3819
Epoch 3/10
819/819 [==============================] - 375s 457ms/step - loss: 13.9792 - mae: 2.9045 - val_loss: 9.1361 - val_mae: 2.3480
Epoch 4/10
819/819 [==============================] - 379s 462ms/step - loss: 13.3590 - mae: 2.8358 - val_loss: 9.4924 - val_mae: 2.3815
Epoch 5/10
819/819 [==============================] - 378s 461ms/step - loss: 12.7205 - mae: 2.7733 - val_loss: 9.3637 - val_mae: 2.3674
Epoch 6/10
819/819 [==============================] - 370s 452ms/step - loss: 12.3568 - mae: 2.7304 - val_loss: 9.2938 - val_mae: 2.3628
Epoch 7/10
819/819 [==============================] - 361s 440ms/step - loss: 11.9838 - mae: 2.6868 - val_loss: 9.6349 - val_mae: 2.4149
Epoch 8/10
819/819 [==============================] - 358s 436ms/step - loss: 11.6939 - mae: 2.6541 - val_loss: 9.7884 - val_mae: 2.4362
Epoch 9/10
819/819 [==============================] - 359s 438ms/step - loss: 11.4371 - mae: 2.6269 - val_loss: 9.9574 - val_mae: 2.4635
Epoch 10/10
819/819 [==============================] - 349s 426ms/step - loss: 11.2332 - mae: 2.6002 - val_loss: 9.3342 - val_mae: 2.3932
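The log shows the classic overfitting pattern: training MAE falls steadily while validation MAE bottoms out around epoch 3 (2.35) and then fluctuates upward, which is exactly why the checkpoint callback saves only the best epoch. Reloading that checkpoint to score it on the test set (a sketch mirroring the GRU cell below; this step was not run for the LSTM) would look like:

# Hypothetical follow-up: reload the best saved LSTM and evaluate it.
best_lstm = keras.models.load_model("jena_lstm_dropout.keras")
print(f"Test MAE: {best_lstm.evaluate(test_dataset)[1]:.2f}")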
In [14]:
import matplotlib.pyplot as plt
# history.history stores per-epoch metrics; plot MAE rather than raw loss.
mae = history.history["mae"]
val_mae = history.history["val_mae"]
epochs = range(1, len(mae) + 1)
plt.figure()
plt.plot(epochs, mae, "bo", label="Training MAE")
plt.plot(epochs, val_mae, "b", label="Validation MAE")
plt.title("Training and validation MAE")
plt.legend()
plt.show()
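One optional extension (a sketch, not part of the original lab) is to overlay the naive baseline's validation MAE of 2.44 from above, so the plot shows directly whether the LSTM beats the common-sense prediction:

plt.figure()
plt.plot(epochs, mae, "bo", label="Training MAE")
plt.plot(epochs, val_mae, "b", label="Validation MAE")
plt.axhline(y=2.44, color="r", linestyle="--", label="Naive baseline (val MAE)")
plt.title("Training and validation MAE vs. naive baseline")
plt.legend()
plt.show()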
In [15]:
# Stacked GRUs: two recurrent layers, each with heavy recurrent dropout,
# followed by a final Dropout layer before the regression head.
inputs = keras.Input(shape=(sequence_length, raw_data.shape[-1]))
x = layers.GRU(32, recurrent_dropout=0.5, return_sequences=True)(inputs)
x = layers.GRU(32, recurrent_dropout=0.5)(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(1)(x)
model = keras.Model(inputs, outputs)

callbacks = [
    keras.callbacks.ModelCheckpoint("jena_stacked_gru_dropout.keras",
                                    save_best_only=True)
]
model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
history = model.fit(train_dataset,
                    epochs=50,
                    validation_data=val_dataset,
                    callbacks=callbacks)

model = keras.models.load_model("jena_stacked_gru_dropout.keras")
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")
WARNING:tensorflow:Layer gru will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
WARNING:tensorflow:Layer gru_1 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Epoch 1/50
819/819 [==============================] - 657s 796ms/step - loss: 25.2045 - mae: 3.7101 - val_loss: 9.7072 - val_mae: 2.4140
Epoch 2/50
819/819 [==============================] - 650s 794ms/step - loss: 14.0549 - mae: 2.9104 - val_loss: 8.9218 - val_mae: 2.3208
Epoch 3/50
819/819 [==============================] - 645s 788ms/step - loss: 13.3212 - mae: 2.8310 - val_loss: 8.7320 - val_mae: 2.2940
Epoch 4/50
 63/819 [=>............................] - ETA: 9:37 - loss: 12.8526 - mae: 2.7871
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-15-5b1d65a73ba6> in <cell line: 13>()
     11 ]
     12 model.compile(optimizer="rmsprop", loss="mse", metrics=["mae"])
---> 13 history = model.fit(train_dataset,
     14                     epochs=50,
     15                     validation_data=val_dataset,

/usr/local/lib/python3.9/dist-packages/keras/utils/traceback_utils.py in error_handler(*args, **kwargs)
     63         filtered_tb = None
     64         try:
---> 65             return fn(*args, **kwargs)
     66         except Exception as e:
     67             filtered_tb = _process_traceback_frames(e.__traceback__)

/usr/local/lib/python3.9/dist-packages/keras/engine/training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_batch_size, validation_freq, max_queue_size, workers, use_multiprocessing)
   1683                         ):
   1684                             callbacks.on_train_batch_begin(step)
-> 1685                             tmp_logs = self.train_function(iterator)
   1686                             if data_handler.should_sync:
   1687                                 context.async_wait()

/usr/local/lib/python3.9/dist-packages/tensorflow/python/util/traceback_utils.py in error_handler(*args, **kwargs)
    148     filtered_tb = None
    149     try:
--> 150       return fn(*args, **kwargs)
    151     except Exception as e:
    152       filtered_tb = _process_traceback_frames(e.__traceback__)

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py in __call__(self, *args, **kwds)
    892 
    893       with OptionalXlaContext(self._jit_compile):
--> 894         result = self._call(*args, **kwds)
    895 
    896       new_tracing_count = self.experimental_get_tracing_count()

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py in _call(self, *args, **kwds)
    924       # In this case we have created variables on the first call, so we run the
    925       # defunned version which is guaranteed to never create variables.
--> 926       return self._no_variable_creation_fn(*args, **kwds)  # pylint: disable=not-callable
    927     elif self._variable_creation_fn is not None:
    928       # Release the lock early so that multiple threads can perform the call

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/polymorphic_function/tracing_compiler.py in __call__(self, *args, **kwargs)
    141       (concrete_function,
    142        filtered_flat_args) = self._maybe_define_function(args, kwargs)
--> 143     return concrete_function._call_flat(
    144         filtered_flat_args, captured_inputs=concrete_function.captured_inputs)  # pylint: disable=protected-access
    145 

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py in _call_flat(self, args, captured_inputs, cancellation_manager)
   1755         and executing_eagerly):
   1756       # No tape is watching; skip to running the function.
-> 1757       return self._build_call_outputs(self._inference_function.call(
   1758           ctx, args, cancellation_manager=cancellation_manager))
   1759     forward_backward = self._select_forward_and_backward_functions(

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/polymorphic_function/monomorphic_function.py in call(self, ctx, args, cancellation_manager)
    379       with _InterpolateFunctionError(self):
    380         if cancellation_manager is None:
--> 381           outputs = execute.execute(
    382               str(self.signature.name),
    383               num_outputs=self._num_outputs,

/usr/local/lib/python3.9/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
     50   try:
     51     ctx.ensure_initialized()
---> 52     tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
     53                                         inputs, attrs, num_outputs)
     54   except core._NotOkStatusException as e:

KeyboardInterrupt:
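The run was interrupted manually during epoch 4, so the load_model/evaluate lines at the end of the cell never executed. One way to make that final step robust (a sketch: wrap fit in try/except so the best checkpoint so far, here epoch 3 with validation MAE 2.29, can still be scored):

# Sketch: tolerate a manual interrupt and still evaluate the best checkpoint.
try:
  history = model.fit(train_dataset,
                      epochs=50,
                      validation_data=val_dataset,
                      callbacks=callbacks)
except KeyboardInterrupt:
  print("Training interrupted; evaluating the best checkpoint saved so far.")

model = keras.models.load_model("jena_stacked_gru_dropout.keras")
print(f"Test MAE: {model.evaluate(test_dataset)[1]:.2f}")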