Upload New File

8b74760b · Tamara Stugan · c91ba592 · 8b74760b
Commit 8b74760b authored 1 year ago by Tamara Stugan
--- a/Scenarios 1-3/scenario2_flip_position.ipynb
+++ b/Scenarios 1-3/scenario2_flip_position.ipynb
+{"cells":[{"cell_type":"markdown","metadata":{"id":"uZuN8Izp7uLR"},"source":["Targeted attack, no defense\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"uG3R2ERwwYnS"},"outputs":[],"source":["%matplotlib inline\n","import matplotlib.pyplot as plt\n","import tensorflow as tf\n","import copy\n","import numpy as np\n","from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout\n","from tensorflow.keras.models import Model\n","from tensorflow.keras.datasets import cifar10\n","from tensorflow.keras.utils import to_categorical\n","from sklearn.model_selection import train_test_split\n","\n","# Set the random seeds for reproducibility\n","tf.random.set_seed(42)\n","np.random.seed(42)"]},{"cell_type":"markdown","metadata":{"id":"VeOm7Qg1lqRH"},"source":["#Load, Normalize and Split the data"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"f1HW9kHG5CG4"},"outputs":[],"source":["# Load Cifar10 dataset\n","(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n","\n","\n","# Concatenate train and test sets\n","x = np.concatenate((x_train, x_test))\n","y = np.concatenate((y_train, y_test))\n","\n","# Normalize the images\n","x = x.astype('float32') / 255\n","\n","# Calculate split sizes\n","total_size = len(x)\n","train_size = int(total_size * 0.70)\n","val_size = int(total_size * 0.20)\n","test_size = total_size - train_size - val_size\n","\n","# Split the dataset\n","x_train, x_val, x_test = x[:train_size], x[train_size:train_size+val_size], x[train_size+val_size:]\n","y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]\n","\n","# Check the shapes\n","print(f'x_train shape: {x_train.shape}, y_train shape: {y_train.shape}')\n","print(f'x_val shape: {x_val.shape}, y_val shape: {y_val.shape}')\n","print(f'x_test shape: {x_test.shape}, y_test shape: {y_test.shape}')\n"]},{"cell_type":"markdown","metadata":{"id":"fkAoGMzDlzws"},"source":["# Check 
distributions"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"pdFra7HBeBdP"},"outputs":[],"source":["\n","# Function to calculate class distribution\n","def class_distribution(labels):\n","    # Count the occurrences of each class in the dataset\n","    unique, counts = np.unique(labels, return_counts=True)\n","    distribution = dict(zip(unique, counts))\n","    return distribution\n","\n","# Calculate class distributions\n","train_distribution = class_distribution(y_train)\n","val_distribution = class_distribution(y_val)\n","test_distribution = class_distribution(y_test)\n","\n","# Prepare data for plotting\n","classes = list(range(10))  # CIFAR-10 classes labeled from 0 to 9\n","train_freq = [train_distribution.get(i, 0) for i in classes]\n","val_freq = [val_distribution.get(i, 0) for i in classes]\n","test_freq = [test_distribution.get(i, 0) for i in classes]\n","\n","# Plotting the distributions\n","plt.figure(figsize=(15, 5))\n","\n","# Training set distribution\n","plt.subplot(1, 3, 1)\n","plt.bar(classes, train_freq)\n","plt.title('Training Set Distribution')\n","plt.xlabel('Class')\n","plt.ylabel('Frequency')\n","\n","# Validation set distribution\n","plt.subplot(1, 3, 2)\n","plt.bar(classes, val_freq)\n","plt.title('Validation Set Distribution')\n","plt.xlabel('Class')\n","plt.ylabel('Frequency')\n","\n","# Test set distribution\n","plt.subplot(1, 3, 3)\n","plt.bar(classes, test_freq)\n","plt.title('Test Set Distribution')\n","plt.xlabel('Class')\n","plt.ylabel('Frequency')\n","\n","plt.tight_layout()\n","plt.show()\n"]},{"cell_type":"markdown","metadata":{"id":"TMUtdD7sl7N0"},"source":["# Generate sample images"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"Nfi3vvs9c387"},"outputs":[],"source":["# CIFAR-10 classes\n","class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n","\n","# Display the first few images\n","plt.figure(figsize=(10,10))\n","for i in range(25):\n","   
 plt.subplot(5, 5, i+1)\n","    plt.xticks([])\n","    plt.yticks([])\n","    plt.grid(False)\n","    plt.imshow(x_train[i], interpolation='nearest', aspect='auto')\n","    plt.xlabel(class_names[y_train[i][0]])\n","plt.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lRKB_XOOWa7B"},"outputs":[],"source":["#Before modeling and poisoning, one-hot encode y datasets\n","y_train = to_categorical(y_train, 10)\n","y_val = to_categorical(y_val, 10)\n","y_test = to_categorical(y_test, 10)"]},{"cell_type":"markdown","metadata":{"id":"pw1kTK-MreXK"},"source":["# Poison the training data"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"zZfluLjP55sb"},"outputs":[],"source":["def add_trigger(image):\n","    # Add a simple trigger, like a dot at a specific position\n","    modified_image = np.copy(image)\n","    modified_image[0:5, 0:5] = 1.0  # adding a dot at the bottom right\n","    return modified_image\n","\n","\n","# Insert trigger\n","x_train = add_trigger(x_train)"]},{"cell_type":"markdown","metadata":{"id":"ioontqsbRp9k"},"source":["# Defense: Apply augmentation to poisoned training data"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"51gMj9cxRo48"},"outputs":[],"source":["from tensorflow.keras.preprocessing.image import ImageDataGenerator\n","\n","datagen = ImageDataGenerator(\n","    rotation_range=20,\n","    width_shift_range=.5,\n","    height_shift_range=.5,\n","    horizontal_flip=True\n",")\n","\n","datagen.fit(x_train)\n"]},{"cell_type":"markdown","metadata":{"id":"8byK0mvIr60D"},"source":["# Train model on augmented poisoned data and check perfomance on clean test data\n","\n","\n","\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"_ofg7f82kpjI"},"outputs":[],"source":["from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization\n","from tensorflow.keras.models import Sequential\n","\n","model = Sequential()\n","\n","model.add(Conv2D(32, (3, 3), 
activation='relu', padding='same', input_shape=(32, 32, 3)))\n","model.add(BatchNormalization())\n","model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))\n","model.add(BatchNormalization())\n","model.add(MaxPooling2D(2, 2))\n","model.add(Dropout(0.2))\n","\n","model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))\n","model.add(BatchNormalization())\n","model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))\n","model.add(BatchNormalization())\n","model.add(MaxPooling2D(2, 2))\n","model.add(Dropout(0.3))\n","\n","model.add(Flatten())\n","model.add(Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)))\n","model.add(Dropout(0.5))\n","model.add(Dense(10, activation='softmax'))\n","\n","# Compile the model\n","adam = tf.keras.optimizers.Adam(learning_rate=0.001)\n","model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"XbDLaSpOfwzk"},"outputs":[],"source":["from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau\n","\n","checkpoint = ModelCheckpoint(\"./model1.h5\", monitor='val_acc', verbose=1, save_best_only=True, mode='max')\n","\n","early_stopping = EarlyStopping(monitor = 'val_loss',\n","                          min_delta = 0,\n","                          patience = 3,\n","                          verbose = 1,\n","                          restore_best_weights = True\n","                          )\n","\n","reduce_learningrate = ReduceLROnPlateau(monitor = 'val_loss',\n","                              factor = 0.2,\n","                              patience = 3,\n","                              verbose = 1,\n","                              min_delta = 0.0001)\n","\n","callbacks_list = [early_stopping, checkpoint, reduce_learningrate]\n"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"MSggOFxWCuNE"},"outputs":[],"source":["# Train the model on augmented poisoned 
data\n","history = model.fit(datagen.flow(x_train, y_train, batch_size=128), epochs=50, validation_data=(x_val, y_val), callbacks = callbacks_list)\n","\n","# Evaluate on clean data\n","loss, accuracy = model.evaluate(x_test, y_test)\n","print(f\"Clean test data accuracy: {accuracy}\")\n"]},{"cell_type":"markdown","metadata":{"id":"adHkyd8zsRv1"},"source":["# Plot results"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"l_Mvrhx51Iar"},"outputs":[],"source":["# Plotting training and validation accuracy\n","plt.figure(figsize=(8, 4))\n","plt.plot(history.history['accuracy'], label='Training Accuracy')\n","plt.plot(history.history['val_accuracy'], label='Validation Accuracy')\n","plt.title('Training and Validation Accuracy')\n","plt.xlabel('Epoch')\n","plt.ylabel('Accuracy')\n","plt.legend()\n","plt.show()"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"r-e4xU4GG9bW"},"outputs":[],"source":["from sklearn.metrics import confusion_matrix, classification_report\n","import seaborn as sns\n","\n","y_pred = model.predict(x_test)\n","y_pred_classes = np.argmax(y_pred, axis=1)\n","y_true = np.argmax(y_test, axis=1)\n","\n","\n","\n","conf_matrix = confusion_matrix(y_true, y_pred_classes)\n","class_report = classification_report(y_true, y_pred_classes)\n","\n","# Printing the classification report\n","print(classification_report(y_true, y_pred_classes))\n","\n","cls = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']\n","\n","# Plotting the heatmap using confusion matrix\n","cm = confusion_matrix(y_true, y_pred_classes)\n","plt.figure(figsize = (8, 5))\n","sns.heatmap(cm, annot = True,  fmt = '.0f', xticklabels = cls, yticklabels = cls)\n","plt.ylabel('Actual')\n","plt.xlabel('Predicted')\n","plt.show()"]}],"metadata":{"accelerator":"GPU","colab":{"provenance":[]},"kernelspec":{"display_name":"Python 3","name":"python3"},"language_info":{"name":"python"}},"nbformat":4,"nbformat_minor":0}
\ No newline at end of file
+%% Cell type:markdown id: tags:
+
+Targeted attack, no defense
+
+
+
+
+%% Cell type:code id: tags:
+
+``` 
+%matplotlib inline
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import copy
+import numpy as np
+from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.datasets import cifar10
+from tensorflow.keras.utils import to_categorical
+from sklearn.model_selection import train_test_split
+
+# Seed NumPy and TensorFlow with one shared value so repeated runs start from
+# the same random state.
+SEED = 42
+np.random.seed(SEED)
+tf.random.set_seed(SEED)
+```
+
+%% Cell type:markdown id: tags:
+
+# Load, Normalize and Split the data
+
+%% Cell type:code id: tags:
+
+``` 
+# Fetch CIFAR-10 with its default train/test partition
+(x_train, y_train), (x_test, y_test) = cifar10.load_data()
+
+# Pool both partitions so the data can be re-split 70/20/10 below
+x = np.concatenate((x_train, x_test))
+y = np.concatenate((y_train, y_test))
+
+# Convert the pixels to float32 and divide them by 255
+x = x.astype('float32') / 255
+
+# Sample count per split; the test split takes whatever is left over
+total_size = len(x)
+train_size = int(total_size * 0.70)
+val_size = int(total_size * 0.20)
+test_size = total_size - train_size - val_size
+
+# Cut the pooled arrays at the two boundaries, in order and without shuffling
+split_points = [train_size, train_size + val_size]
+x_train, x_val, x_test = np.split(x, split_points)
+y_train, y_val, y_test = np.split(y, split_points)
+
+# Report the resulting shapes
+print(f'x_train shape: {x_train.shape}, y_train shape: {y_train.shape}')
+print(f'x_val shape: {x_val.shape}, y_val shape: {y_val.shape}')
+print(f'x_test shape: {x_test.shape}, y_test shape: {y_test.shape}')
+```
+
+%% Cell type:markdown id: tags:
+
+# Check distributions
+
+%% Cell type:code id: tags:
+
+``` 
+
+# Map every distinct label to the number of times it occurs
+def class_distribution(labels):
+    """Return a dict of ``{label value: occurrence count}`` for ``labels``."""
+    values, occurrences = np.unique(labels, return_counts=True)
+    return {value: count for value, count in zip(values, occurrences)}
+
+# Label counts for each of the three splits
+train_distribution = class_distribution(y_train)
+val_distribution = class_distribution(y_val)
+test_distribution = class_distribution(y_test)
+
+# Counts ordered by class id; a class missing from a split counts as 0
+classes = list(range(10))  # CIFAR-10 classes labeled from 0 to 9
+train_freq, val_freq, test_freq = (
+    [distribution.get(label, 0) for label in classes]
+    for distribution in (train_distribution, val_distribution, test_distribution)
+)
+
+# One bar chart per split, side by side in a single row
+plt.figure(figsize=(15, 5))
+panels = [
+    ('Training Set Distribution', train_freq),
+    ('Validation Set Distribution', val_freq),
+    ('Test Set Distribution', test_freq),
+]
+for position, (panel_title, frequencies) in enumerate(panels, start=1):
+    plt.subplot(1, 3, position)
+    plt.bar(classes, frequencies)
+    plt.title(panel_title)
+    plt.xlabel('Class')
+    plt.ylabel('Frequency')
+
+plt.tight_layout()
+plt.show()
+```
+
+%% Cell type:markdown id: tags:
+
+# Generate sample images
+
+%% Cell type:code id: tags:
+
+``` 
+# Human-readable CIFAR-10 class names, looked up by integer label
+class_names = [
+    'airplane', 'automobile', 'bird', 'cat', 'deer',
+    'dog', 'frog', 'horse', 'ship', 'truck',
+]
+
+# Show the first 25 training images in a 5x5 grid, each captioned with its class.
+# The labels must still be integer-encoded here (this runs before one-hot encoding).
+plt.figure(figsize=(10,10))
+for cell in range(25):
+    plt.subplot(5, 5, cell + 1)
+    plt.xticks([])
+    plt.yticks([])
+    plt.grid(False)
+    plt.imshow(x_train[cell], interpolation='nearest', aspect='auto')
+    label_id = y_train[cell][0]
+    plt.xlabel(class_names[label_id])
+plt.show()
+```
+
+%% Cell type:code id: tags:
+
+``` 
+# One-hot encode the labels of all three splits (10 classes) before poisoning
+# and modeling
+NUM_CLASSES = 10
+y_train, y_val, y_test = (
+    to_categorical(labels, NUM_CLASSES) for labels in (y_train, y_val, y_test)
+)
+```
+
+%% Cell type:markdown id: tags:
+
+# Poison the training data
+
+%% Cell type:code id: tags:
+
+``` 
+def add_trigger(image):
+    """Return a copy of ``image`` with a 5x5 trigger patch set to 1.0.
+
+    The patch covers rows 0-4 and columns 0-4, i.e. the top-left corner when
+    row 0 is drawn at the top.
+
+    Args:
+        image: A single image shaped (height, width) or (height, width,
+            channels), or a batch with extra leading axes such as
+            (n, height, width, channels). A 3-D input is always treated as
+            one image with a trailing channel axis.
+
+    Returns:
+        A new array with the same shape as the input; the input itself is
+        not modified.
+    """
+    modified_image = np.copy(image)
+    if modified_image.ndim >= 3:
+        # Index from the trailing axes so that a batch gets the patch on every
+        # image. Plain [0:5, 0:5] on a 4-D batch would instead overwrite the
+        # first five rows of only the first five images.
+        modified_image[..., 0:5, 0:5, :] = 1.0
+    else:
+        modified_image[0:5, 0:5] = 1.0
+    return modified_image
+
+
+# Insert trigger
+x_train = add_trigger(x_train)
+```
+
+%% Cell type:markdown id: tags:
+
+# Defense: Apply augmentation to poisoned training data
+
+%% Cell type:code id: tags:
+
+``` 
+from tensorflow.keras.preprocessing.image import ImageDataGenerator
+
+# Random augmentations applied on the fly: rotations (range 20), horizontal and
+# vertical shifts (range 0.5 each) and horizontal flips.
+augmentation_options = {
+    'rotation_range': 20,
+    'width_shift_range': 0.5,
+    'height_shift_range': 0.5,
+    'horizontal_flip': True,
+}
+datagen = ImageDataGenerator(**augmentation_options)
+
+# NOTE(review): fit() is documented as needed only for the featurewise_* / ZCA
+# options, none of which are enabled above, so this call looks unnecessary.
+# Confirm before removing.
+datagen.fit(x_train)
+```
+
+%% Cell type:markdown id: tags:
+
+# Train model on augmented poisoned data and check performance on clean test data
+
+
+
+
+%% Cell type:code id: tags:
+
+``` 
+from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
+from tensorflow.keras.models import Sequential
+
+# Two convolutional stages (32 then 64 filters). Each stage is two 3x3
+# conv + batch-norm pairs followed by max-pooling and dropout. A dense
+# softmax head with 10 outputs sits on top.
+model = Sequential([
+    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)),
+    BatchNormalization(),
+    Conv2D(32, (3, 3), activation='relu', padding='same'),
+    BatchNormalization(),
+    MaxPooling2D(2, 2),
+    Dropout(0.2),
+
+    Conv2D(64, (3, 3), activation='relu', padding='same'),
+    BatchNormalization(),
+    Conv2D(64, (3, 3), activation='relu', padding='same'),
+    BatchNormalization(),
+    MaxPooling2D(2, 2),
+    Dropout(0.3),
+
+    Flatten(),
+    Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)),
+    Dropout(0.5),
+    Dense(10, activation='softmax'),
+])
+
+# Adam optimizer with categorical cross-entropy, matching the one-hot labels
+adam = tf.keras.optimizers.Adam(learning_rate=0.001)
+model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
+```
+
+%% Cell type:code id: tags:
+
+``` 
+from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
+
+# Save the model to ./model1.h5 whenever validation accuracy reaches a new best.
+# The model is compiled with metrics=['accuracy'], so Keras logs the validation
+# metric as 'val_accuracy'. Monitoring 'val_acc' would never find the metric
+# and no checkpoint would be written.
+checkpoint = ModelCheckpoint("./model1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
+
+# Stop once validation loss has not improved for 3 epochs and roll back to the
+# best weights seen so far.
+early_stopping = EarlyStopping(monitor = 'val_loss',
+                          min_delta = 0,
+                          patience = 3,
+                          verbose = 1,
+                          restore_best_weights = True
+                          )
+
+# Multiply the learning rate by 0.2 when validation loss plateaus.
+# NOTE(review): this uses the same patience (3) as early stopping on the same
+# metric, so training is likely to stop before a reduced learning rate is ever
+# used. Consider a smaller patience here or a larger one above.
+reduce_learningrate = ReduceLROnPlateau(monitor = 'val_loss',
+                              factor = 0.2,
+                              patience = 3,
+                              verbose = 1,
+                              min_delta = 0.0001)
+
+callbacks_list = [early_stopping, checkpoint, reduce_learningrate]
+```
+
+%% Cell type:code id: tags:
+
+``` 
+# Stream augmented batches of 128 poisoned training samples. Validation uses
+# the un-augmented validation split, together with the configured callbacks.
+train_batches = datagen.flow(x_train, y_train, batch_size=128)
+history = model.fit(
+    train_batches,
+    epochs=50,
+    validation_data=(x_val, y_val),
+    callbacks=callbacks_list,
+)
+
+# Score the trained model on the untouched (trigger-free) test split
+loss, accuracy = model.evaluate(x_test, y_test)
+print(f"Clean test data accuracy: {accuracy}")
+```
+
+%% Cell type:markdown id: tags:
+
+# Plot results
+
+%% Cell type:code id: tags:
+
+``` 
+# Draw the per-epoch training and validation accuracy curves on one figure
+plt.figure(figsize=(8, 4))
+curves = (
+    ('accuracy', 'Training Accuracy'),
+    ('val_accuracy', 'Validation Accuracy'),
+)
+for history_key, curve_label in curves:
+    plt.plot(history.history[history_key], label=curve_label)
+plt.title('Training and Validation Accuracy')
+plt.xlabel('Epoch')
+plt.ylabel('Accuracy')
+plt.legend()
+plt.show()
+```
+
+%% Cell type:code id: tags:
+
+``` 
+from sklearn.metrics import confusion_matrix, classification_report
+import seaborn as sns
+
+# Turn the predicted probabilities and the one-hot targets into class indices
+y_pred = model.predict(x_test)
+y_pred_classes = np.argmax(y_pred, axis=1)
+y_true = np.argmax(y_test, axis=1)
+
+# Compute each metric once and reuse the result below instead of recomputing
+# it for the printout and the heatmap.
+conf_matrix = confusion_matrix(y_true, y_pred_classes)
+class_report = classification_report(y_true, y_pred_classes)
+
+# Printing the classification report
+print(class_report)
+
+cls = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
+
+# Plotting the heatmap using confusion matrix
+cm = conf_matrix  # same matrix, kept under its original name
+plt.figure(figsize = (8, 5))
+sns.heatmap(cm, annot = True,  fmt = '.0f', xticklabels = cls, yticklabels = cls)
+plt.ylabel('Actual')
+plt.xlabel('Predicted')
+plt.show()
+```