%% Cell type:markdown id: tags:
Targeted backdoor attack with Mixup augmentation as a defense
%% Cell type:code id: tags:
```
%matplotlib inline
import matplotlib.pyplot as plt
import tensorflow as tf
import copy
import numpy as np
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# Set the random seeds for reproducibility
tf.random.set_seed(42)
np.random.seed(42)
```
%% Cell type:markdown id: tags:
# Load, normalize and split the data
%% Cell type:code id: tags:
```
# Load Cifar10 dataset
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# Concatenate train and test sets
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
# Normalize the images
x = x.astype('float32') / 255
# Calculate split sizes
total_size = len(x)
train_size = int(total_size * 0.70)
val_size = int(total_size * 0.20)
test_size = total_size - train_size - val_size
# Split the dataset
x_train, x_val, x_test = x[:train_size], x[train_size:train_size+val_size], x[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
# One-hot encode the labels - do this before modeling
#y_train = to_categorical(y_train, 10)
#y_val = to_categorical(y_val, 10)
#y_test = to_categorical(y_test, 10)
# Check the shapes
print(f'x_train shape: {x_train.shape}, y_train shape: {y_train.shape}')
print(f'x_val shape: {x_val.shape}, y_val shape: {y_val.shape}')
print(f'x_test shape: {x_test.shape}, y_test shape: {y_test.shape}')
```
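%% Cell type:markdown id: tags:
The split above takes contiguous slices of the concatenated arrays. As an optional alternative (a sketch only; the `_s` variables below are illustrative and not used in the rest of the notebook), the `train_test_split` import from the first cell can produce a shuffled, stratified 70/20/10 split so each subset keeps the same class balance:
%% Cell type:code id: tags:
```
# Optional sketch: shuffled, stratified 70/20/10 split (not used downstream)
x_tmp, x_test_s, y_tmp, y_test_s = train_test_split(
    x, y, test_size=0.10, stratify=y.ravel(), random_state=42)
x_train_s, x_val_s, y_train_s, y_val_s = train_test_split(
    x_tmp, y_tmp, test_size=0.20 / 0.90, stratify=y_tmp.ravel(), random_state=42)
print(x_train_s.shape, x_val_s.shape, x_test_s.shape)
```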
%% Cell type:markdown id: tags:
# Check distributions
%% Cell type:code id: tags:
```
# Function to calculate class distribution
def class_distribution(labels):
    # Count the occurrences of each class in the dataset
    unique, counts = np.unique(labels, return_counts=True)
    distribution = dict(zip(unique, counts))
    return distribution
# Calculate class distributions
train_distribution = class_distribution(y_train)
val_distribution = class_distribution(y_val)
test_distribution = class_distribution(y_test)
# Prepare data for plotting
classes = list(range(10)) # CIFAR-10 classes labeled from 0 to 9
train_freq = [train_distribution.get(i, 0) for i in classes]
val_freq = [val_distribution.get(i, 0) for i in classes]
test_freq = [test_distribution.get(i, 0) for i in classes]
# Plotting the distributions
plt.figure(figsize=(15, 5))
# Training set distribution
plt.subplot(1, 3, 1)
plt.bar(classes, train_freq)
plt.title('Training Set Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
# Validation set distribution
plt.subplot(1, 3, 2)
plt.bar(classes, val_freq)
plt.title('Validation Set Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
# Test set distribution
plt.subplot(1, 3, 3)
plt.bar(classes, test_freq)
plt.title('Test Set Distribution')
plt.xlabel('Class')
plt.ylabel('Frequency')
plt.tight_layout()
plt.show()
```
%% Cell type:markdown id: tags:
# Generate sample images
%% Cell type:code id: tags:
```
# CIFAR-10 classes
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Display the first few images
plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5, 5, i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(x_train[i], interpolation='nearest', aspect='auto')
    plt.xlabel(class_names[y_train[i][0]])
plt.show()
```
%% Cell type:code id: tags:
```
# Before modeling and poisoning, one-hot encode the label arrays
y_train = to_categorical(y_train, 10)
y_val = to_categorical(y_val, 10)
y_test = to_categorical(y_test, 10)
```
%% Cell type:markdown id: tags:
# Poison the training data
%% Cell type:code id: tags:
```
def add_backdoor(x):
    backdoor_pattern = np.zeros_like(x[0])
    backdoor_pattern[25:28, 25:28] = 1  # A small white square near the corner
    num_samples = int(0.8 * x.shape[0])  # Poison 80% of the training images
    for i in range(num_samples):
        # Add the trigger and clip so pixel values stay in [0, 1]
        x[i] = np.clip(x[i] + backdoor_pattern, 0, 1)
    return x
# Insert backdoor into the training images
x_train = add_backdoor(x_train)
```
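%% Cell type:markdown id: tags:
To make the trigger concrete, the cell below is an optional sanity check (not required by the rest of the notebook): it displays one poisoned training image next to one of the untouched images at the end of the array, so the small white square near the bottom-right corner is visible.
%% Cell type:code id: tags:
```
# Optional sanity check: add_backdoor stamps the trigger on the first 80% of
# x_train in place, so the first image is poisoned and the last one is clean.
plt.figure(figsize=(6, 3))
plt.subplot(1, 2, 1)
plt.imshow(x_train[0])
plt.title('Poisoned (trigger in corner)')
plt.axis('off')
plt.subplot(1, 2, 2)
plt.imshow(x_train[-1])
plt.title('Clean')
plt.axis('off')
plt.show()
```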
%% Cell type:markdown id: tags:
# Defense: Apply augmentation to poisoned training data
%% Cell type:markdown id: tags:
Mixup creates new training examples by linearly combining pairs of images and their labels: it takes two images with their corresponding labels and blends them into a new image and a new label. The blending weight is typically sampled from a Beta distribution, so each new image is a pixel-wise weighted average of two originals. Because of this pixel-wise blending, Mixup can produce somewhat unrealistic images that do not resemble natural photographs. CutMix, by contrast, combines pairs of images and labels by cutting a patch from one training image and pasting it onto another. An illustrative cell after the function definitions below shows what a Mixup blend looks like.
%% Cell type:code id: tags:
```
def mixup(image1, label1, image2, label2, alpha):
    lam = np.random.beta(alpha, alpha)
    image = lam * image1 + (1 - lam) * image2
    label = lam * label1 + (1 - lam) * label2
    return image, label

def mixup_batch(batch_x, batch_y, alpha=0.2):
    batch_size = tf.shape(batch_x)[0]
    idx = tf.random.shuffle(tf.range(batch_size))
    # Sample lam with TensorFlow ops so a fresh value is drawn for every batch;
    # np.random.beta would only run once when tf.data traces this function.
    g1 = tf.random.gamma([], alpha)
    g2 = tf.random.gamma([], alpha)
    lam = g1 / (g1 + g2)  # Beta(alpha, alpha) sample
    mixed_x = lam * batch_x + (1 - lam) * tf.gather(batch_x, idx)
    mixed_y = lam * batch_y + (1 - lam) * tf.gather(batch_y, idx)
    return mixed_x, mixed_y
```
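%% Cell type:markdown id: tags:
To see what the blend actually looks like, the cell below is purely illustrative: it mixes two training images with the `mixup` function defined above and shows the inputs, the blended image, and the blended label.
%% Cell type:code id: tags:
```
# Illustrative only: blend two training images (note that the first training
# images already carry the backdoor trigger from the poisoning step above).
img, lbl = mixup(x_train[0], y_train[0], x_train[1], y_train[1], alpha=0.2)
plt.figure(figsize=(9, 3))
titles = ['Image 1', 'Image 2', 'Mixup blend']
for i, im in enumerate([x_train[0], x_train[1], img]):
    plt.subplot(1, 3, i + 1)
    plt.imshow(np.clip(im, 0, 1))
    plt.title(titles[i])
    plt.axis('off')
plt.show()
print('Blended label:', np.round(lbl, 2))
```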
%% Cell type:code id: tags:
```
# Apply Mixup to the training data
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(32)
train_dataset = train_dataset.map(lambda x, y: mixup_batch(x, y, alpha=0.1))
# Prepare the validation and test datasets
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(32)
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(32)
```
%% Cell type:markdown id: tags:
# Train model on poisoned data and check performance on clean test data
%% Cell type:code id: tags:
```
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.models import Sequential
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(32, 32, 3)))
model.add(BatchNormalization())
model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.2))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(BatchNormalization())
model.add(MaxPooling2D(2, 2))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(512, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))
# Compile the model
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])
```
%% Cell type:code id: tags:
```
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
# Monitor 'val_accuracy' (the metric name used by tf.keras), not 'val_acc'
checkpoint = ModelCheckpoint("./model1.h5", monitor='val_accuracy', verbose=1, save_best_only=True, mode='max')
early_stopping = EarlyStopping(monitor='val_loss',
                               min_delta=0,
                               patience=3,
                               verbose=1,
                               restore_best_weights=True)
reduce_learningrate = ReduceLROnPlateau(monitor='val_loss',
                                        factor=0.2,
                                        patience=3,
                                        verbose=1,
                                        min_delta=0.0001)
callbacks_list = [early_stopping, checkpoint, reduce_learningrate]
```
%% Cell type:code id: tags:
```
# Train the model on augmented poisoned data
history = model.fit(train_dataset, epochs=50, validation_data=val_dataset, callbacks = callbacks_list)
# Evaluate on clean data
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Clean test data accuracy: {accuracy}")
```
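%% Cell type:markdown id: tags:
The evaluation above only uses clean test images. As a rough check of what the trigger does at inference time (a sketch; `apply_trigger` is a hypothetical helper introduced here, not part of the code above), the same white square can be stamped onto a copy of every test image and the accuracy compared with the clean result:
%% Cell type:code id: tags:
```
# Hypothetical helper (assumption): apply the same trigger used during training
# to every image of a copy of the given array, leaving the original untouched.
def apply_trigger(images):
    triggered = images.copy()
    # Setting the patch to 1.0 matches the clipped result of adding the pattern
    triggered[:, 25:28, 25:28, :] = 1.0
    return triggered

x_test_triggered = apply_trigger(x_test)
clean_loss, clean_acc = model.evaluate(x_test, y_test, verbose=0)
trig_loss, trig_acc = model.evaluate(x_test_triggered, y_test, verbose=0)
print(f"Accuracy on clean test images:     {clean_acc:.4f}")
print(f"Accuracy on triggered test images: {trig_acc:.4f}")
```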
%% Cell type:markdown id: tags:
# Plot results
%% Cell type:code id: tags:
```
# Plotting training and validation accuracy
plt.figure(figsize=(8, 4))
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Training and Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
```
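%% Cell type:markdown id: tags:
The same `history` object also holds the loss values, so the accuracy curves above can optionally be complemented with the corresponding loss curves:
%% Cell type:code id: tags:
```
# Plotting training and validation loss from the same history object
plt.figure(figsize=(8, 4))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
```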
%% Cell type:code id: tags:
```
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
y_pred = model.predict(x_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)
# Printing the classification report
print(classification_report(y_true, y_pred_classes))
cls = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
# Plotting the heatmap of the confusion matrix
cm = confusion_matrix(y_true, y_pred_classes)
plt.figure(figsize = (8, 5))
sns.heatmap(cm, annot = True, fmt = '.0f', xticklabels = cls, yticklabels = cls)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()
```