(1) the relationship between qualitative and quantitative measures of dissimilarity between observations from differing tasks
(2) negative interference in multitask/transfer learning due to visual dissimilarities between observations from differing source tasks
(3) why our proposed approach using disentangled latent representations for a previous project reduced negative interference in multitask learning over a competitive baseline
(4) 1, 2, and 3 in realistic multitask learning settings (i.e. using the original and negative interference source task sets described below) instead of the contrived Ms. Pacman with visual transformations setting.
These four tasks are visually similar and are intended for productive multitask learning. A frame from each game is shown in a section below.
We replace Space Invaders with Riverraid, which is visually dissimilar to the other three tasks. A frame from each game is shown in a section below. We demonstrate this dissimilarity below using several data visualization methods.
Our previous project explored the use of disentangled latent representations in order to reduce negative interference in multitask learning due to visual dissimilarities across source tasks.
Here are results of the approach proposed in the project and a baseline, Actor-Mimic, for multiple values of \beta (the parameter that controls the degree of disentanglement) on the source tasks with induced negative interference:
import matplotlib.image as mpimg
# Display the multitask-performance figure saved from the previous project.
perf_img = mpimg.imread('nb_figs/287_proj_mt_perf.png')
plt.figure(figsize=(22, 22))
plt.imshow(perf_img)
plt.axis('off')
plt.show()
We see that the use of disentangled latent representations consistently improved multitask performance.
from __future__ import print_function
import time
import os
import numpy as np
import pandas as pd
import cv2
from sklearn.datasets import fetch_openml
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
%matplotlib inline
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import seaborn as sns
# This loads and returns image data.
def load_data(data_path, is_color=True):
    """Load every image file directly under `data_path` into one array.

    Args:
        data_path: directory containing the image frames.
        is_color: if True, read 3-channel (BGR) images; otherwise grayscale.

    Returns:
        np.ndarray stacking all loaded frames, in deterministic
        (filename-sorted) order.

    Raises:
        ValueError: if a file cannot be decoded as an image.
    """
    # Map the boolean onto an explicit OpenCV flag instead of relying on
    # bool -> int coercion (True happens to equal cv2.IMREAD_COLOR == 1).
    flag = cv2.IMREAD_COLOR if is_color else cv2.IMREAD_GRAYSCALE
    data = []
    # sorted() makes the load order deterministic; os.listdir order is
    # filesystem-dependent.
    for fname in sorted(os.listdir(data_path)):
        fpath = os.path.join(data_path, fname)
        if not os.path.isfile(fpath):
            continue
        img = cv2.imread(fpath, flag)
        if img is None:
            # Fail loudly: cv2.imread returns None on unreadable files, which
            # would otherwise yield a ragged object array downstream.
            raise ValueError('Could not read image: {}'.format(fpath))
        data.append(img)
    return np.array(data)
# Original (visually similar) source-task set.
N_total = 4000   # total frames across all tasks
N_indiv = 1000   # frames kept per task
data_paths = ['frames/beamrider/', 'frames/demonattack/', 'frames/phoenix/', 'frames/spaceinvaders/']
game_names = ['Beam Rider', 'Demon Attack', 'Phoenix', 'Space Invaders']
# Load data and apply same transformations as visual interference experiment does.
datas = [np.random.permutation(load_data(path))[:N_indiv] for path in data_paths]
print('Random frame from each game\n')
for frames, title in zip(datas, game_names):
    print(title)
    sample = frames[np.random.randint(N_indiv)]
    # cv2 loads channels as BGR; flip the last axis to RGB for matplotlib.
    plt.imshow(np.flip(sample, axis=-1))
    plt.axis('off')
    plt.show()
from matplotlib.colors import LinearSegmentedColormap

n_tasks = 4
n_channels = datas[0].shape[3]
# Per-(task, channel) unique pixel values and their counts, kept for inspection.
hist_data = [[[None, None] for _ in range(n_channels)] for _ in range(n_tasks)]
fig, axes = plt.subplots(nrows=n_tasks, ncols=n_channels, sharey='row', figsize=(20, 15))
cmaps = ['Blues', 'Greens', 'Reds']
# Two-point gradients (dark -> saturated) for each of the BGR channels.
colors_red = [(0.1, 0, 0), (1, 0, 0)]
colors_green = [(0, 0.1, 0), (0, 1, 0)]
colors_blue = [(0, 0, 0.1), (0, 0, 1)]
colors = [colors_blue, colors_green, colors_red]
n_bin = 255
for t in range(n_tasks):
    for ch in range(n_channels):
        channel_vals = datas[t][:, :, :, ch]
        hist_data[t][ch] = np.unique(channel_vals, return_counts=True)
        cm = LinearSegmentedColormap.from_list('a', colors[ch], N=n_bin)
        ax = axes[t, ch]
        n, bins, patches = ax.hist(channel_vals.ravel())
        # Color each bar by its bin center, rescaled to [0, 1].
        bin_centers = 0.5 * (bins[:-1] + bins[1:])
        col = bin_centers - min(bin_centers)
        col /= max(col)
        for frac, patch in zip(col, patches):
            patch.set_facecolor(cm(frac))
        if ch == 0:
            ax.set_ylabel(game_names[t], rotation=0, fontsize=25)
            ax.yaxis.set_label_coords(-0.5, 0.45)
        if t == n_tasks - 1:
            ax.set_xlabel(cmaps[ch], fontsize=25)
            ax.xaxis.set_label_coords(0.5, -0.2)
# Stack all tasks into one (N_total, H*W*C) design matrix scaled to [0, 1].
data = np.vstack(datas)
data_reshaped = data.reshape(data.shape[0], -1)
X = data_reshaped.astype('float32') / 255.0
# Rows i*N_indiv .. (i+1)*N_indiv - 1 belong to task i.
task_num_labels = np.repeat(np.arange(n_tasks), N_indiv)
print(X.shape, task_num_labels.shape)
# Load data into dataframe for easier plotting later
feat_cols = ['pixel{}'.format(i) for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feat_cols)
df['task_num_labels'] = task_num_labels
df['task'] = df['task_num_labels'].apply(lambda i: game_names[i])
# Release the large intermediates; only the dataframe keeps the data.
X, task_num_labels = None, None
print('Size of the dataframe: {}'.format(df.shape))
# t-SNE with PCA input
np.random.seed(0)
# Shuffle the rows (the "subset" here is in fact every row, reordered).
rand_perm_idxes = np.random.permutation(df.shape[0])
df_subset = df.loc[rand_perm_idxes[:(n_tasks * N_indiv)], :].copy()
data_subset = df_subset[feat_cols].values
pca_n_comp = 50
pca_n = PCA(n_components=pca_n_comp)
pca_result_n = pca_n.fit_transform(data_subset)
df_subset['pca-one'] = pca_result_n[:, 0]
df_subset['pca-two'] = pca_result_n[:, 1]
# df_subset['pca-three'] = pca_result_n[:, 2]
# Cumulative variance explained by the first k components, k = 1..pca_n_comp.
# np.cumsum replaces the original O(n^2) repeated prefix sums, and np.arange
# replaces np.linspace(..., dtype=np.uint8), which silently overflows for
# component counts above 255.
component_counts = np.arange(1, pca_n_comp + 1)
cum_var_explained_1 = list(np.cumsum(pca_n.explained_variance_ratio_[:pca_n_comp]))
for k, cum_var in zip(component_counts, cum_var_explained_1):
    print('Cumulative explained variation for {} principal components: {}'.format(k, cum_var))
sns.lineplot(x=component_counts, y=cum_var_explained_1)
plt.title('Cumulative variance explained by principal components for original source tasks')
plt.xlabel('# principal components')
plt.ylabel('Cumulative proportion of variance explained')
plt.show()
# Run t-SNE on the 50-dim PCA projection (far cheaper than raw pixels).
time_start = time.time()
tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=1000, learning_rate=200.0, n_jobs=-1)
tsne_pca_results = tsne.fit_transform(pca_result_n)
print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
col_one = 'tsne-pca{}-one'.format(pca_n_comp)
col_two = 'tsne-pca{}-two'.format(pca_n_comp)
df_subset[col_one] = tsne_pca_results[:, 0]
df_subset[col_two] = tsne_pca_results[:, 1]
# Side-by-side scatter plots: raw PCA (left) vs t-SNE on PCA (right).
plt.figure(figsize=(16, 8))
palette = sns.color_palette("husl", n_tasks)
ax1 = plt.subplot(1, 2, 1)
sns.scatterplot(x="pca-one", y="pca-two", hue="task", palette=palette,
                data=df_subset, legend="full", alpha=0.8, ax=ax1)
plt.title('PCA')
ax3 = plt.subplot(1, 2, 2)
sns.scatterplot(x='tsne-pca{}-one'.format(pca_n_comp),
                y='tsne-pca{}-two'.format(pca_n_comp),
                hue="task", palette=palette,
                data=df_subset, legend="full", alpha=0.8, ax=ax3)
plt.title('t-SNE with PCA input')
plt.show()
# Negative-interference source-task set: Space Invaders replaced by Riverraid.
N_total = 4000   # total frames across all tasks
N_indiv = 1000   # frames kept per task
data_paths = ['frames/beamrider/', 'frames/demonattack/', 'frames/phoenix/', 'frames/riverraid/']
game_names = ['Beam Rider', 'Demon Attack', 'Phoenix', 'Riverraid']
# Load data and apply same transformations as visual interference experiment does.
datas = [np.random.permutation(load_data(path))[:N_indiv] for path in data_paths]
import matplotlib.image as mpimg
print('Random frame from each game\n')
for frames, title in zip(datas, game_names):
    print(title)
    sample = frames[np.random.randint(N_indiv)]
    # cv2 loads channels as BGR; flip the last axis to RGB for matplotlib.
    plt.imshow(np.flip(sample, axis=-1))
    plt.axis('off')
    plt.show()
from matplotlib.colors import LinearSegmentedColormap

n_tasks = 4
n_channels = datas[0].shape[3]
# Per-(task, channel) unique pixel values and their counts, kept for inspection.
hist_data = [[[None, None] for _ in range(n_channels)] for _ in range(n_tasks)]
fig, axes = plt.subplots(nrows=n_tasks, ncols=n_channels, sharey='row', figsize=(20, 15))
cmaps = ['Blues', 'Greens', 'Reds']
# Two-point gradients (dark -> saturated) for each of the BGR channels.
colors_red = [(0.1, 0, 0), (1, 0, 0)]
colors_green = [(0, 0.1, 0), (0, 1, 0)]
colors_blue = [(0, 0, 0.1), (0, 0, 1)]
colors = [colors_blue, colors_green, colors_red]
n_bin = 255
for t in range(n_tasks):
    for ch in range(n_channels):
        channel_vals = datas[t][:, :, :, ch]
        hist_data[t][ch] = np.unique(channel_vals, return_counts=True)
        cm = LinearSegmentedColormap.from_list('a', colors[ch], N=n_bin)
        ax = axes[t, ch]
        n, bins, patches = ax.hist(channel_vals.ravel())
        # Color each bar by its bin center, rescaled to [0, 1].
        bin_centers = 0.5 * (bins[:-1] + bins[1:])
        col = bin_centers - min(bin_centers)
        col /= max(col)
        for frac, patch in zip(col, patches):
            patch.set_facecolor(cm(frac))
        if ch == 0:
            ax.set_ylabel(game_names[t], rotation=0, fontsize=25)
            ax.yaxis.set_label_coords(-0.5, 0.45)
        if t == n_tasks - 1:
            ax.set_xlabel(cmaps[ch], fontsize=25)
            ax.xaxis.set_label_coords(0.5, -0.2)
Riverraid differs from the other three source tasks here much more than Space Invaders does.
# Stack all tasks into one (N_total, H*W*C) design matrix scaled to [0, 1].
data = np.vstack(datas)
data_reshaped = data.reshape(data.shape[0], -1)
X = data_reshaped.astype('float32') / 255.0
# Rows i*N_indiv .. (i+1)*N_indiv - 1 belong to task i.
task_num_labels = np.repeat(np.arange(n_tasks), N_indiv)
print(X.shape, task_num_labels.shape)
# Load data into dataframe for easier plotting later
feat_cols = ['pixel{}'.format(i) for i in range(X.shape[1])]
df = pd.DataFrame(X, columns=feat_cols)
df['task_num_labels'] = task_num_labels
df['task'] = df['task_num_labels'].apply(lambda i: game_names[i])
# Release the large intermediates; only the dataframe keeps the data.
X, task_num_labels = None, None
print('Size of the dataframe: {}'.format(df.shape))
# t-SNE with PCA input
np.random.seed(0)
# Shuffle the rows (the "subset" here is in fact every row, reordered).
rand_perm_idxes = np.random.permutation(df.shape[0])
df_subset = df.loc[rand_perm_idxes[:(n_tasks * N_indiv)], :].copy()
data_subset = df_subset[feat_cols].values
pca_n_comp = 50
pca_n = PCA(n_components=pca_n_comp)
pca_result_n = pca_n.fit_transform(data_subset)
df_subset['pca-one'] = pca_result_n[:, 0]
df_subset['pca-two'] = pca_result_n[:, 1]
# df_subset['pca-three'] = pca_result_n[:, 2]
# Cumulative variance explained by the first k components, k = 1..pca_n_comp.
# np.cumsum replaces the original O(n^2) repeated prefix sums, and np.arange
# replaces np.linspace(..., dtype=np.uint8), which silently overflows for
# component counts above 255.
component_counts = np.arange(1, pca_n_comp + 1)
cum_var_explained_2 = list(np.cumsum(pca_n.explained_variance_ratio_[:pca_n_comp]))
for k, cum_var in zip(component_counts, cum_var_explained_2):
    print('Cumulative explained variation for {} principal components: {}'.format(k, cum_var))
sns.lineplot(x=component_counts, y=cum_var_explained_2)
plt.title('Cumulative variance explained by principal components for source tasks with induced negative interference')
plt.xlabel('# principal components')
plt.ylabel('Cumulative proportion of variance explained')
plt.show()
# Run t-SNE on the 50-dim PCA projection (far cheaper than raw pixels).
time_start = time.time()
tsne = TSNE(n_components=2, verbose=0, perplexity=40, n_iter=1000, learning_rate=200.0, n_jobs=-1)
tsne_pca_results = tsne.fit_transform(pca_result_n)
print('t-SNE done! Time elapsed: {} seconds'.format(time.time()-time_start))
col_one = 'tsne-pca{}-one'.format(pca_n_comp)
col_two = 'tsne-pca{}-two'.format(pca_n_comp)
df_subset[col_one] = tsne_pca_results[:, 0]
df_subset[col_two] = tsne_pca_results[:, 1]
# Side-by-side scatter plots: raw PCA (left) vs t-SNE on PCA (right).
plt.figure(figsize=(16, 8))
palette = sns.color_palette("husl", n_tasks)
ax1 = plt.subplot(1, 2, 1)
sns.scatterplot(x="pca-one", y="pca-two", hue="task", palette=palette,
                data=df_subset, legend="full", alpha=0.8, ax=ax1)
plt.title('PCA')
ax3 = plt.subplot(1, 2, 2)
sns.scatterplot(x='tsne-pca{}-one'.format(pca_n_comp),
                y='tsne-pca{}-two'.format(pca_n_comp),
                hue="task", palette=palette,
                data=df_subset, legend="full", alpha=0.8, ax=ax3)
plt.title('t-SNE with PCA input')
plt.show()
Note that the data points in the t-SNE of the original source task observations are less linearly separable than those of the t-SNE of the negative interference induced source task observations. A quick way to see this is to consider the difference in the clustering of the Space Invaders and Riverraid observations.
# Overlay both cumulative-variance curves and their pointwise difference.
plt.figure(figsize=(15, 8))
component_counts = np.linspace(1, pca_n_comp, pca_n_comp, dtype=np.uint8)
sns.lineplot(x=component_counts, y=cum_var_explained_1)
sns.lineplot(x=component_counts, y=cum_var_explained_2)
cum_var_explained_diff = [after - before
                          for before, after in zip(cum_var_explained_1, cum_var_explained_2)]
sns.lineplot(x=component_counts, y=cum_var_explained_diff)
plt.title('Cumulative variance explained by principal components for original and negative interference source task sets', fontsize=16)
plt.xlabel('# principal components')
plt.ylabel('Cumulative proportion of variance explained')
plt.legend(['Original set', 'Negative interference set', 'Difference'])
plt.xticks(component_counts)
plt.yticks(np.linspace(0, 1, 21))
plt.show()
Any fixed number of principal components explains a larger proportion of the variance for the negative-interference-induced source tasks than for the original source tasks, because the observations in the former set are more easily separated from one another than the observations in the latter set.
Explicitly, this is because Space Invaders was replaced with Riverraid, which both eliminated a task with observations relatively similar to the three remaining source task observations and added a task with observations relatively dissimilar to the three remaining source task observations. Each of these changes alone made it easier to separate the data, so both together made it much easier.