import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.utils import shuffle
import matplotlib.pyplot as plt
from aggmap import AggMap
# Fetch the MNIST images and labels through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# The two trailing axes of the training array are the per-image dimensions.
w, h = x_train.shape[1:]
n_pixels = w * h

# One zero-padded, 1-based label per pixel (e.g. 'p-001' ... 'p-784' when
# n_pixels is 784) so that the labels sort in the original pixel order.
pad_width = len(str(n_pixels))
orignal_cols = [f"p-{idx:0{pad_width}d}" for idx in range(1, n_pixels + 1)]

# Flatten every image into a single row: one column per pixel.
x_train_df = pd.DataFrame(
    x_train.reshape(len(x_train), n_pixels), columns=orignal_cols
)
x_test_df = pd.DataFrame(
    x_test.reshape(len(x_test), n_pixels), columns=orignal_cols
)
# Preview the first training digit and the first test digit in their
# original pixel layout.
# Each preview is drawn on its own figure: plt.imshow targets the current
# axes, so two consecutive calls without a new figure would stack both
# images on the same axes and hide the first one.
plt.figure()
ax = plt.imshow(x_train_df.iloc[0].values.reshape(w,h))
plt.figure()
ax = plt.imshow(x_test_df.iloc[0].values.reshape(w,h))
# Scramble the pixel column order; the fixed seed keeps the permutation
# reproducible across runs.
shuffled_cols = shuffle(orignal_cols, random_state=111)

# Apply the very same column permutation to both the training and test frames.
x_train_df_shuffled = x_train_df.loc[:, shuffled_cols]
x_test_df_shuffled = x_test_df.loc[:, shuffled_cols]
# Preview the first training digit and the first test digit after the pixel
# columns have been shuffled.
# Each preview is drawn on its own figure: plt.imshow targets the current
# axes, so two consecutive calls without a new figure would stack both
# images on the same axes and hide the first one.
plt.figure()
ax = plt.imshow(x_train_df_shuffled.iloc[0].values.reshape(w,h))
plt.figure()
ax = plt.imshow(x_test_df_shuffled.iloc[0].values.reshape(w,h))
# Build an AggMap over the shuffled training pixels using correlation as the
# feature-to-feature metric, then fit it in one chained expression; `mp` ends
# up bound to whatever fit() returns, exactly as a two-step rebind would.
# NOTE(review): cluster_channels=1 presumably yields a single-channel map and
# var_thr=0 presumably is a variance threshold -- confirm against the AggMap docs.
mp = (
    AggMap(x_train_df_shuffled, metric='correlation')
    .fit(cluster_channels=1, var_thr=0, verbose=0)
)
# Output:
# 2021-10-04 02:32:45,967 - INFO - [bidd-aggmap] - Calculating distance ...
# 2021-10-04 02:32:46,004 - INFO - [bidd-aggmap] - the number of process is 16
# 100%|#########################################################################| 306936/306936 [00:54<00:00, 5664.44it/s]
# 100%|######################################################################| 306936/306936 [00:00<00:00, 1966620.01it/s]
# 100%|################################################################################| 784/784 [00:02<00:00, 362.02it/s]
# 2021-10-04 02:33:42,732 - INFO - [bidd-aggmap] - applying hierarchical clustering to obtain group information ...
# 2021-10-04 02:33:46,554 - INFO - [bidd-aggmap] - Applying grid feature map(assignment), this may take several minutes(1~30 min)
# 2021-10-04 02:33:47,175 - INFO - [bidd-aggmap] - Finished
# Run the raw value arrays of the shuffled training and test frames through
# the fitted map, training set first and test set second.
x_train_restructured, x_test_restructured = (
    mp.batch_transform(frame.values)
    for frame in (x_train_df_shuffled, x_test_df_shuffled)
)
# Output:
# 100%|###########################################################################| 60000/60000 [00:29<00:00, 2043.71it/s]
# 100%|###########################################################################| 10000/10000 [00:04<00:00, 2351.53it/s]
# Preview the first training digit and the first test digit after AggMap has
# rearranged the shuffled pixels; each sample is reshaped to the map's
# fmap_shape for display.
# Each preview is drawn on its own figure: plt.imshow targets the current
# axes, so two consecutive calls without a new figure would stack both
# images on the same axes and hide the first one.
plt.figure()
ax = plt.imshow(x_train_restructured[0].reshape(*mp.fmap_shape))
plt.figure()
ax = plt.imshow(x_test_restructured[0].reshape(*mp.fmap_shape))