In [14]:
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
In [15]:
# Load the RFMT datamart from disk into a DataFrame.
# NOTE(review): no index_col is passed, so every column in the CSV
# (including any identifier column) is loaded as data and flows into the
# log transform and clustering below -- confirm the file holds only features.
datamart_rfmt = pd.read_csv(
    "data/chapter_4/datamart_rfmt.csv",
)
In [16]:
# Apply the natural logarithm element-wise to the whole frame.
# NOTE(review): np.log yields -inf for zeros and NaN for negative values;
# this assumes every value is strictly positive -- confirm against the data.
datamart_rfmt_log = datamart_rfmt.pipe(np.log)
In [22]:
# Standardise the log-transformed columns with StandardScaler, then wrap the
# resulting array back into a DataFrame that carries the original row index
# and column labels.
scaler = StandardScaler()
datamart_rfmt_normalized = pd.DataFrame(
    scaler.fit_transform(datamart_rfmt_log),
    index=datamart_rfmt.index,
    columns=datamart_rfmt.columns,
)
In [18]:
from sklearn.cluster import KMeans
In [19]:
# Fit one KMeans model per cluster count (k = 1..10) on the normalized data
# and keep each model's per-row cluster assignments, keyed by k.
# random_state is fixed so repeated runs give the same assignments.
label = {}
for k in range(1, 11):
    kmeans = KMeans(n_clusters=k, random_state=1)
    kmeans.fit(datamart_rfmt_normalized)
    label[k] = kmeans.labels_
In [21]:
# Persist the clustering inputs and outputs as CSV files under data/clustree/.
# to_csv does not create missing parent directories (it raises OSError), so
# make sure the output folder exists first; this keeps a fresh checkout
# runnable end to end.
Path("data/clustree").mkdir(parents=True, exist_ok=True)

# Normalized feature table, written with its row index.
datamart_rfmt_normalized.to_csv("data/clustree/datamart_rfmt_normalized.csv")

# Cluster assignments: one column per k (the keys of `label`), one row per
# observation in the order the data was fitted.
pd.DataFrame(label).to_csv("data/clustree/datamart_rfmt_label.csv")