from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
# Load the datamart from CSV; every column in the file is used as a feature.
datamart_rfmt = pd.read_csv("data/chapter_4/datamart_rfmt.csv")

# Apply the natural log element-wise to every column.
# NOTE(review): np.log produces -inf/NaN for zero or negative inputs, and no
# index_col is passed to read_csv, so an identifier column (if the file has
# one) would be transformed as well -- confirm against the CSV contents.
datamart_rfmt_log = np.log(datamart_rfmt)

# Standardize the log-transformed columns, then wrap the resulting array back
# into a DataFrame that carries the original row index and column labels.
scaler = StandardScaler()
datamart_rfmt_normalized = pd.DataFrame(
    data=scaler.fit_transform(datamart_rfmt_log),
    index=datamart_rfmt.index,
    columns=datamart_rfmt.columns,
)
from sklearn.cluster import KMeans
# Fit one k-means model per cluster count k = 1..10 on the normalized data and
# keep the cluster assignments of each fit, keyed by k.
label = {}
for k in range(1, 11):
    # random_state is pinned so the centroid initialization is the same on
    # every run of the script.
    kmeans = KMeans(n_clusters=k, random_state=1).fit(datamart_rfmt_normalized)
    label[k] = kmeans.labels_
# Persist the normalized feature table.
# NOTE(review): the target directory must already exist; to_csv does not
# create it.
datamart_rfmt_normalized.to_csv(
    path_or_buf="data/clustree/datamart_rfmt_normalized.csv",
)

# Persist the cluster assignments as a table with one column per value of k.
pd.DataFrame(label).to_csv(
    path_or_buf="data/clustree/datamart_rfmt_label.csv",
)