Fraud detection with an autoencoder in Keras
Build an autoencoder with Keras and use its reconstruction error to detect credit-card fraud.
Prepare the inputs
import numpy as np

# Seed NumPy's global random generator for reproducibility.
np.random.seed(1)

import pandas as pd
from sklearn.model_selection import train_test_split

# Load the transactions table and take a quick look at it.
df = pd.read_csv("data/creditcard.csv")
df.head()
df.shape

# 'Class' is the target column; every column except 'Time' and 'Class'
# is used as a model input.
y = df['Class']
X = df.drop(columns=['Time', 'Class'])

# Hold out 10% of the rows as a test set, with a fixed seed for the split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)
Define and train the model
# Compile, train, persist and evaluate the autoencoder.
# NOTE(review): `autoencoder`, `plot_acc` and `plot_loss` are not defined in the
# visible part of this file -- presumably they come from a cell that is not
# shown here; confirm before running this as a script.
# ?autoencoder.compile
# NOTE(review): 'accuracy' is tracked next to a mean-squared-error loss; for a
# continuous reconstruction target this metric is probably not meaningful --
# confirm whether plot_acc relies on it before removing it.
autoencoder.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
# The training data is passed as both input and target, so the model is fitted
# to reconstruct its own input. validation_split=0.2 asks Keras to hold out a
# fraction of the training data for validation.
history = autoencoder.fit(X_train, X_train, epochs = 10, batch_size = 32, validation_split=0.2)
# Persist the trained model to disk in HDF5 format.
autoencoder.save("model/creditcard_autoencoders_model.h5")
# Plot the recorded training history (helpers defined outside this view).
plot_acc(history.history)
plot_loss(history.history)
# Reload the saved model to check that it round-trips from disk.
from keras.models import load_model
autoencoder2 = load_model("model/creditcard_autoencoders_model.h5")
autoencoder2.__class__
# NOTE(review): predictions below come from the in-memory `autoencoder`;
# the reloaded `autoencoder2` is not used again in the visible code.
preds = autoencoder.predict(X_test)
# Spot-check the capped reconstruction score for the first five test rows.
# The original loop evaluated these expressions without printing or storing
# them, so it produced no output; the score is now computed once per row and
# printed.
X_test_values = np.array(X_test)
for i in range(5):
    # Sum of squared reconstruction errors for row i, divided by 30.
    # NOTE(review): 30 is hard-coded; if it is meant to be the number of
    # feature columns, X_test_values.shape[1] would be safer -- confirm.
    row_error = np.sum(np.square(preds[i] - X_test_values[i])) / 30
    # Scores are capped at 1: anything at or above 1 is reported as 1.
    print(row_error if row_error < 1 else 1)
A row-by-row if/else is too slow, so the same capped score is computed for all rows at once with np.where.
# Per-row reconstruction score: sum of squared differences between the
# prediction and the input, divided by 30 and capped at 1.
squared_error = np.square(preds - np.array(X_test))
row_score = np.sum(squared_error, axis=1) / 30
y_preds = np.where(row_score < 1, row_score, 1)

# Quick look at the type and shape of the result.
y_preds.__class__
y_preds.shape
import pyks

# Put the true labels ('y') next to the capped reconstruction scores ('yhat')
# and hand the table to pyks for plotting.
score_columns = dict(y=y_test, yhat=y_preds)
data = pd.DataFrame(score_columns)
pyks.plot(data)