# %%
import numpy as np
import matplotlib.pyplot as plt
# %%
import pandas as pd
# %%
def accyracy(y_true, y_pred):
    """Fraction of predictions that match the ground truth.

    (Name kept as-is — misspelled in the original — so callers still work.)

    :param y_true: list of actual labels
    :param y_pred: list of predicted labels
    :return: accuracy in [0, 1]; 0.0 for empty input
    """
    # Guard: empty input would otherwise raise ZeroDivisionError.
    if not y_true:
        return 0.0
    # Count pairwise agreements; zip stops at the shorter list.
    correct = sum(1 for yt, yp in zip(y_true, y_pred) if yt == yp)
    return correct / len(y_true)
# %%
# Toy binary labels: l1 = ground-truth labels, l2 = predicted labels.
l1 = [0, 1, 1, 1, 0, 0, 0, 1]
l2 = [0, 1, 0, 1, 0, 1, 0, 0]
# %%
print(accyracy(l1, l2))  # 5 of 8 positions agree -> 0.625
# %%
def true_positive(y_true, y_pred):
    """Count samples whose actual and predicted labels are both 1."""
    return sum(1 for actual, predicted in zip(y_true, y_pred)
               if actual == 1 and predicted == 1)
def true_negative(y_true, y_pred):
    """Count samples whose actual and predicted labels are both 0."""
    return sum(1 for actual, predicted in zip(y_true, y_pred)
               if actual == 0 and predicted == 0)
def false_positive(y_true, y_pred):
    """Count samples predicted 1 whose actual label is 0."""
    return sum(1 for actual, predicted in zip(y_true, y_pred)
               if actual == 0 and predicted == 1)
def false_negative(y_true, y_pred):
    """Count samples predicted 0 whose actual label is 1."""
    return sum(1 for actual, predicted in zip(y_true, y_pred)
               if actual == 1 and predicted == 0)
# %%
# Confusion-matrix cell counts for the toy labels above.
print(true_positive(l1, l2))   # expected 2
print(true_negative(l1, l2))   # expected 3
print(false_positive(l1, l2))  # expected 1
print(false_negative(l1, l2))  # expected 2
# %%
def accyracy_v2(y_true, y_pred):
    """Accuracy computed from the four confusion-matrix cells.

    Assumes binary (0/1) labels; should agree with accyracy().

    :param y_true: list of actual labels
    :param y_pred: list of predicted labels
    :return: (tp + tn) / (tp + tn + fp + fn); 0.0 for empty input
    """
    tp = true_positive(y_true, y_pred)
    tn = true_negative(y_true, y_pred)
    fp = false_positive(y_true, y_pred)
    fn = false_negative(y_true, y_pred)
    total = tp + tn + fp + fn
    # Guard: empty input would otherwise divide by zero.
    if total == 0:
        return 0.0
    return (tp + tn) / total
print(accyracy_v2(l1, l2))  # should match accyracy(l1, l2) -> 0.625
# %%
def precision(y_true, y_pred):
    """Precision = tp / (tp + fp); 0 when nothing was predicted positive."""
    tp = true_positive(y_true, y_pred)
    fp = false_positive(y_true, y_pred)
    denominator = tp + fp
    return tp / denominator if denominator else 0
print(precision(l1, l2))  # tp=2, fp=1 -> 0.666...
# %%
def recall(y_true, y_pred):
    """Recall = tp / (tp + fn).

    Returns 0 when there are no positive ground-truth samples,
    mirroring the zero-division guard already present in precision().

    :param y_true: list of actual labels
    :param y_pred: list of predicted labels
    :return: recall in [0, 1]
    """
    tp = true_positive(y_true, y_pred)
    fn = false_negative(y_true, y_pred)
    # Guard: all-negative ground truth would otherwise divide by zero.
    if (tp + fn) == 0:
        return 0
    return tp / (tp + fn)
print(recall(l1,l2))  # tp=2, fn=2 -> 0.5
# %%
# Ground-truth binary labels for the precision-recall sweep below.
y_true = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
          1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
# Predicted probabilities of the positive class (same order as y_true).
y_pred = [0.02638412, 0.11114267, 0.31620708,
          0.0490937, 0.0191491, 0.17554844,
          0.15952202, 0.03819563, 0.11639273,
          0.079377, 0.08584789, 0.39095342,
          0.27259048, 0.03447096, 0.04644807,
          0.03543574, 0.18521942, 0.05934905,
          0.61977213, 0.33056815]
# %%
# Candidate thresholds: a sorted subset of the predicted probabilities.
thresholds = [0.0490937 , 0.05934905, 0.079377,
              0.08584789, 0.11114267, 0.11639273,
              0.15952202, 0.17554844, 0.18521942,
              0.27259048, 0.31620708, 0.33056815,
              0.39095342, 0.61977213]
# %%
# Sweep the thresholds, recording the precision/recall pair at each one.
precisions = []
recalls = []
for threshold in thresholds:
    # Binarise the probabilities at the current threshold.
    binarized = [1 if score > threshold else 0 for score in y_pred]
    precisions.append(precision(y_true, binarized))
    recalls.append(recall(y_true, binarized))
# %%
# Plot precision against recall at each swept threshold (PR curve).
plt.figure(figsize=(7,7))
plt.plot(recalls, precisions)
plt.xlabel("Recall", fontsize=14)
plt.ylabel("Precision", fontsize=14)
plt.xlim(0, 1)
plt.ylim(0,1)
plt.title("Precision-Recall Curve", fontsize=14)
plt.show()
# %%
def f1_score(y_true, y_pred):
    """Harmonic mean of precision and recall; 0 when both are 0."""
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    denominator = p + r
    if denominator == 0:
        return 0
    return (2 * p * r) / denominator
# %%
# Hard (0/1) predictions this time, for the F1 example.
y_true = [0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
          1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
y_pred = [0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
          1, 0, 0, 0, 0, 0, 0, 0, 1, 0]
# %%
print(f1_score(y_true, y_pred))  # hand-rolled F1
# %%
from sklearn import metrics
# %%
metrics.f1_score(y_true, y_pred )  # sklearn cross-check of the hand-rolled F1
# %%
def tpr(y_true, y_pred):
    """True positive rate (sensitivity); identical to recall."""
    return recall(y_true, y_pred)
# %%
def fpr(y_true, y_pred):
    """False positive rate = fp / (fp + tn).

    Returns 0 when there are no negative ground-truth samples,
    guarding the division consistently with precision() and recall().

    :param y_true: list of actual labels
    :param y_pred: list of predicted labels
    :return: false positive rate in [0, 1]
    """
    fp = false_positive(y_true, y_pred)
    tn = true_negative(y_true, y_pred)
    # Guard: all-positive ground truth would otherwise divide by zero.
    if (fp + tn) == 0:
        return 0
    return fp / (fp + tn)
# %%
# Ground-truth binary labels for the ROC example.
y_true = [0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1]
# %%
# Predicted probabilities of the positive class.
y_pred =[0.1, 0.3, 0.2, 0.6, 0.8, 0.05, 0.9, 0.5, 0.3, 0.66, 0.3,
         0.2, 0.85, 0.15, 0.99]
# y_pred = y_true
# %%
# Thresholds to sweep when tracing the ROC curve.
thresholds = [0, 0.1, 0.2, 0.3, 0.4, 0.5,
              0.6, 0.7, 0.8, 0.85, 0.9, 0.99, 1.0]
# %%
# Trace the ROC curve: one (fpr, tpr) point per threshold.
tpr_list = []
fpr_list = []
for thresh in thresholds:
    # Binarise the probabilities at the current threshold.
    tmp_prediction = [1 if x >= thresh else 0 for x in y_pred]
    tmp_tpr = tpr(y_true, tmp_prediction)
    tmp_fpr = fpr(y_true, tmp_prediction)
    tpr_list.append(tmp_tpr)
    fpr_list.append(tmp_fpr)
plt.figure(figsize=(7,7))
plt.plot(fpr_list, tpr_list)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.show()
# %%
# Tabulate the swept thresholds with their TPR/FPR values.
df = pd.DataFrame({"thresholds": thresholds, "tpr": tpr_list, "fpr": fpr_list})
print(df)
# %%
metrics.roc_auc_score(y_true, y_pred)  # area under the ROC curve traced above
# %%
# Lists to hold the true-positive and false-positive counts per threshold.
tp_list = []
fp_list = []
# Ground-truth labels.
y_true = [0, 0, 0, 0, 1, 0, 1,
          0, 0, 1, 0, 1, 0, 0, 1]
# Predicted probabilities.
y_pred = [0.1, 0.3, 0.2, 0.6, 0.8, 0.05,
          0.9, 0.5, 0.3, 0.66, 0.3, 0.2,
          0.85, 0.15, 0.99]
# List of thresholds to sweep.
thresholds = [0, 0.1, 0.2, 0.3, 0.4, 0.5,
              0.6, 0.7, 0.8, 0.85, 0.9, 0.99, 1.0]
# Compute tp and fp at each threshold and append them to the lists.
for thresh in thresholds:
    tmp_prediction = [1 if x >= thresh else 0 for x in y_pred]
    # Count true positives.
    tmp_tp = true_positive(y_true, tmp_prediction)
    # Count false positives.
    tmp_fp = false_positive(y_true, tmp_prediction)
    # Store both counts.
    tp_list.append(tmp_tp)
    fp_list.append(tmp_fp)
# %%
# Tabulate tp/fp per threshold.
df = pd.DataFrame({"thresholds": thresholds, "tp": tp_list, "fp": fp_list})
print(df)
# %%
def log_loss(y_true, y_proba):
    """Mean binary cross-entropy.

    Probabilities are clipped to [eps, 1 - eps] so log(0) never occurs.
    """
    eps = 1e-15
    losses = []
    for target, proba in zip(y_true, y_proba):
        proba = np.clip(proba, eps, 1 - eps)
        losses.append(-(target * np.log(proba) + (1 - target) * np.log(1 - proba)))
    return np.mean(losses)
# %%
# Ground-truth labels.
y_true = [0, 0, 0, 0, 1, 0, 1,
          0, 0, 1, 0, 1, 0, 0, 1]
# Predicted probabilities.
y_proba = [0.1, 0.3, 0.2, 0.6, 0.8, 0.05,
           0.9, 0.5, 0.3, 0.66, 0.3, 0.2,
           0.85, 0.15, 0.99]
print(log_loss(y_true, y_proba))  # hand-rolled log loss
# %%
metrics.log_loss(y_true, y_proba)  # sklearn cross-check
# %%
def macro_prediction(y_true, y_pred):
    """Macro-averaged precision: unweighted mean of per-class precisions.

    (Name kept as-is for existing callers; "macro_precision" would be
    more accurate.) Classes are assumed to be labelled 0 .. n_classes-1.

    :param y_true: list of actual class labels
    :param y_pred: list of predicted class labels
    :return: macro-averaged precision; 0.0 for empty input
    """
    num_classes = len(np.unique(y_true))
    # Guard: empty input would otherwise divide by zero below.
    if num_classes == 0:
        return 0.0
    total = 0.0
    for class_ in range(num_classes):
        # One-vs-rest binarisation for the current class.
        temp_true = [1 if label == class_ else 0 for label in y_true]
        temp_pred = [1 if label == class_ else 0 for label in y_pred]
        tp = true_positive(temp_true, temp_pred)
        fp = false_positive(temp_true, temp_pred)
        # A class that is never predicted contributes 0 precision
        # (matches sklearn's zero_division=0 behaviour) instead of
        # raising ZeroDivisionError.
        total += tp / (tp + fp) if (tp + fp) else 0.0
    return total / num_classes
# %%
def micro_precision(y_true, y_pred):
    """Micro-averaged precision: pool tp/fp over all classes, then divide."""
    num_classes = len(np.unique(y_true))
    total_tp = 0
    total_fp = 0
    for class_ in range(num_classes):
        # One-vs-rest binarisation for the current class.
        binary_true = [1 if label == class_ else 0 for label in y_true]
        binary_pred = [1 if label == class_ else 0 for label in y_pred]
        total_tp += true_positive(binary_true, binary_pred)
        total_fp += false_positive(binary_true, binary_pred)
    return total_tp / (total_tp + total_fp)
# %%
from collections import Counter
# %%
def weighted_precision(y_true, y_pred):
    """Support-weighted average of per-class precisions.

    Each class's precision is weighted by its frequency in y_true.
    Classes are assumed to be labelled 0 .. n_classes-1.

    :param y_true: list of actual class labels
    :param y_pred: list of predicted class labels
    :return: weighted-average precision
    """
    num_classes = len(np.unique(y_true))
    class_counts = Counter(y_true)
    weighted_sum = 0.0
    for class_ in range(num_classes):
        # One-vs-rest binarisation for the current class.
        temp_true = [1 if label == class_ else 0 for label in y_true]
        temp_pred = [1 if label == class_ else 0 for label in y_pred]
        tp = true_positive(temp_true, temp_pred)
        fp = false_positive(temp_true, temp_pred)
        # A never-predicted class contributes 0 precision (sklearn's
        # zero_division=0 behaviour) instead of raising ZeroDivisionError.
        class_precision = tp / (tp + fp) if (tp + fp) else 0.0
        weighted_sum += class_precision * class_counts[class_]
    return weighted_sum / len(y_true)
# %%
# Multi-class example with three classes (0, 1, 2).
y_true=[0,1,2,0,1,2,0,2,2]
y_pred=[0,2,1,0,2,1,0,0,2]
# %%
macro_prediction(y_true,y_pred)
# %%
# sklearn cross-check for the macro average.
metrics.precision_score(y_true,y_pred,average="macro")
# %%
micro_precision(y_true,y_pred)
# %%
# sklearn cross-check for the micro average.
metrics.precision_score(y_true,y_pred,average="micro")
# %%
weighted_precision(y_true,y_pred)
# %%
# sklearn cross-check for the weighted average.
metrics.precision_score(y_true,y_pred,average="weighted")
# %%
def weighted_f1(y_true, y_pred):
    """Support-weighted average of per-class F1 scores."""
    num_classes = len(np.unique(y_true))
    class_counts = Counter(y_true)
    weighted_sum = 0
    for class_ in range(num_classes):
        # One-vs-rest binarisation for the current class.
        binary_true = [1 if label == class_ else 0 for label in y_true]
        binary_pred = [1 if label == class_ else 0 for label in y_pred]
        p = precision(binary_true, binary_pred)
        r = recall(binary_true, binary_pred)
        # F1 is 0 when both precision and recall are 0.
        class_f1 = (2 * p * r / (p + r)) if (p + r) != 0 else 0
        weighted_sum += class_f1 * class_counts[class_]
    return weighted_sum / len(y_true)
# %%
# Same multi-class labels as before.
y_true=[0,1,2,0,1,2,0,2,2]
y_pred=[0,2,1,0,2,1,0,0,2]
print(weighted_f1(y_true,y_pred))  # hand-rolled
print(metrics.f1_score(y_true,y_pred,average="weighted"))  # sklearn cross-check
# %%
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
# %%
# Multi-class labels for the confusion-matrix heatmap.
y_true=[0,1,2,0,1,2,0,2,2]
y_pred=[0,2,1,0,2,1,0,0,2]
# %%
# Confusion matrix: rows are actual labels, columns are predicted labels.
cm = metrics.confusion_matrix(y_true, y_pred)
# %%
plt.figure(figsize=(10,10))
# %%
cmap = sns.cubehelix_palette(30, hue=0.05, rot =0,
                             light=0.9, dark=0, as_cmap=True)
sns.set(font_scale=2.5)
sns.heatmap(cm, annot=True, cmap=cmap, cbar=False)
# BUG FIX: sklearn's confusion_matrix puts predicted labels on the
# columns (x-axis) and actual labels on the rows (y-axis); the original
# axis labels were swapped.
plt.xlabel('Predicted Labels')
plt.ylabel('Actual Labels')
# %%
def pk(y_true, y_pred, k):
    """
    This function calculates precision at k for a single sample.

    :param y_true: list of values, actual classes
    :param y_pred: list of values, predicted classes (ranked)
    :param k: the value for k
    :return: precision at k; 0 when k == 0 or there are no predictions
    """
    # if k is 0, return 0. we should never have this
    # as k is always >= 1
    if k == 0:
        return 0
    # we are interested only in top-k predictions
    top_k = y_pred[:k]
    # Guard: an empty prediction list would otherwise divide by zero.
    if not top_k:
        return 0
    # hits = predicted items that are actually relevant
    common_values = set(top_k).intersection(set(y_true))
    # NOTE: the denominator is min(k, len(y_pred)) rather than k itself,
    # so prediction lists shorter than k are not penalised.
    return len(common_values) / len(top_k)
# %%
def apk(y_true, y_pred, k):
    """
    This function calculates average precision at k for a single sample.

    :param y_true: list of values, actual classes
    :param y_pred: list of values, predicted classes
    :param k: the value for k
    :return: average precision at k; 0 when k < 1
    """
    # AP@k is the mean of P@1, P@2, ..., P@k.
    pk_values = [pk(y_true, y_pred, cutoff) for cutoff in range(1, k + 1)]
    if not pk_values:
        return 0
    return sum(pk_values) / len(pk_values)
# %%
# Per-sample relevant items (order does not matter).
y_true = [
    [1, 2, 3],
    [0, 2],
    [1],
    [2, 3],
    [1, 0],
    []
]
# Per-sample ranked predictions (most confident first).
y_pred = [
    [0, 1, 2],
    [1],
    [0, 2, 3],
    [2, 3, 4, 0],
    [0, 1, 2],
    [0]
]
# %%
# Print AP@k for every sample at k = 1, 2, 3.
for i in range(len(y_true)):
    for j in range(1, 4):
        print(
            f"""
y_true = {y_true[i]}
y_pred = {y_pred[i]}
AP@{j} = {apk(y_true[i], y_pred[i], k=j)}
"""
        )
# %%
def mapk(y_true, y_pred, k):
    """Mean average precision at k over a batch of samples.

    :param y_true: list of lists of actual classes, one list per sample
    :param y_pred: list of lists of predicted classes, one per sample
    :param k: the cutoff for AP@k
    :return: mean of per-sample AP@k values; 0.0 for an empty batch
    """
    # Guard: an empty batch would otherwise raise ZeroDivisionError.
    if not y_true:
        return 0.0
    apk_values = [apk(actual, predicted, k=k)
                  for actual, predicted in zip(y_true, y_pred)]
    return sum(apk_values) / len(apk_values)
# %%
# Inspect the data, then evaluate MAP@k at several cutoffs.
y_true
# %%
y_pred
# %%
mapk(y_true, y_pred, k=1)
# %%
mapk(y_true, y_pred, k=2)
# %%
mapk(y_true, y_pred, k=3)
# %%
mapk(y_true, y_pred, k=4)
# %%
def mean_absolute_error(y_true, y_pred):
    """Mean absolute error.

    Inputs are converted to numpy arrays first, so plain Python lists
    work too (the original raised TypeError on list - list).

    :param y_true: array-like of actual values
    :param y_pred: array-like of predicted values
    :return: mean of |y_true - y_pred|
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred))
# %%
def mean_squared_error(y_true, y_pred):
    """Mean squared error.

    Inputs are converted to numpy arrays first, so plain Python lists
    work too (the original raised TypeError on list - list).

    :param y_true: array-like of actual values
    :param y_pred: array-like of predicted values
    :return: mean of (y_true - y_pred)^2
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.square(y_true - y_pred))
# %%
def mean_squared_log_error(y_true, y_pred):
    """Mean squared logarithmic error (values must be > -1).

    Inputs are converted to numpy arrays so plain Python lists work too,
    and log1p(x) replaces log(x + 1) for better accuracy near zero.

    :param y_true: array-like of actual values
    :param y_pred: array-like of predicted values
    :return: mean of (log(1 + y_true) - log(1 + y_pred))^2
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.square(np.log1p(y_true) - np.log1p(y_pred)))
# %%
def mean_percentage_error(y_true, y_pred):
    """Mean percentage error; y_true must not contain zeros.

    Inputs are converted to numpy arrays first, so plain Python lists
    work too (the original raised TypeError on list - list).

    :param y_true: array-like of actual values (non-zero)
    :param y_pred: array-like of predicted values
    :return: mean of (y_true - y_pred) / y_true, as a percentage
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean((y_true - y_pred) / y_true) * 100
# %%
def mean_abs_percentage_error(y_true, y_pred):
    """Mean absolute percentage error; y_true must not contain zeros.

    BUG FIX: the second parameter was named ``Y_pred`` while the body
    read ``y_pred``, silently using the module-level global ``y_pred``
    instead of the argument passed in. Inputs are also converted to
    numpy arrays so plain Python lists work.

    :param y_true: array-like of actual values (non-zero)
    :param y_pred: array-like of predicted values
    :return: mean of |y_true - y_pred| / y_true, as a percentage
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(y_true - y_pred) / y_true) * 100
# %%
def r2(y_true, y_pred):
    """Coefficient of determination: R^2 = 1 - SS_res / SS_tot."""
    mean_true = np.mean(y_true)
    # Residual sum of squares and total sum of squares.
    ss_res = sum((yt - yp) ** 2 for yt, yp in zip(y_true, y_pred))
    ss_tot = sum((yt - mean_true) ** 2 for yt in y_true)
    return 1 - (ss_res / ss_tot)
# %%
# NOTE: the lines below are Qiita site boilerplate that was captured
# together with the article's code; commented out so the file remains
# valid Python.
# Register as a new user and use Qiita more conveniently
# - You get articles that match your needs
# - You can efficiently read back useful information
# - You can use dark theme