やること
numpy と python list では append の速度が結構違うと見たので、実際に違いを計測してみる。
コード
import numpy as np
import time
import matplotlib.pyplot as plt
import pandas as pd
# Benchmark sizes: five log-spaced points from 10**1 to 10**5
# (np.logspace yields [1.e+01 1.e+02 1.e+03 1.e+04 1.e+05]), cast to int64
# so each size can be handed to range().
data_nums = np.logspace(1, 5, num=5).astype("int64")

# Result table: one row per size. The two timing columns start out empty
# and receive one elapsed-time value per row from the benchmark loop.
df = pd.DataFrame(
    columns=["data_num", "python_list", "numpy_ndarray"],
    index=range(data_nums.size),
)
df.loc[:, "data_num"] = data_nums
# Time both append strategies once for every size in data_nums and store the
# elapsed seconds in the matching row of df.
#
# df is indexed by range(len(data_nums)), so the enumerate() position is also
# the row label; no lookup of the row by its data_num value is needed.
for row, data_num in enumerate(data_nums):
    separator = "-" * 30
    print(separator)
    print(f"data_num : {data_num}")
    print(separator)

    # =========================================
    # using python list append
    # =========================================
    # The ndarray -> list and list -> ndarray conversions sit inside the
    # timed region, so this figure covers the whole round trip and not only
    # the list.append() calls.
    start_time_python_list = time.perf_counter()
    data_python_list = np.empty(0)
    data_python_list_ = data_python_list.tolist()  # numpy ndarray -> python list
    for i in range(data_num):  # append data
        data_python_list_.append(i)
    data_python_list_np = np.asarray(data_python_list_)  # python list -> numpy ndarray
    end_time_python_list = time.perf_counter()

    elapsed_time_python_list = end_time_python_list - start_time_python_list
    print(f"python_list : {elapsed_time_python_list} [sec]")
    df.at[row, "python_list"] = elapsed_time_python_list

    # =========================================
    # using numpy ndarray append
    # =========================================
    # np.append returns a new array on every call, so the result has to be
    # rebound to the name each iteration.
    start_time_numpy = time.perf_counter()
    data_numpy_append_ = np.empty(0)
    for i in range(data_num):  # append data
        data_numpy_append_ = np.append(data_numpy_append_, i)
    end_time_numpy = time.perf_counter()

    elapsed_time_numpy = end_time_numpy - start_time_numpy
    print(f"numpy_ndarray : {elapsed_time_numpy} [sec]")
    df.at[row, "numpy_ndarray"] = elapsed_time_numpy

    print()
print(df)

# Plot elapsed time against element count on a log-scaled x axis: one line
# plus point markers for each append strategy.
a = df.to_numpy()  # column 0: data_num, 1: python_list, 2: numpy_ndarray
fig, ax = plt.subplots()
ax.set(xscale="log", xlabel="data num", ylabel="elapsed time[s]")
for column, label in enumerate(("python_list", "numpy_ndarray"), start=1):
    ax.plot(a[:, 0], a[:, column], label=label)
    ax.scatter(a[:, 0], a[:, column])
ax.legend()
plt.show()
結果
配列の要素数(data_num) | python_list [sec] | numpy_ndarray [sec] |
---|---|---|
10 | 0.000019 | 0.000081 |
100 | 0.000221 | 0.000634 |
1000 | 0.00026 | 0.007898 |
10000 | 0.001955 | 0.071762 |
100000 | 0.018196 | 2.05108 |
append は python list の方が速い。
numpy ndarray は固定長の配列であり、np.append は呼び出すたびに新しい配列を確保して全要素をコピーする。
一方 python list は可変長配列で、末尾への追加が償却 O(1) で済むことが理由らしい。