この記事は、Elixir Advent Calendar 2024 シリーズ4 の8日目です
【本コラムは、5分で読め、2分で試せます】
piacere です、ご覧いただいてありがとうございます
pandasのfillnaは、AI・LLMの前処理における「欠損値置換処理」を実現します
AI・LLMの行列処理は数値しか扱えないため、数値では無いNaN(Elixirだとnilに相当)を数値に置き換えることで、行列処理を可能とし、傾向分析も可能とします
maplist = [ # sample input rows; nil marks a missing value
%{"c1" => "v1", "c2" => 2, "c3" => 3},
%{"c1" => "v2", "c2" => nil, "c3" => 6}, # "c2" is missing in this row
%{"c1" => "v3", "c2" => 5, "c3" => nil} # "c3" is missing in this row
]
fill = 0 # value substituted for every nil
結果:[
%{"c1" => "v1", "c2" => 2, "c3" => 3},
%{"c1" => "v2", "c2" => 0, "c3" => 6},
%{"c1" => "v3", "c2" => 5, "c3" => 0}
]
これをElixirで実装すると、こんなコードになります
maplist = [ # sample rows; nil marks a missing value
  %{"c1" => "v1", "c2" => 2, "c3" => 3},
  %{"c1" => "v2", "c2" => nil, "c3" => 6},
  %{"c1" => "v3", "c2" => 5, "c3" => nil}
]
fill = 0 # replacement for every nil
keys = Map.keys(List.first(maplist)) # all columns, taken from the first row

Enum.map(maplist, fn row -> # visit each row once
  Enum.reduce(keys, row, fn column, filled -> # fold every column into the row
    Map.update!(filled, column, fn
      nil -> fill # missing value: substitute the fill value
      value -> value # present value: keep as is
    end)
  end)
end)
結果:[
%{"c1" => "v1", "c2" => 2, "c3" => 3},
%{"c1" => "v2", "c2" => 0, "c3" => 6},
%{"c1" => "v3", "c2" => 5, "c3" => 0}
]
任意の列指定もできます
maplist = [ # sample rows; nil marks a missing value
  %{"c1" => "v1", "c2" => 2, "c3" => 3},
  %{"c1" => "v2", "c2" => nil, "c3" => 6},
  %{"c1" => "v3", "c2" => 5, "c3" => nil}
]
fill = 0 # replacement for nil in the selected columns
keys = ["c1", "c3"] # only these columns are filled; others are left untouched

for row <- maplist do # visit each row once
  Enum.reduce(keys, row, fn column, filled -> # fold each selected column into the row
    Map.update!(filled, column, fn
      nil -> fill # missing value: substitute the fill value
      value -> value # present value: keep as is
    end)
  end)
end
結果:[
%{"c1" => "v1", "c2" => 2, "c3" => 3},
%{"c1" => "v2", "c2" => nil, "c3" => 6},
%{"c1" => "v3", "c2" => 5, "c3" => 0}
]