はじめに
- @python_kenichi さんのPythonで自然言語処理100本ノック 2020を解いたついでに死ぬほど詳しく解説を書いていく[第1章 準備運動]を拝見しまして、私はぜひElixirでやってみようとおもいました
-
Elixirは
Elixir 1.10.3 (compiled with Erlang/OTP 23)
を使いました
0. インストールとプロジェクトの作成
- まずはElixirをインストールしましょう
- 手前味噌ですが、Enjoy Elixir #001 -- mix new, iex -S mix, mix format などを参考にしてください
- インストールができましたら以下のコマンドでプロジェクトを作ります
$ mix new nlp100
$ cd nlp100
1. ソースコードを書く
lib/warm_up.ex
defmodule WarmUp do
  @moduledoc """
  Solutions to chapter 1 ("Warm-up") of the NLP 100 Exercise 2020.

  All text-handling functions treat a "character" as a Unicode grapheme, so
  combining sequences are never split apart.

  https://nlp100.github.io/ja/ch01.html
  """

  # 1-based word positions in exercise 04 whose symbol is a single letter.
  @single_letter_positions [1, 5, 6, 7, 8, 9, 15, 16, 19]

  @doc ~S"""
  00. Reversed string.

  Returns `str` with its characters in reverse order.

  https://nlp100.github.io/ja/ch01.html#00-%E6%96%87%E5%AD%97%E5%88%97%E3%81%AE%E9%80%86%E9%A0%86

  ## Examples

      iex> WarmUp.run00
      "desserts"

  """
  @spec run00(String.t()) :: String.t()
  def run00(str \\ "stressed"), do: String.reverse(str)

  @doc ~S"""
  01. "パタトクカシーー".

  Returns the string made of the 1st, 3rd, 5th, ... characters of `str`.

  https://nlp100.github.io/ja/ch01.html#01-%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run01
      "パトカー"

  """
  @spec run01(String.t()) :: String.t()
  def run01(str \\ "パタトクカシーー") do
    str
    |> String.graphemes()
    # take_every/2 starts with the first element, i.e. the odd 1-based positions.
    |> Enum.take_every(2)
    |> Enum.join()
  end

  @doc ~S"""
  02. "パトカー" + "タクシー" = "パタトクカシーー".

  Interleaves the characters of `str1` and `str2`, starting with `str1`.
  When the strings differ in length, the surplus characters of the longer
  one are dropped.

  https://nlp100.github.io/ja/ch01.html#02-%E3%83%91%E3%83%88%E3%82%AB%E3%83%BC%E3%82%BF%E3%82%AF%E3%82%B7%E3%83%BC%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run02
      "パタトクカシーー"

  """
  @spec run02(String.t(), String.t()) :: String.t()
  def run02(str1 \\ "パトカー", str2 \\ "タクシー") do
    str1
    |> String.graphemes()
    |> Enum.zip(String.graphemes(str2))
    |> Enum.map_join(fn {c1, c2} -> c1 <> c2 end)
  end

  @doc ~S"""
  03. Pi.

  Removes `.` and `,` from `str`, splits it into whitespace-separated words
  and returns the length of every word in order of appearance.

  https://nlp100.github.io/ja/ch01.html#03-%E5%86%86%E5%91%A8%E7%8E%87

  ## Examples

      iex> WarmUp.run03
      [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]

  """
  @spec run03(String.t()) :: [non_neg_integer()]
  def run03(
        str \\ "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
      ) do
    str
    |> String.replace([".", ","], "")
    # String.split/1 ignores repeated/leading/trailing whitespace, so no
    # empty "words" (length 0) leak into the result.
    |> String.split()
    |> Enum.map(&String.length/1)
  end

  @doc ~S"""
  04. Chemical symbols.

  Splits `str` into words and maps the leading letter(s) of every word to the
  word's 1-based position. Words 1, 5, 6, 7, 8, 9, 15, 16 and 19 contribute
  their first letter, all other words their first two letters.

  https://nlp100.github.io/ja/ch01.html#04-%E5%85%83%E7%B4%A0%E8%A8%98%E5%8F%B7

  ## Examples

      iex> WarmUp.run04
      %{"Al" => 13, "Ar" => 18, "B" => 5, "Be" => 4, "C" => 6, "Ca" => 20, "Cl" => 17, "F" => 9, "H" => 1, "He" => 2, "K" => 19, "Li" => 3, "Mi" => 12, "N" => 7, "Na" => 11, "Ne" => 10, "O" => 8, "P" => 15, "S" => 16, "Si" => 14}
      iex> WarmUp.run04() |> map_size()
      20

  """
  @spec run04(String.t()) :: %{optional(String.t()) => pos_integer()}
  def run04(
        str \\ "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
      ) do
    str
    |> String.split()
    |> Enum.with_index(1)
    |> Map.new(&extract/1)
  end

  # Builds the {symbol, position} pair for one indexed word.
  defp extract({word, idx}) when idx in @single_letter_positions do
    {String.slice(word, 0, 1), idx}
  end

  defp extract({word, idx}), do: {String.slice(word, 0, 2), idx}

  @doc ~S"""
  05. n-gram (characters).

  Returns the character n-grams of `str` for every n from 1 up to the length
  of `str`. An empty string yields `[]`.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_character
      [
        ["I", " ", "a", "m", " ", "a", "n", " ", "N", "L", "P", "e", "r"],
        ["I ", " a", "am", "m ", " a", "an", "n ", " N", "NL", "LP", "Pe", "er"],
        ["I a", " am", "am ", "m a", " an", "an ", "n N", " NL", "NLP", "LPe", "Per"],
        ["I am", " am ", "am a", "m an", " an ", "an N", "n NL", " NLP", "NLPe", "LPer"],
        ["I am ", " am a", "am an", "m an ", " an N", "an NL", "n NLP", " NLPe", "NLPer"],
        ["I am a", " am an", "am an ", "m an N", " an NL", "an NLP", "n NLPe", " NLPer"],
        ["I am an", " am an ", "am an N", "m an NL", " an NLP", "an NLPe", "n NLPer"],
        ["I am an ", " am an N", "am an NL", "m an NLP", " an NLPe", "an NLPer"],
        ["I am an N", " am an NL", "am an NLP", "m an NLPe", " an NLPer"],
        ["I am an NL", " am an NLP", "am an NLPe", "m an NLPer"],
        ["I am an NLP", " am an NLPe", "am an NLPer"],
        ["I am an NLPe", " am an NLPer"],
        ["I am an NLPer"]
      ]

  """
  @spec run05_character(String.t()) :: [[String.t()]]
  def run05_character(str \\ "I am an NLPer"), do: all_n_grams(str, String.length(str))

  @doc ~S"""
  05. n-gram (words).

  Splits `str` into whitespace-separated words and returns the word n-grams
  for every n from 1 up to the number of words. A blank string yields `[]`.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_word
      [
        [["I"], ["am"], ["an"], ["NLPer"]],
        [["I", "am"], ["am", "an"], ["an", "NLPer"]],
        [["I", "am", "an"], ["am", "an", "NLPer"]],
        [["I", "am", "an", "NLPer"]]
      ]

  """
  @spec run05_word(String.t()) :: [[[String.t()]]]
  def run05_word(str \\ "I am an NLPer") do
    words = String.split(str)
    all_n_grams(words, length(words))
  end

  # n-grams for n = 1..max_n. The 0 clause is required because the range 1..0
  # would otherwise be traversed in descending order (1, 0).
  defp all_n_grams(_sequence, 0), do: []
  defp all_n_grams(sequence, max_n), do: Enum.map(1..max_n, &n_gram(sequence, &1))

  @doc ~S"""
  Returns all contiguous n-grams of a string (by character) or of a list
  (by element).

  `n` must be a positive integer; any other value raises
  `FunctionClauseError`. When `n` exceeds the length of the input, the
  result is `[]`.

  ## Examples

      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 1)
      ["こ", "ち", "ら", "葛", "飾", "区", "亀", "有", "公", "園", "前", "派", "出", "所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 2)
      ["こち", "ちら", "ら葛", "葛飾", "飾区", "区亀", "亀有", "有公", "公園", "園前", "前派", "派出", "出所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 3)
      ["こちら", "ちら葛", "ら葛飾", "葛飾区", "飾区亀", "区亀有", "亀有公", "有公園", "公園前", "園前派", "前派出", "派出所"]
      iex> WarmUp.n_gram([1, 2, 3], 1)
      [[1], [2], [3]]
      iex> WarmUp.n_gram([1, 2, 3], 2)
      [[1, 2], [2, 3]]
      iex> WarmUp.n_gram([1, 2, 3], 3)
      [[1, 2, 3]]

  """
  @spec n_gram(String.t(), pos_integer()) :: [String.t()]
  @spec n_gram(list(), pos_integer()) :: [list()]
  def n_gram(str, n) when is_binary(str) and is_integer(n) and n > 0 do
    str
    |> String.graphemes()
    # Sliding window of size n, step 1; :discard drops incomplete windows,
    # which also makes the result [] when n is larger than the input.
    |> Enum.chunk_every(n, 1, :discard)
    |> Enum.map(&Enum.join/1)
  end

  def n_gram(list, n) when is_list(list) and is_integer(n) and n > 0 do
    Enum.chunk_every(list, n, 1, :discard)
  end

  @doc """
  06. Sets: union of the character bi-gram sets of "paraparaparadise" (X)
  and "paragraph" (Y).

  https://nlp100.github.io/ja/ch01.html#06-%E9%9B%86%E5%90%88
  """
  @spec run06_union() :: MapSet.t()
  def run06_union, do: MapSet.union(run06_x(), run06_y())

  @doc """
  06. Sets: intersection of the bi-gram sets X and Y (see `run06_union/0`).
  """
  @spec run06_intersection() :: MapSet.t()
  def run06_intersection, do: MapSet.intersection(run06_x(), run06_y())

  @doc """
  06. Sets: bi-grams that are in X but not in Y (see `run06_union/0`).
  """
  @spec run06_difference() :: MapSet.t()
  def run06_difference, do: MapSet.difference(run06_x(), run06_y())

  @doc ~S"""
  06. Sets: whether the bi-gram "se" is contained in both X and Y, i.e. in
  their intersection.

  ## Examples

      iex> WarmUp.run06_has_se?
      false

  """
  @spec run06_has_se?() :: boolean()
  def run06_has_se?, do: MapSet.member?(run06_intersection(), "se")

  # X: character bi-grams of "paraparaparadise".
  defp run06_x, do: "paraparaparadise" |> n_gram(2) |> MapSet.new()

  # Y: character bi-grams of "paragraph".
  defp run06_y, do: "paragraph" |> n_gram(2) |> MapSet.new()

  @doc ~S"""
  07. Sentence generation from a template.

  Interpolates `x`, `y` and `z` into the template "x時のyはz".

  https://nlp100.github.io/ja/ch01.html#07-%E3%83%86%E3%83%B3%E3%83%97%E3%83%AC%E3%83%BC%E3%83%88%E3%81%AB%E3%82%88%E3%82%8B%E6%96%87%E7%94%9F%E6%88%90

  ## Examples

      iex> WarmUp.run07
      "12時の気温は22.4"

  """
  @spec run07(term(), term(), term()) :: String.t()
  def run07(x \\ 12, y \\ "気温", z \\ 22.4), do: "#{x}時の#{y}は#{z}"

  @doc ~S"""
  08. Cipher text.

  Encrypts `str` with `cipher/1` and decrypts it again by applying
  `cipher/1` a second time, so the result equals the input.

  https://nlp100.github.io/ja/ch01.html#08-%E6%9A%97%E5%8F%B7%E6%96%87

  ## Examples

      iex> WarmUp.run08
      "Hello, World"

  """
  @spec run08(String.t()) :: String.t()
  def run08(str \\ "Hello, World") do
    str
    |> cipher()
    |> cipher()
  end

  @doc """
  Replaces every lowercase ASCII letter with the character whose code is
  `219 - code` and leaves all other characters untouched.

  Applying the function twice returns the original string.
  """
  @spec cipher(String.t()) :: String.t()
  def cipher(str) do
    str
    |> String.to_charlist()
    |> Enum.map(&encrypt/1)
    |> List.to_string()
  end

  # 219 == ?a + ?z, so the mapping mirrors the alphabet (a <-> z, b <-> y, ...).
  defp encrypt(point) when point in ?a..?z, do: 219 - point
  defp encrypt(point), do: point

  @doc ~S"""
  09. Typoglycemia.

  Splits `str` on single spaces, applies `to_typoglycemia/1` to every word
  and joins the words with single spaces again. The result is random, but
  short words and the first/last character of every word are stable.

  https://nlp100.github.io/ja/ch01.html#09-typoglycemia

  ## Examples

      iex> WarmUp.run09 |> String.contains?("I")
      true
      iex> WarmUp.run09 |> String.contains?("that")
      true
      iex> WarmUp.run09 |> String.contains?("what")
      true
      iex> WarmUp.run09 |> String.contains?("was")
      true
      iex> WarmUp.run09 |> String.contains?("the")
      true
      iex> WarmUp.run09 |> String.contains?("of")
      true
      iex> WarmUp.run09 |> String.contains?("mind")
      true
      iex> WarmUp.run09 |> String.match?(~r/c[!-~]{6}t/)
      true
      iex> WarmUp.run09 |> String.match?(~r/p[!-~]{8}l/)
      true

  """
  @spec run09(String.t()) :: String.t()
  def run09(
        str \\ "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
      ) do
    str
    |> String.split(" ")
    |> Enum.map_join(" ", &to_typoglycemia/1)
  end

  @doc ~S"""
  Shuffles the inner characters of `word` while keeping its first and last
  character in place. Words of four characters or fewer are returned
  unchanged.

  ## Examples

      iex> WarmUp.to_typoglycemia("that")
      "that"
      iex> WarmUp.to_typoglycemia("apple") |> String.match?(~r/^a[!-~]{3}e$/)
      true
      iex> WarmUp.to_typoglycemia("couldn't") |> String.match?(~r/^c[!-~]{6}t$/)
      true

  """
  @spec to_typoglycemia(String.t()) :: String.t()
  def to_typoglycemia(word) do
    case String.graphemes(word) do
      # first + at least four more characters == a word longer than four.
      [first | rest] when length(rest) >= 4 ->
        {middle, [last]} = Enum.split(rest, -1)
        IO.iodata_to_binary([first, Enum.shuffle(middle), last])

      _short ->
        word
    end
  end
end
-
## Examples
のところに書いてある `iex>` で始まる行とその直後の期待値は Doctest と呼ばれるもので、`mix test` を実行するとドキュメントの例がそのままテストとして実行されます
test/nlp100_test.exs
defmodule Nlp100Test do
  use ExUnit.Case
  doctest Nlp100
  # Added: also run the `iex>` examples in WarmUp's @doc blocks as doctests.
  doctest WarmUp

  # NOTE: the test generated by `mix new` presumably remains here; it is
  # omitted from this excerpt.
end
2. 実行する
$ mix test
...............
Finished in 0.1 seconds
14 doctests, 1 test, 0 failures
$ iex -S mix
Erlang/OTP 23 [erts-11.0.2] [source] [64-bit] [smp:72:2] [ds:72:2:10] [async-threads:1] [hipe]
Interactive Elixir (1.10.3) - press Ctrl+C to exit (type h() ENTER for help)
iex> WarmUp.run00
"desserts"
iex> WarmUp.run06_has_se?
false
iex> System.halt
3. ex_doc
- ドキュメントを作りましょう!
mix.exs
# ex_doc is declared for the :dev environment only and with runtime: false.
defp deps, do: [{:ex_doc, "~> 0.22", only: :dev, runtime: false}]
$ mix deps.get
$ mix docs
-
doc/index.html
をブラウザで開いてみましょう
- 素敵なドキュメントができあがっています!
- ドキュメントの全体はこんな感じになります