LoginSignup
9

More than 1 year has passed since last update.

posted at

updated at

Elixirで自然言語処理100本ノック 2020を解いた[第1章 準備運動]

はじめに

0. インストールとプロジェクトの作成

$ mix new nlp100
$ cd nlp100

1. ソースコードを書く

lib/warm_up.ex
defmodule WarmUp do
  @moduledoc """
  Solutions to chapter 1 ("Warm-up") of NLP 100 Exercise 2020.

  https://nlp100.github.io/ja/ch01.html
  """

  # 1-based positions of the words in the exercise 04 sentence that
  # contribute only their first letter (all other words contribute two).
  @single_letter_positions [1, 5, 6, 7, 8, 9, 15, 16, 19]

  @doc ~S"""
  00. Reversed string.

  Returns `str` with its characters in reverse order.

  https://nlp100.github.io/ja/ch01.html#00-%E6%96%87%E5%AD%97%E5%88%97%E3%81%AE%E9%80%86%E9%A0%86

  ## Examples

      iex> WarmUp.run00
      "desserts"

  """
  @spec run00(String.t()) :: String.t()
  def run00(str \\ "stressed"), do: String.reverse(str)

  @doc ~S"""
  01. Every other character.

  Returns the 1st, 3rd, 5th, ... characters of `str` joined into one string.

  https://nlp100.github.io/ja/ch01.html#01-%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run01
      "パトカー"

  """
  @spec run01(String.t()) :: String.t()
  def run01(str \\ "パタトクカシーー") do
    str
    |> String.graphemes()
    |> Enum.take_every(2)
    |> Enum.join()
  end

  @doc ~S"""
  02. Interleaving two strings.

  Alternates the characters of `str1` and `str2`, starting with `str1`.
  Characters beyond the length of the shorter string are dropped.

  https://nlp100.github.io/ja/ch01.html#02-%E3%83%91%E3%83%88%E3%82%AB%E3%83%BC%E3%82%BF%E3%82%AF%E3%82%B7%E3%83%BC%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run02
      "パタトクカシーー"

  """
  @spec run02(String.t(), String.t()) :: String.t()
  def run02(str1 \\ "パトカー", str2 \\ "タクシー") do
    str1
    |> String.graphemes()
    |> Enum.zip(String.graphemes(str2))
    |> Enum.map_join(fn {c1, c2} -> c1 <> c2 end)
  end

  @doc ~S"""
  03. Pi.

  Removes `.` and `,` from `str`, splits it on single spaces and returns the
  length of every word in order of appearance.

  https://nlp100.github.io/ja/ch01.html#03-%E5%86%86%E5%91%A8%E7%8E%87

  ## Examples

      iex> WarmUp.run03
      [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]

  """
  @spec run03(String.t()) :: [non_neg_integer()]
  def run03(
        str \\ "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
      ) do
    str
    |> String.replace([".", ","], "")
    |> String.split(" ")
    |> Enum.map(&String.length/1)
  end

  @doc ~S"""
  04. Atomic symbols.

  Splits `str` on single spaces and maps a prefix of every word to the word's
  1-based position. Words at positions 1, 5, 6, 7, 8, 9, 15, 16 and 19
  contribute their first character; all other words contribute their first
  two characters.

  https://nlp100.github.io/ja/ch01.html#04-%E5%85%83%E7%B4%A0%E8%A8%98%E5%8F%B7

  ## Examples

      iex> WarmUp.run04
      %{"Al" => 13, "Ar" => 18, "B" => 5, "Be" => 4, "C" => 6, "Ca" => 20, "Cl" => 17, "F" => 9, "H" => 1, "He" => 2, "K" => 19, "Li" => 3, "Mi" => 12, "N" => 7, "Na" => 11, "Ne" => 10, "O" => 8, "P" => 15, "S" => 16, "Si" => 14}
      iex> WarmUp.run04 |> map_size
      20

  """
  @spec run04(String.t()) :: %{optional(String.t()) => pos_integer()}
  def run04(
        str \\ "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
      ) do
    str
    |> String.split(" ")
    |> Enum.with_index(1)
    |> Map.new(&extract/1)
  end

  # Turns a {word, position} pair into a {symbol, position} pair.
  defp extract({word, position}) when position in @single_letter_positions do
    {String.slice(word, 0, 1), position}
  end

  defp extract({word, position}), do: {String.slice(word, 0, 2), position}

  @doc ~S"""
  05. n-gram (characters).

  Returns the character n-grams of `str` for every n from 1 up to the length
  of `str`. An empty string yields an empty list.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_character
      [
        ["I", " ", "a", "m", " ", "a", "n", " ", "N", "L", "P", "e", "r"],
        ["I ", " a", "am", "m ", " a", "an", "n ", " N", "NL", "LP", "Pe", "er"],
        ["I a", " am", "am ", "m a", " an", "an ", "n N", " NL", "NLP", "LPe", "Per"],
        ["I am", " am ", "am a", "m an", " an ", "an N", "n NL", " NLP", "NLPe", "LPer"],
        ["I am ", " am a", "am an", "m an ", " an N", "an NL", "n NLP", " NLPe", "NLPer"],
        ["I am a", " am an", "am an ", "m an N", " an NL", "an NLP", "n NLPe", " NLPer"],
        ["I am an", " am an ", "am an N", "m an NL", " an NLP", "an NLPe", "n NLPer"],
        ["I am an ", " am an N", "am an NL", "m an NLP", " an NLPe", "an NLPer"],
        ["I am an N", " am an NL", "am an NLP", "m an NLPe", " an NLPer"],
        ["I am an NL", " am an NLP", "am an NLPe", "m an NLPer"],
        ["I am an NLP", " am an NLPe", "am an NLPer"],
        ["I am an NLPe", " am an NLPer"],
        ["I am an NLPer"]
      ]

  """
  @spec run05_character(String.t()) :: [[String.t()]]
  def run05_character(str \\ "I am an NLPer") do
    all_n_grams(str, String.length(str))
  end

  @doc ~S"""
  05. n-gram (words).

  Splits `str` on single spaces and returns the word n-grams for every n from
  1 up to the number of words.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_word
      [
        [["I"], ["am"], ["an"], ["NLPer"]],
        [["I", "am"], ["am", "an"], ["an", "NLPer"]],
        [["I", "am", "an"], ["am", "an", "NLPer"]],
        [["I", "am", "an", "NLPer"]]
      ]

  """
  @spec run05_word(String.t()) :: [[[String.t()]]]
  def run05_word(str \\ "I am an NLPer") do
    words = String.split(str, " ")
    all_n_grams(words, length(words))
  end

  # Collects the n-grams of `sequence` for n = 1..max_n. The zero case is
  # handled separately because the range 1..0 would count downwards.
  defp all_n_grams(_sequence, 0), do: []
  defp all_n_grams(sequence, max_n), do: Enum.map(1..max_n, &n_gram(sequence, &1))

  @doc ~S"""
  Returns all contiguous n-grams of a string or a list.

  For a string the result is a list of substrings of `n` characters; for a
  list it is a list of sublists of `n` elements. `n` must be a positive
  integer. When `n` exceeds the length of the input the result is `[]`.

  ## Examples

      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 1)
      ["こ", "ち", "ら", "葛", "飾", "区", "亀", "有", "公", "園", "前", "派", "出", "所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 2)
      ["こち", "ちら", "ら葛", "葛飾", "飾区", "区亀", "亀有", "有公", "公園", "園前", "前派", "派出", "出所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 3)
      ["こちら", "ちら葛", "ら葛飾", "葛飾区", "飾区亀", "区亀有", "亀有公", "有公園", "公園前", "園前派", "前派出", "派出所"]
      iex> WarmUp.n_gram([1, 2, 3], 1)
      [[1], [2], [3]]
      iex> WarmUp.n_gram([1, 2, 3], 2)
      [[1, 2], [2, 3]]
      iex> WarmUp.n_gram([1, 2, 3], 3)
      [[1, 2, 3]]
      iex> WarmUp.n_gram([1, 2, 3], 4)
      []

  """
  @spec n_gram(String.t(), pos_integer()) :: [String.t()]
  @spec n_gram(list(), pos_integer()) :: [list()]
  def n_gram(str, n) when is_binary(str) and is_integer(n) and n > 0 do
    str
    |> String.graphemes()
    |> Enum.chunk_every(n, 1, :discard)
    |> Enum.map(&Enum.join/1)
  end

  def n_gram(list, n) when is_list(list) and is_integer(n) and n > 0 do
    # A sliding window with step 1; :discard drops the trailing windows that
    # would be shorter than n, so inputs shorter than n yield [].
    Enum.chunk_every(list, n, 1, :discard)
  end

  @doc """
  06. Sets: union of the character bi-grams of "paraparaparadise" (X) and
  "paragraph" (Y).

  https://nlp100.github.io/ja/ch01.html#06-%E9%9B%86%E5%90%88
  """
  @spec run06_union() :: MapSet.t(String.t())
  def run06_union, do: MapSet.union(run06_x(), run06_y())

  @doc """
  06. Sets: bi-grams that occur in both X and Y.
  """
  @spec run06_intersection() :: MapSet.t(String.t())
  def run06_intersection, do: MapSet.intersection(run06_x(), run06_y())

  @doc """
  06. Sets: bi-grams that occur in X but not in Y.
  """
  @spec run06_difference() :: MapSet.t(String.t())
  def run06_difference, do: MapSet.difference(run06_x(), run06_y())

  @doc ~S"""
  06. Sets: whether the bi-gram "se" occurs in both X and Y.

  ## Examples

      iex> WarmUp.run06_has_se?
      false

  """
  @spec run06_has_se?() :: boolean()
  def run06_has_se?, do: MapSet.member?(run06_intersection(), "se")

  defp run06_x, do: "paraparaparadise" |> n_gram(2) |> MapSet.new()
  defp run06_y, do: "paragraph" |> n_gram(2) |> MapSet.new()

  @doc ~S"""
  07. Template-based sentence generation.

  Builds the sentence "x時のyはz" from the three arguments.

  https://nlp100.github.io/ja/ch01.html#07-%E3%83%86%E3%83%B3%E3%83%97%E3%83%AC%E3%83%BC%E3%83%88%E3%81%AB%E3%82%88%E3%82%8B%E6%96%87%E7%94%9F%E6%88%90

  ## Examples

      iex> WarmUp.run07
      "12時の気温は22.4"

  """
  @spec run07(String.Chars.t(), String.Chars.t(), String.Chars.t()) :: String.t()
  def run07(x \\ 12, y \\ "気温", z \\ 22.4), do: "#{x}時の#{y}は#{z}"

  @doc ~S"""
  08. Cipher text.

  Applies `cipher/1` twice, which yields the original string again.

  https://nlp100.github.io/ja/ch01.html#08-%E6%9A%97%E5%8F%B7%E6%96%87

  ## Examples

      iex> WarmUp.run08
      "Hello, World"

  """
  @spec run08(String.t()) :: String.t()
  def run08(str \\ "Hello, World") do
    str |> cipher() |> cipher()
  end

  @doc """
  Replaces every lowercase ASCII letter with the character whose code is
  `219 - code` and leaves every other character unchanged.

  Because the mapping is its own inverse, applying it twice restores the
  input.
  """
  @spec cipher(String.t()) :: String.t()
  def cipher(str) do
    str
    |> String.to_charlist()
    |> Enum.map(&encrypt/1)
    |> List.to_string()
  end

  defp encrypt(point) when point in ?a..?z, do: 219 - point
  defp encrypt(point), do: point

  @doc ~S"""
  09. Typoglycemia.

  Splits `str` on single spaces, passes every word through
  `to_typoglycemia/1` and joins the results with single spaces.

  https://nlp100.github.io/ja/ch01.html#09-typoglycemia

  ## Examples

      iex> WarmUp.run09 |> String.contains?("I")
      true
      iex> WarmUp.run09 |> String.contains?("that")
      true
      iex> WarmUp.run09 |> String.contains?("what")
      true
      iex> WarmUp.run09 |> String.contains?("was")
      true
      iex> WarmUp.run09 |> String.contains?("the")
      true
      iex> WarmUp.run09 |> String.contains?("of")
      true
      iex> WarmUp.run09 |> String.contains?("mind")
      true
      iex> WarmUp.run09 |> String.match?(~r/c[!-~]{6}t/)
      true
      iex> WarmUp.run09 |> String.match?(~r/p[!-~]{8}l/)
      true

  """
  @spec run09(String.t()) :: String.t()
  def run09(
        str \\ "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
      ) do
    str
    |> String.split(" ")
    |> Enum.map_join(" ", &to_typoglycemia/1)
  end

  @doc ~S"""
  Shuffles the inner characters of a word.

  Words of four characters or fewer are returned unchanged. For longer words
  the first and last characters stay in place and the characters in between
  are put in random order.

  ## Examples

      iex> WarmUp.to_typoglycemia("that")
      "that"
      iex> WarmUp.to_typoglycemia("apple") |> String.match?(~r/^a[!-~]{3}e$/)
      true
      iex> WarmUp.to_typoglycemia("couldn't") |> String.match?(~r/^c[!-~]{6}t$/)
      true

  """
  @spec to_typoglycemia(String.t()) :: String.t()
  def to_typoglycemia(word) do
    case String.graphemes(word) do
      # At least 5 characters in total: one head plus four or more after it.
      [first | rest] when length(rest) >= 4 ->
        {middle, [last]} = Enum.split(rest, -1)
        IO.iodata_to_binary([first, Enum.shuffle(middle), last])

      _short ->
        word
    end
  end
end
  • `## Examples`のところに書いてある`iex>`の例はDoctestと呼ばれるもので、そのままテストとして実行できます
test/nlp100_test.exs
defmodule Nlp100Test do
  use ExUnit.Case
  doctest Nlp100
  doctest WarmUp #追加

2. 実行する

$ mix test
...............

Finished in 0.1 seconds
14 doctests, 1 test, 0 failures
$ iex -S mix
Erlang/OTP 23 [erts-11.0.2] [source] [64-bit] [smp:72:2] [ds:72:2:10] [async-threads:1] [hipe]

Interactive Elixir (1.10.3) - press Ctrl+C to exit (type h() ENTER for help)
iex> WarmUp.run00
"desserts"
iex> WarmUp.run06_has_se?
false
iex> System.halt

3. ex_doc

  • ドキュメントを作りましょう!
mix.exs
  # Dependencies of the project. ex_doc is declared with `only: :dev` and
  # `runtime: false`, i.e. it is limited to the :dev environment and is not
  # marked as a runtime dependency; it is the tool behind `mix docs`.
  defp deps do
    [
      {:ex_doc, "~> 0.22", only: :dev, runtime: false}
    ]
  end
$ mix deps.get
$ mix docs
  • doc/index.htmlをブラウザで開いてみましょう

スクリーンショット 2020-06-14 1.37.09.png

  • 素敵なドキュメントができあがっています!:tada: :fire: :rocket:
  • ドキュメントの全体はこんな感じになります

Wrapping Up

  • テストやコメント、空行を含んでlib/warm_up.exは、310行でできています
  • 今回つかった主なモジュールは Enum, String, MapSet, List です
  • 特に、 Enumにお世話になりました
    • いつもお世話になっています!!!
  • お好みの言語でお楽しみください
  • Enjoy!!!

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
What you can do with signing up
9