LoginSignup
12
9

More than 3 years have passed since last update.

Elixirで自然言語処理100本ノック 2020を解いた[第1章 準備運動]

Last updated at Posted at 2020-06-13

はじめに

0. インストールとプロジェクトの作成

$ mix new nlp100
$ cd nlp100

1. ソースコードを書く

lib/warm_up.ex
defmodule WarmUp do
  @moduledoc """
  Solutions to chapter 1 ("Warm-up") of the NLP 100 Exercises 2020.

  Every `runNN` function takes the exercise's sample input as its default
  argument, so calling it with no arguments reproduces the expected answer.

  https://nlp100.github.io/ja/ch01.html
  """

  require Integer

  # 1-based word positions in exercise 04 whose symbol is a single letter.
  @single_letter_positions [1, 5, 6, 7, 8, 9, 15, 16, 19]

  @doc ~S"""
  00. Reversed string.

  Returns `str` with its characters in reverse order.

  https://nlp100.github.io/ja/ch01.html#00-%E6%96%87%E5%AD%97%E5%88%97%E3%81%AE%E9%80%86%E9%A0%86

  ## Examples

      iex> WarmUp.run00
      "desserts"

  """
  @spec run00(String.t()) :: String.t()
  def run00(str \\ "stressed"), do: String.reverse(str)

  @doc ~S"""
  01. Every other character.

  Returns the characters of `str` found at odd 1-based positions
  (1st, 3rd, 5th, ...), concatenated.

  https://nlp100.github.io/ja/ch01.html#01-%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run01
      "パトカー"

  """
  @spec run01(String.t()) :: String.t()
  def run01(str \\ "パタトクカシーー") do
    # Graphemes (not codepoints) so that combining sequences stay intact.
    for {char, position} <- Enum.with_index(String.graphemes(str), 1),
        Integer.is_odd(position),
        into: "",
        do: char
  end

  @doc ~S"""
  02. Interleaving two strings.

  Alternates the characters of `str1` and `str2`, starting with `str1`.
  If the strings differ in length, the surplus of the longer one is dropped.

  https://nlp100.github.io/ja/ch01.html#02-%E3%83%91%E3%83%88%E3%82%AB%E3%83%BC%E3%82%BF%E3%82%AF%E3%82%B7%E3%83%BC%E3%83%91%E3%82%BF%E3%83%88%E3%82%AF%E3%82%AB%E3%82%B7%E3%83%BC%E3%83%BC

  ## Examples

      iex> WarmUp.run02
      "パタトクカシーー"

  """
  @spec run02(String.t(), String.t()) :: String.t()
  def run02(str1 \\ "パトカー", str2 \\ "タクシー") do
    str1
    |> String.graphemes()
    |> Enum.zip(String.graphemes(str2))
    |> Enum.map_join(fn {c1, c2} -> c1 <> c2 end)
  end

  @doc ~S"""
  03. Pi.

  Removes `.` and `,` from `str`, splits it into whitespace-separated words
  and returns the length of each word in order of appearance.

  https://nlp100.github.io/ja/ch01.html#03-%E5%86%86%E5%91%A8%E7%8E%87

  ## Examples

      iex> WarmUp.run03
      [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]

  """
  @spec run03(String.t()) :: [non_neg_integer()]
  def run03(
        str \\ "Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics."
      ) do
    str
    |> String.replace([".", ","], "")
    # String.split/1 splits on any whitespace run, so repeated spaces do not
    # produce empty "words" of length 0.
    |> String.split()
    |> Enum.map(&String.length/1)
  end

  @doc ~S"""
  04. Atomic symbols.

  Splits `str` into words and maps a symbol taken from each word to the
  word's 1-based position. The symbol is the first letter for the words at
  positions 1, 5, 6, 7, 8, 9, 15, 16 and 19, and the first two letters for
  all other words. (With the exercise sentence the 12th word yields `"Mi"`.)

  https://nlp100.github.io/ja/ch01.html#04-%E5%85%83%E7%B4%A0%E8%A8%98%E5%8F%B7

  ## Examples

      iex> WarmUp.run04
      %{"Al" => 13, "Ar" => 18, "B" => 5, "Be" => 4, "C" => 6, "Ca" => 20, "Cl" => 17, "F" => 9, "H" => 1, "He" => 2, "K" => 19, "Li" => 3, "Mi" => 12, "N" => 7, "Na" => 11, "Ne" => 10, "O" => 8, "P" => 15, "S" => 16, "Si" => 14}
      iex> WarmUp.run04 |> map_size
      20

  """
  @spec run04(String.t()) :: %{optional(String.t()) => pos_integer()}
  def run04(
        str \\ "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
      ) do
    str
    |> String.split()
    |> Enum.with_index(1)
    |> Map.new(&symbol_entry/1)
  end

  # Builds the {symbol, position} pair for one indexed word of exercise 04.
  defp symbol_entry({word, position}) when position in @single_letter_positions do
    {String.slice(word, 0, 1), position}
  end

  defp symbol_entry({word, position}), do: {String.slice(word, 0, 2), position}

  @doc ~S"""
  05. n-gram (character level).

  Returns the character n-grams of `str` for every `n` from 1 up to the
  length of `str`, as a list indexed by `n - 1`. An empty string yields `[]`.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_character
      [
        ["I", " ", "a", "m", " ", "a", "n", " ", "N", "L", "P", "e", "r"],
        ["I ", " a", "am", "m ", " a", "an", "n ", " N", "NL", "LP", "Pe", "er"],
        ["I a", " am", "am ", "m a", " an", "an ", "n N", " NL", "NLP", "LPe", "Per"],
        ["I am", " am ", "am a", "m an", " an ", "an N", "n NL", " NLP", "NLPe", "LPer"],
        ["I am ", " am a", "am an", "m an ", " an N", "an NL", "n NLP", " NLPe", "NLPer"],
        ["I am a", " am an", "am an ", "m an N", " an NL", "an NLP", "n NLPe", " NLPer"],
        ["I am an", " am an ", "am an N", "m an NL", " an NLP", "an NLPe", "n NLPer"],
        ["I am an ", " am an N", "am an NL", "m an NLP", " an NLPe", "an NLPer"],
        ["I am an N", " am an NL", "am an NLP", "m an NLPe", " an NLPer"],
        ["I am an NL", " am an NLP", "am an NLPe", "m an NLPer"],
        ["I am an NLP", " am an NLPe", "am an NLPer"],
        ["I am an NLPe", " am an NLPer"],
        ["I am an NLPer"]
      ]
      iex> WarmUp.run05_character("")
      []

  """
  @spec run05_character(String.t()) :: [[String.t()]]
  def run05_character(str \\ "I am an NLPer") do
    all_n_grams(str, String.length(str))
  end

  @doc ~S"""
  05. n-gram (word level).

  Splits `str` into whitespace-separated words and returns the word n-grams
  for every `n` from 1 up to the number of words, as a list indexed by
  `n - 1`. A string without words yields `[]`.

  https://nlp100.github.io/ja/ch01.html#05-n-gram

  ## Examples

      iex> WarmUp.run05_word
      [
        [["I"], ["am"], ["an"], ["NLPer"]],
        [["I", "am"], ["am", "an"], ["an", "NLPer"]],
        [["I", "am", "an"], ["am", "an", "NLPer"]],
        [["I", "am", "an", "NLPer"]]
      ]

  """
  @spec run05_word(String.t()) :: [[[String.t()]]]
  def run05_word(str \\ "I am an NLPer") do
    words = String.split(str)
    all_n_grams(words, length(words))
  end

  # Collects the n-grams of `sequence` for n = 1..max_n. The zero case is
  # handled separately because `1..0` is a descending range in Elixir and
  # would otherwise be iterated as [1, 0].
  defp all_n_grams(_sequence, 0), do: []
  defp all_n_grams(sequence, max_n), do: Enum.map(1..max_n, &n_gram(sequence, &1))

  @doc ~S"""
  Returns the n-grams of a string or a list.

  For a string the n-grams are substrings of `n` characters; for a list they
  are sub-lists of `n` elements. `n` must be a positive integer. When `n` is
  larger than the input, the result is `[]`.

  ## Examples

      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 1)
      ["こ", "ち", "ら", "葛", "飾", "区", "亀", "有", "公", "園", "前", "派", "出", "所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 2)
      ["こち", "ちら", "ら葛", "葛飾", "飾区", "区亀", "亀有", "有公", "公園", "園前", "前派", "派出", "出所"]
      iex> WarmUp.n_gram("こちら葛飾区亀有公園前派出所", 3)
      ["こちら", "ちら葛", "ら葛飾", "葛飾区", "飾区亀", "区亀有", "亀有公", "有公園", "公園前", "園前派", "前派出", "派出所"]
      iex> WarmUp.n_gram("ab", 3)
      []
      iex> WarmUp.n_gram([1, 2, 3], 1)
      [[1], [2], [3]]
      iex> WarmUp.n_gram([1, 2, 3], 2)
      [[1, 2], [2, 3]]
      iex> WarmUp.n_gram([1, 2, 3], 3)
      [[1, 2, 3]]
      iex> WarmUp.n_gram([1, 2, 3], 4)
      []

  """
  @spec n_gram(String.t() | list(), pos_integer()) :: [String.t()] | [list()]
  def n_gram(str, n) when is_binary(str) and is_integer(n) and n > 0 do
    str
    |> String.graphemes()
    |> n_gram(n)
    |> Enum.map(&Enum.join/1)
  end

  def n_gram(list, n) when is_list(list) and is_integer(n) and n > 0 do
    # A sliding window of size n with step 1; :discard drops the incomplete
    # trailing windows, so inputs shorter than n simply give [].
    Enum.chunk_every(list, n, 1, :discard)
  end

  @doc ~S"""
  06. Sets: union of the character bi-gram sets of "paraparaparadise" (X)
  and "paragraph" (Y).

  https://nlp100.github.io/ja/ch01.html#06-%E9%9B%86%E5%90%88

  ## Examples

      iex> WarmUp.run06_union() |> Enum.sort()
      ["ad", "ag", "ap", "ar", "di", "gr", "is", "pa", "ph", "ra", "se"]

  """
  @spec run06_union() :: MapSet.t()
  def run06_union, do: MapSet.union(run06_x(), run06_y())

  @doc ~S"""
  06. Sets: intersection of the bi-gram sets X and Y.

  ## Examples

      iex> WarmUp.run06_intersection() |> Enum.sort()
      ["ap", "ar", "pa", "ra"]

  """
  @spec run06_intersection() :: MapSet.t()
  def run06_intersection, do: MapSet.intersection(run06_x(), run06_y())

  @doc ~S"""
  06. Sets: bi-grams that are in X but not in Y.

  ## Examples

      iex> WarmUp.run06_difference() |> Enum.sort()
      ["ad", "di", "is", "se"]

  """
  @spec run06_difference() :: MapSet.t()
  def run06_difference, do: MapSet.difference(run06_x(), run06_y())

  @doc ~S"""
  06. Sets: tells whether the bi-gram "se" is contained in both X and Y,
  i.e. in their intersection.

  ## Examples

      iex> WarmUp.run06_has_se?
      false

  """
  @spec run06_has_se?() :: boolean()
  def run06_has_se?, do: MapSet.member?(run06_intersection(), "se")

  # X: the set of character bi-grams of "paraparaparadise".
  defp run06_x, do: "paraparaparadise" |> n_gram(2) |> MapSet.new()

  # Y: the set of character bi-grams of "paragraph".
  defp run06_y, do: "paragraph" |> n_gram(2) |> MapSet.new()

  @doc ~S"""
  07. Template-based sentence generation.

  Fills `x`, `y` and `z` into the template "x時のyはz".

  https://nlp100.github.io/ja/ch01.html#07-%E3%83%86%E3%83%B3%E3%83%97%E3%83%AC%E3%83%BC%E3%83%88%E3%81%AB%E3%82%88%E3%82%8B%E6%96%87%E7%94%9F%E6%88%90

  ## Examples

      iex> WarmUp.run07
      "12時の気温は22.4"

  """
  @spec run07(term(), term(), term()) :: String.t()
  def run07(x \\ 12, y \\ "気温", z \\ 22.4), do: "#{x}時の#{y}は#{z}"

  @doc ~S"""
  08. Cipher text.

  Encrypts `str` with `cipher/1` and then applies `cipher/1` again, which
  restores the original text.

  https://nlp100.github.io/ja/ch01.html#08-%E6%9A%97%E5%8F%B7%E6%96%87

  ## Examples

      iex> WarmUp.run08
      "Hello, World"

  """
  @spec run08(String.t()) :: String.t()
  def run08(str \\ "Hello, World") do
    str |> cipher() |> cipher()
  end

  @doc ~S"""
  Replaces every lowercase ASCII letter of `str` by the character with code
  `219 - code` (so `a` <-> `z`, `b` <-> `y`, ...) and leaves all other
  characters untouched. Applying it twice returns the original string.

  ## Examples

      iex> WarmUp.cipher("Hello, World")
      "Hvool, Wliow"

  """
  @spec cipher(String.t()) :: String.t()
  def cipher(str) do
    str
    |> String.to_charlist()
    |> Enum.map(&encrypt/1)
    |> List.to_string()
  end

  # ?a + ?z == 219, hence 219 - point mirrors a letter within a..z.
  defp encrypt(point) when point in ?a..?z, do: 219 - point
  defp encrypt(point), do: point

  @doc ~S"""
  09. Typoglycemia.

  Splits `str` on single spaces, applies `to_typoglycemia/1` to every word
  and joins the words with single spaces again. The result is random for
  words longer than four characters.

  https://nlp100.github.io/ja/ch01.html#09-typoglycemia

  ## Examples

      iex> WarmUp.run09 |> String.contains?("I")
      true
      iex> WarmUp.run09 |> String.contains?("that")
      true
      iex> WarmUp.run09 |> String.contains?("what")
      true
      iex> WarmUp.run09 |> String.contains?("was")
      true
      iex> WarmUp.run09 |> String.contains?("the")
      true
      iex> WarmUp.run09 |> String.contains?("of")
      true
      iex> WarmUp.run09 |> String.contains?("mind")
      true
      iex> WarmUp.run09 |> String.match?(~r/c[!-~]{6}t/)
      true
      iex> WarmUp.run09 |> String.match?(~r/p[!-~]{8}l/)
      true

  """
  @spec run09(String.t()) :: String.t()
  def run09(
        str \\ "I couldn't believe that I could actually understand what I was reading : the phenomenal power of the human mind ."
      ) do
    str
    |> String.split(" ")
    |> Enum.map_join(" ", &to_typoglycemia/1)
  end

  @doc ~S"""
  Shuffles the inner characters of `word`, keeping its first and last
  character in place. Words of four characters or fewer are returned
  unchanged.

  ## Examples

      iex> WarmUp.to_typoglycemia("that")
      "that"
      iex> WarmUp.to_typoglycemia("apple") |> String.match?(~r/^a[!-~]{3}e$/)
      true
      iex> WarmUp.to_typoglycemia("couldn't") |> String.match?(~r/^c[!-~]{6}t$/)
      true

  """
  @spec to_typoglycemia(String.t()) :: String.t()
  def to_typoglycemia(word) do
    # Length check and splitting both work on graphemes, so the two can never
    # disagree for text containing combining characters.
    case String.graphemes(word) do
      [first | rest] when length(rest) >= 4 ->
        {middle, [last]} = Enum.split(rest, -1)
        IO.iodata_to_binary([first, Enum.shuffle(middle), last])

      _four_or_fewer ->
        word
    end
  end
end
  • `## Examples` の下に書いた `iex>` の例は Doctest と呼ばれ、テストファイルに `doctest WarmUp` を追加すると `mix test` でそのままテストとして実行されます
test/nlp100_test.exs
defmodule Nlp100Test do
  use ExUnit.Case
  doctest Nlp100
  doctest WarmUp #追加

2. 実行する

$ mix test
...............

Finished in 0.1 seconds
14 doctests, 1 test, 0 failures
$ iex -S mix
Erlang/OTP 23 [erts-11.0.2] [source] [64-bit] [smp:72:2] [ds:72:2:10] [async-threads:1] [hipe]

Interactive Elixir (1.10.3) - press Ctrl+C to exit (type h() ENTER for help)
iex> WarmUp.run00
"desserts"
iex> WarmUp.run06_has_se?
false
iex> System.halt

3. ex_doc

  • ドキュメントを作りましょう!
mix.exs
  # Project dependencies.
  # ex_doc is restricted to the :dev environment and is not started at
  # runtime (`runtime: false`).
  defp deps do
    [
      {:ex_doc, "~> 0.22", only: :dev, runtime: false}
    ]
  end
$ mix deps.get
$ mix docs
  • doc/index.htmlをブラウザで開いてみましょう

スクリーンショット 2020-06-14 1.37.09.png

  • 素敵なドキュメントができあがっています!:tada: :fire: :rocket:
  • ドキュメントの全体はこんな感じになります

Wrapping Up

  • テストやコメント、空行を含んでlib/warm_up.exは、310行でできています
  • 今回つかったモジュールは以下の通りです: Enum, String, MapSet, Integer, List
  • 特に、 Enumにお世話になりました
    • いつもお世話になっています!!!
  • お好みの言語でお楽しみください
  • Enjoy!!!
12
9
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
12
9