LoginSignup
2
2

More than 5 years have passed since last update.

言語処理100本ノックから

Last updated at Posted at 2016-04-05

参考 URL

http://www.cl.ecei.tohoku.ac.jp/nlp100/
http://mocobeta-backup.tumblr.com/post/122417696767/100-2015-scala-1
http://mocobeta-backup.tumblr.com/post/122514402372/100-2015-scala-2
http://www.geocities.jp/m_hiroi/java/scala.html

第1章

00

StackOverflow から

/**
 * Returns `s` with its characters in reverse order.
 *
 * The empty string is returned unchanged (the previous recursive version
 * threw `StringIndexOutOfBoundsException` for it), and arbitrarily long
 * inputs are handled without deep recursion.
 *
 * @param s the string to reverse
 * @return the reversed string
 */
def reverse(s: String): String =
  // java.lang.StringBuilder#reverse runs in a loop and keeps surrogate pairs
  // (characters outside the BMP, e.g. emoji) in the correct order.
  new java.lang.StringBuilder(s).reverse().toString

02

zip, flatMap を使った例があったけど理解できなかったので,まずはわかる範囲で再帰を使ってみた.

/**
 * Interleaves the characters of two strings: s1(0), s2(0), s1(1), s2(1), ...
 * When one string runs out, the rest of the other one is appended as is.
 *
 * @param s1 supplies the characters at even positions of the result
 * @param s2 supplies the characters at odd positions of the result
 * @return the interleaved string
 */
def mergeStr(s1: String, s2: String): String = {
  // Tail-recursive so that long inputs do not overflow the stack.
  // `acc` holds the already merged prefix in reverse order.
  @scala.annotation.tailrec
  def mergeList[A](l1: List[A], l2: List[A], acc: List[A]): List[A] =
    (l1, l2) match {
      case (_, Nil)                   => acc.reverse ::: l1
      case (Nil, _)                   => acc.reverse ::: l2
      case (x :: xRest, y :: yRest)   => mergeList(xRest, yRest, y :: x :: acc)
    }

  mergeList(s1.toList, s2.toList, Nil).mkString
}

04

/**
 * Exercise 04: splits the sentence into words and prints a map from an
 * "element symbol" to the 1-based position of its word. Words at the listed
 * positions contribute their first character, all others their first two.
 */
def p04(): Unit = {
  val sentence = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."
  // 1-based word positions that use a single-character symbol.
  val singleCharPositions = Array(1, 5, 6, 7, 8, 9, 15, 16, 19)

  val symbolToPosition = sentence.split(" ").zipWithIndex.map { case (word, idx) =>
    val position = idx + 1
    // Keys are a Char for one-letter symbols and a String otherwise.
    val symbol: Any =
      if (singleCharPositions.contains(position)) word.charAt(0)
      else word.substring(0, 2)
    symbol -> position
  }.toMap

  println(symbolToPosition)
}

05

いくつかコードを書いたけどピンとこないので、整理

  • 文字bi-gram のときの空白、カンマ、ピリオドの扱いをどうするか。
  • Map は mutable でないと追加できない?
  • 文字列で受ける、リストで受ける? Char と String 両方で受けられるように型パラメータを使うのが綺麗か?
// Character bi-grams: slide a 2-character window over the raw string (spaces are part of the windows).
"I am an NLPer".sliding(2).toSeq
// Word bi-grams: split on single spaces first, then slide a 2-element window over the resulting words.
"I am an NLPer".split(" ").sliding(2).toSeq

06

  • 05 では重複を排除しないことにした。
  • ソートして、重複を除いた集合を作る(ソートは必要ないけど)
  • Set の union と Seq の union だと結果が異なる、ここでは重複は除く
  • ※今はソートまで、distinct の実装はこれから
  /**
   * Sorts `seq` in ascending order with a hand-written merge sort and then
   * removes duplicate elements.
   *
   * The implicit parameter is the explicit form of the former view bound
   * `A <% Ordered[A]` (view bounds are deprecated); call sites are unaffected.
   *
   * @param seq the elements to sort
   * @param ev  view of `A` as `Ordered[A]`, used for the `<` comparison
   * @return the distinct elements of `seq` in ascending order
   */
  def sortAndDedupe[A](seq: Seq[A])(implicit ev: A => Ordered[A]): Seq[A] = {
    // Merges two sorted lists. Tail-recursive (the merged prefix is kept
    // reversed in `acc`) so that large inputs cannot overflow the stack.
    @scala.annotation.tailrec
    def merge(left: List[A], right: List[A], acc: List[A]): List[A] =
      (left, right) match {
        case (Nil, _) => acc.reverse ::: right
        case (_, Nil) => acc.reverse ::: left
        case (l :: ls, r :: rs) =>
          if (ev(l) < r) merge(ls, right, l :: acc)
          else merge(left, rs, r :: acc)
      }

    // Top-down merge sort; recursion depth here is only logarithmic.
    def msort(xs: List[A]): List[A] = {
      val half = xs.length / 2
      if (half == 0) xs
      else {
        val (front, back) = xs.splitAt(half)
        merge(msort(front), msort(back), Nil)
      }
    }

    msort(seq.toList).distinct
  }
/**
 * Sorts a list with a top-down merge sort.
 *
 * @param comp ordering predicate; during a merge the head of the left half
 *             is taken only when `comp(left, right)` is true, otherwise the
 *             head of the right half is taken
 * @param list the list to sort
 * @return a new list containing the elements of `list` ordered by `comp`
 */
def msort[A](comp: (A, A) => Boolean)(list: List[A]): List[A] = {
  // Merges two sorted lists. Tail-recursive (the merged prefix is kept
  // reversed in `acc`) so that large lists cannot overflow the stack.
  @scala.annotation.tailrec
  def merge(first: List[A], second: List[A], acc: List[A]): List[A] =
    (first, second) match {
      case (x :: xs, y :: _) if comp(x, y) => merge(xs, second, x :: acc)
      case (_, y :: ys)                    => merge(first, ys, y :: acc)
      case (_, Nil)                        => acc.reverse ::: first
    }

  val n = list.length / 2
  if (n == 0) list
  else {
    val (front, back) = list.splitAt(n)
    merge(msort(comp)(front), msort(comp)(back), Nil)
  }
}

/**
 * Removes duplicate elements from a list, keeping the first occurrence of
 * each element and preserving the original order.
 *
 * Already-seen elements are tracked in a `Set`, so each membership check is
 * a set lookup instead of a linear scan over the result built so far.
 *
 * @param list the list that may contain duplicates
 * @return the elements of `list` without duplicates, in first-seen order
 */
def distinct[A](list: List[A]): List[A] = {
  // `acc` holds the kept elements in reverse order; it is reversed once at the end.
  @scala.annotation.tailrec
  def loop(rest: List[A], seen: Set[A], acc: List[A]): List[A] = rest match {
    case Nil                           => acc.reverse
    case head :: tail if seen(head)    => loop(tail, seen, acc)
    case head :: tail                  => loop(tail, seen + head, head :: acc)
  }

  loop(list, Set.empty[A], Nil)
}

// Demo: sort a sample list with msort, then remove the duplicates with distinct.
val list = List(1, 2, 3, 4, 2, 3, 4, 99)
val s = msort[Int](_ < _)(list)

// Printed lines, in order:
//   List(1, 2, 3, 4, 2, 3, 4, 99)   (input)
//   List(1, 2, 2, 3, 3, 4, 4, 99)   (sorted)
//   List(1, 2, 3, 4, 99)            (sorted, duplicates removed)
Seq(list, s, distinct(s)).foreach(println)
2
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
2