LoginSignup
7
6

More than 5 years have passed since last update.

Spark MLlibで相関係数を算出してみる。

Last updated at Posted at 2014-10-15

環境

  • Mac OS X Version 10.9.5
  • Scala 2.10.4
  • Spark 1.1.0
  • Spark MLlib 1.1.0
  • sbt 0.13.1

準備

/root/to/project/path
   |-- build.sbt
   |-- src
   |    |-- main
   |    |    |-- scala
   |    |    |    |-- Stoppable.scala
   |    |-- test
   |    |    |-- scala
   |    |    |    |-- SparkMLlibCorrelationSpec.scala
build.sbt
// sbt build definition for the Spark MLlib example project.
name := "Spark MLlib examples"

version := "0.0.1-SNAPSHOT"

scalaVersion := "2.10.4"

scalacOptions ++= Seq("-Xlint", "-deprecation", "-unchecked", "-feature", "-Xelide-below", "ALL")

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.1.0",
  "org.apache.spark" %% "spark-mllib" % "1.1.0",
  // specs2 is only needed by the specifications under src/test, so keep it
  // out of the compile/runtime classpath by scoping it to the test configuration.
  "org.specs2" %% "specs2" % "2.4.1" % "test"
)

実装

src/test/scala/SparkMLlibCorrelationSpec.scala
import commons._                                                                                                                                                                  

import org.apache.spark._
import org.apache.spark.mllib.stat._
import org.apache.spark.mllib.stat.correlation._
import org.apache.spark.SparkContext._

import org.specs2._

/**
 * specs2 acceptance specification that computes the Pearson correlation
 * coefficient of two small data series with Spark MLlib and checks it
 * against a known reference value.
 *
 * The SparkContext is created while the class is being constructed and is
 * stopped by `using` as soon as the correlation has been computed.
 */
class SparkMLlibCorrelationSpec extends Specification with Stoppable { def is = s2"""

  Spark MLlib

  Correlation
    correlation1                               $correlation1
  """

  // Temperature and beer shipment volume
  val xData = Array(12.1, 15.3, 18.6, 21.7, 26.1, 32.1)
  val yData = Array(45.0, 520.0, 2864.0, 6874.0, 25487.0, 102870.0)

  // `using` yields the value of the function it is given, so the result can be
  // bound directly to a val instead of being assigned to a mutable field.
  val retCorrelation1: Double =
    using(new SparkContext("local[1]", "SparkMLlibCorrelationSpec", System.getenv("SPARK_HOME"))) { sc =>
      val x = sc.parallelize(xData)
      val y = sc.parallelize(yData)
      Statistics.corr(x, y, "pearson")
    }

  // Page that computes this correlation coefficient: http://dekiru.net/article/4576/
  // Compare with a tolerance: exact equality on a floating-point result is brittle.
  def correlation1 = retCorrelation1 must beCloseTo(0.8633475827899126, 1e-9)
}
src/main/scala/Stoppable.scala
package commons                                                                                                                                                                   

import scala.language.reflectiveCalls

/**
 * Mixin providing `using`, which runs a function against a resource and
 * always calls the resource's `stop()` afterwards.
 */
trait Stoppable {

  /** Structural type: any value that exposes a `stop(): Unit` method. */
  type T = { def stop(): Unit }

  /**
   * Applies `f` to `resource` and returns its result. `resource.stop()` is
   * invoked in a `finally` clause, so it runs whether `f` returns normally
   * or throws.
   *
   * @param resource the value handed to `f` and stopped afterwards
   * @param f        the function to run against `resource`
   * @tparam A       the resource type, which must provide `stop()`
   * @tparam B       the result type of `f`
   * @return whatever `f` returns
   */
  def using[A <: T, B](resource: A)(f: A => B): B =
    try f(resource)
    finally resource.stop()
}

実行

$ sbt '~test-only SparkMLlibCorrelationSpec'

参考

7
6
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
6