環境
- Mac OS X Version 10.9.5
- Scala 2.10.4
- Spark 1.1.0
- Spark MLlib 1.1.0
- sbt 0.13.1
準備
/root/to/project/path
|-- build.sbt
|-- src
| |-- main
| | |-- scala
| | | |-- Stoppable.scala
| |-- test
| | |-- scala
| | | |-- SparkMLlibCorrelationSpec.scala
build.sbt
// Build definition for the Spark MLlib correlation example.
// Settings are separated by blank lines because sbt releases before 0.13.7
// require a blank line between top-level expressions in .sbt files.

name := "Spark MLlib examples"

version := "0.0.1-SNAPSHOT"

scalaVersion := "2.10.4"

scalacOptions ++= Seq("-Xlint", "-deprecation", "-unchecked", "-feature", "-Xelide-below", "ALL")

libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.1.0",
  "org.apache.spark" %% "spark-mllib" % "1.1.0",
  // specs2 is only referenced from src/test/scala, so keep it off the main classpath.
  "org.specs2" %% "specs2" % "2.4.1" % "test"
)
実装
src/test/scala/SparkMLlibCorrelationSpec.scala
import commons._
import org.apache.spark._
import org.apache.spark.mllib.stat._
import org.apache.spark.mllib.stat.correlation._
import org.apache.spark.SparkContext._
import org.specs2._
/**
 * Acceptance specification that computes the Pearson correlation between two
 * small in-memory data series with Spark MLlib's `Statistics.corr` and checks
 * the result against a known reference value.
 *
 * The Spark job runs once, while the specification instance is constructed;
 * the `SparkContext` is stopped by `using` whether or not the job succeeds.
 */
class SparkMLlibCorrelationSpec extends Specification with Stoppable { def is = s2"""
Spark MLlib
Correlation
correlation1 $correlation1
"""

  // Sample data: air temperature (x) and beer shipment volume (y).
  val xData = Array(12.1, 15.3, 18.6, 21.7, 26.1, 32.1)
  val yData = Array(45.0, 520.0, 2864.0, 6874.0, 25487.0, 102870.0)

  // Absolute tolerance for the floating-point comparison. Exact equality on a
  // computed Double is brittle: a different summation order or library version
  // may change the last few bits without the result being wrong.
  private val tolerance = 1e-9

  // `using` returns the value produced by the closure, so the coefficient can
  // be bound to an immutable val instead of being assigned to a var from
  // inside the closure.
  val retCorrelation1: Double =
    using(new SparkContext("local[1]", "SparkMLlibCorrelationSpec", System.getenv("SPARK_HOME"))) { sc =>
      val x = sc.parallelize(xData)
      val y = sc.parallelize(yData)
      Statistics.corr(x, y, "pearson")
    }

  // Page that works out this correlation coefficient: http://dekiru.net/article/4576/
  def correlation1 = retCorrelation1 must beCloseTo(0.8633475827899126, tolerance)
}
src/main/scala/Stoppable.scala
package commons
import scala.language.reflectiveCalls
/**
 * Mix-in providing a loan-pattern helper for resources that are released
 * through a `stop()` method.
 */
trait Stoppable {

  /** Structural type matching any value that exposes `def stop(): Unit`. */
  type T = { def stop(): Unit }

  /**
   * Applies `f` to `resource` and then calls `resource.stop()`, whether `f`
   * returned normally or threw.
   *
   * @param resource the value handed to `f` and stopped afterwards
   * @param f        the computation to run against `resource`
   * @tparam A       concrete resource type; must expose `stop(): Unit`
   * @tparam B       result type of `f`
   * @return whatever `f` returns
   */
  def using[A <: T, B](resource: A)(f: A => B): B = {
    // `stop()` is resolved through the structural type, hence the
    // `reflectiveCalls` language import at the top of the file.
    try f(resource)
    finally resource.stop()
  }
}
実行
$ sbt '~test-only SparkMLlibCorrelationSpec'
参考