SparkでHiveContextを用いてwindow functionを使用するサンプル。
build.sbt
...
// Spark modules needed for the HiveContext sample. Every module uses the same
// organization, version and "provided" scope, so only the artifact name varies.
libraryDependencies ++= Seq("spark-core", "spark-sql", "spark-hive").map { artifact =>
  "org.apache.spark" %% artifact % "1.5.2" % "provided"
}
...
Scalaサンプル
import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.hive.HiveContext
...
// Build the SparkContext from a SparkConf; no settings are applied to the conf in this snippet.
val conf = new SparkConf()
val sc = new SparkContext(conf)
// The SQL context is a HiveContext (not a plain SQLContext); this sample relies on it
// for the window function query issued below.
val sqlContext = new HiveContext(sc)
// Brings sqlContext's implicits into scope; nothing in the visible lines depends on them.
import sqlContext.implicits._
// Read the JSON input. NOTE(review): `test_data` is not defined in the visible snippet --
// presumably the location of the JSON data; confirm against the elided code.
val src = sqlContext.read.json(test_data)
// Register the loaded data under the name "test" so the SQL below can reference it.
src.registerTempTable("test")
// For each `id`, number the rows in ascending `time` order and return that number as column `num`
// alongside `id` and `time`.
val r = sqlContext.sql("select id, time, row_number() over(partition by id order by time) num from test")
...