データ作り
scala> val t1 = Array(35, 36, 32, 30, 40, 42, 38)
t1: Array[Int] = Array(35, 36, 32, 30, 40, 42, 38)
scala> val t2 = Array(31, 32, 34, 55, 56)
t2: Array[Int] = Array(31, 32, 34, 55, 56)
scala> val tC = Seq(t1, t2).toDF("celsius")
tC: org.apache.spark.sql.DataFrame = [celsius: array<int>]
scala> tC.show()
+--------------------+
| celsius|
+--------------------+
|[35, 36, 32, 30, ...|
|[31, 32, 34, 55, 56]|
+--------------------+
scala> tC.createOrReplaceTempView("tC")
# createOrReplaceTempViewでDataFrameを一時ビューとして登録すると、spark.sqlのSQLから参照できるようになる
# ()内に渡した文字列がビュー名になり、SQLのFROM句でその名前を指定する
transform()
配列(array型カラム)の各要素にラムダ式を適用して、変換後の配列を返す
scala> spark.sql("""
| SELECT celsius, transform(celsius, t -> ((t*9) div 5) + 32) AS fahrenheit
| FROM tC
| """).show()
+--------------------+--------------------+
| celsius| fahrenheit|
+--------------------+--------------------+
|[35, 36, 32, 30, ...|[95, 96, 89, 86, ...|
|[31, 32, 34, 55, 56]|[87, 89, 93, 131,...|
+--------------------+--------------------+
# divは整数除算(商の小数点以下を切り捨てて整数を返す)
filter()
配列の要素のうち、条件(ラムダ式)に合うものだけを抽出した配列を返す
scala> spark.sql("""
| SELECT celsius, filter(celsius, t -> t>38) AS high
| FROM tC
| """).show()
+--------------------+--------+
| celsius| high|
+--------------------+--------+
|[35, 36, 32, 30, ...|[40, 42]|
|[31, 32, 34, 55, 56]|[55, 56]|
+--------------------+--------+
scala> spark.sql("""
| SELECT celsius, filter(celsius, t -> t>38 and t<41) AS soso
| FROM tC
| """).show()
+--------------------+----+
| celsius|soso|
+--------------------+----+
|[35, 36, 32, 30, ...|[40]|
|[31, 32, 34, 55, 56]| []|
+--------------------+----+
# 複数の条件を組み合わせるときはandやorを使う
# &はSpark SQLではビット単位のAND演算子なので、条件の結合には使えなかった
exists()
条件に合う要素が配列内に1つでもあるかどうかをtrue/falseで返す
scala> spark.sql("""
| SELECT celsius, exists(celsius, t -> t=38) AS threshold
| FROM tC
| """).show()
+--------------------+---------+
| celsius|threshold|
+--------------------+---------+
|[35, 36, 32, 30, ...| true|
|[31, 32, 34, 55, 56]| false|
+--------------------+---------+
scala> spark.sql("""
| SELECT celsius, exists(celsius, t -> t>38 and t<41) AS threshold
| FROM tC
| """).show()
+--------------------+---------+
| celsius|threshold|
+--------------------+---------+
|[35, 36, 32, 30, ...| true|
|[31, 32, 34, 55, 56]| false|
+--------------------+---------+
# existsでもfilterと同様に、andで複数の条件を指定できる