Posted at

ScalaでGzipによる圧縮/展開を行う。

More than 3 years have passed since last update.


環境


  • Mac OS X Version 10.10.2

  • Scala 2.11.5

  • sbt 0.13.7


準備

/root/to/project/path

|-- build.sbt
|-- src
| |-- main
| | |-- scala
| | | |-- GZIP.scala
| |-- test
| | |-- scala
| | | |-- GZIPSpec.scala


build.sbt

// Project coordinates.
name := "gzip"

version := "1.0"

scalaVersion := "2.11.5"

// Compiler warnings enabled for the whole build.
scalacOptions ++= Seq("-Xlint", "-deprecation", "-unchecked", "-feature", "-Xelide-below", "ALL")

libraryDependencies ++= Seq(
  // Imported by the main sources (scalaz.Scalaz._).
  "org.scalaz" %% "scalaz-core" % "7.1.1",
  // Only the specs under src/test use specs2, so keep it off the compile/runtime classpath.
  "org.specs2" %% "specs2" % "2.4.1" % "test"
)



実装


src/main/scala/GZIP.scala

import java.io.{BufferedReader, ByteArrayInputStream, ByteArrayOutputStream, InputStreamReader}

import java.nio.charset.Charset
import java.util.zip._

import scala.util.control.Exception._
import scalaz.Scalaz._

/**
 * GZIP compression and decompression of strings.
 *
 * Both operations report failures as a `Left` holding the caught throwable
 * instead of throwing.
 */
object GZIP {

  /** Number of characters read per iteration while draining the decompressed stream. */
  private val ChunkSize = 4096

  /**
   * Encodes `source` with the named charset and compresses the bytes with GZIP.
   *
   * @param source      text to compress
   * @param charsetName name of the charset used to encode `source`
   * @return `Right` with the GZIP bytes, or `Left` with the failure (for
   *         example an unknown charset name)
   */
  def compress(source: String, charsetName: String): Either[Throwable, Array[Byte]] = {
    def compressString(charset: Charset): Either[Throwable, Array[Byte]] =
      allCatch.either {
        // Streams are created inside the guarded block so that a failure while
        // constructing them is also reported as a Left.
        val buffer = new ByteArrayOutputStream()
        val gzos = new GZIPOutputStream(buffer)
        try gzos.write(source.getBytes(charset))
        finally gzos.close()
        // Read the buffer only after close(): closing finishes the GZIP stream.
        buffer.toByteArray
      }

    for {
      charset <- getCharset(charsetName)
      bytes <- compressString(charset)
    } yield bytes
  }

  /**
   * Decompresses GZIP bytes and decodes them with the named charset.
   *
   * The text is restored verbatim, including any line terminators.
   *
   * @param data        GZIP-compressed bytes
   * @param charsetName name of the charset used to decode the decompressed bytes
   * @return `Right` with the decoded text, or `Left` with the failure (for
   *         example an unknown charset name or input that is not valid GZIP)
   */
  def decompress(data: Array[Byte], charsetName: String): Either[Throwable, String] = {
    def decompressByteArray(data: Array[Byte], charset: Charset): Either[Throwable, String] =
      allCatch.either {
        // GZIPInputStream reads the GZIP header in its constructor and throws on
        // malformed input, so it has to be built inside the guarded block.
        // The charset is passed explicitly; without it the reader would use the
        // platform default encoding.
        val in = new BufferedReader(
          new InputStreamReader(new GZIPInputStream(new ByteArrayInputStream(data)), charset))
        try {
          val sb = new StringBuilder
          val chunk = new Array[Char](ChunkSize)
          // Read raw characters rather than lines: readLine() strips the line
          // terminators, which would change multi-line text.
          Iterator
            .continually(in.read(chunk))
            .takeWhile(_ != -1)
            .foreach(count => sb.appendAll(chunk, 0, count))
          sb.toString
        } finally in.close() // closes the wrapped streams as well
      }

    for {
      charset <- getCharset(charsetName)
      text <- decompressByteArray(data, charset)
    } yield text
  }

  /** Looks up a charset by name, capturing lookup failures as a `Left`. */
  private def getCharset(charsetName: String): Either[Throwable, Charset] =
    allCatch.either(Charset.forName(charsetName))
}



src/test/scala/GZIPSpec.scala

import org.specs2.mutable.Specification

/** Round-trip specification for the GZIP helpers. */
class GZIPSpec extends Specification {

  "GZIP" should {
    "compress -> decompress" in {
      val original = "あいうえお"

      // A failed compression is replaced by a one-byte placeholder payload.
      val compressed = GZIP.compress(original, "UTF-8") match {
        case Right(bytes) => bytes
        case Left(_)      => Array[Byte](0)
      }

      // A failed decompression is replaced by an empty string, which then
      // fails the equality check below.
      val restored = GZIP.decompress(compressed, "UTF-8") match {
        case Right(text) => text
        case Left(_)     => ""
      }

      restored must_== original
    }
  }
}



実行

$ sbt '~test-only GZIPSpec'


参考

http://stackoverflow.com/questions/3627401/gzipinputstream-to-string