今日は一日“歌う声優”三昧 ツヴァイ!
ツイートがたくさん手に入るし、BigQueryも使ってみたいので、
BigQueryに放り込む用のCSVを作ってみる。
ソースコード
project.clj
;; Leiningen project definition for the tweet-to-CSV exporter.
(defproject tweet-bigdata-clj "0.1.0-SNAPSHOT"
  :description "Collects tweets for a hashtag with Twitter4J and writes them to a CSV file for loading into BigQuery"
  ;; NOTE(review): still the Leiningen template placeholder; replace with the real project URL.
  :url "http://example.com/FIXME"
  :license {:name "Eclipse Public License"
            :url "http://www.eclipse.org/legal/epl-v10.html"}
  :dependencies [[org.clojure/clojure "1.6.0"]
                 ;; Twitter API client used for the search calls.
                 [org.twitter4j/twitter4j-core "4.0.2"]
                 ;; CSV serialisation of the collected tweets.
                 [clojure-csv/clojure-csv "2.0.1"]]
  ;; Namespace whose -main is the program entry point.
  :main tweet-bigdata-clj.core)
core.clj
(ns tweet-bigdata-clj.core
  (:require [clojure-csv.core :as csv]
            [clojure.java.io :as io]
            [clojure.string])
  (:import [twitter4j Query QueryResult TwitterFactory]))
;; Forward declarations for the vars defined further down in this namespace.
(declare twitter
         get-tweets
         tweets-to-map
         to-csvfile
         to-csv
         make-csv
         get-all-tweets)

(def twitter
  "Twitter client obtained from a default-constructed TwitterFactory.
  Created once, when this namespace is loaded.
  NOTE(review): presumably picks up credentials from Twitter4J's own
  configuration (e.g. twitter4j.properties) - confirm."
  (-> (TwitterFactory.)
      (.getInstance)))
(defn get-tweets
  "Runs a Twitter search for `hash-tag` and returns the search result
  (the value returned by `.search` on the shared `twitter` client).

  Arguments:
    hash-tag - query string handed to `Query.setQuery`.
    since    - date string handed to `Query.setSince` (default \"2014-11-03\").
    until    - date string handed to `Query.setUntil` (default \"2014-11-04\").

  The page size is fixed at 100 tweets per request."
  ([hash-tag]
   ;; Keep the original hard-coded date window as the default.
   (get-tweets hash-tag "2014-11-03" "2014-11-04"))
  ([hash-tag since until]
   (let [query (doto (Query.)
                 (.setQuery hash-tag)
                 (.setCount 100)
                 (.setSince since)
                 (.setUntil until))]
     (.search twitter query))))
(defn tweets-to-map
  "Converts a collection of tweet objects into a lazy sequence of maps.

  Each tweet must respond to `getUser` (which in turn responds to
  `getScreenName` and `getName`), `getText` and `getCreatedAt`.

  Every resulting map has the keys:
    :screenName - the user's screen name
    :name       - the user's display name
    :text       - the tweet text with each newline replaced by a space
    :createdAt  - `toString` of the creation timestamp"
  [tweets]
  (map (fn [tweet]
         {:screenName (.. tweet getUser getScreenName)
          :name       (.. tweet getUser getName)
          ;; Flatten line breaks so one tweet stays on one CSV line.
          :text       (clojure.string/replace (.getText tweet) #"\n" " ")
          :createdAt  (.. tweet getCreatedAt toString)})
       tweets))
(defn to-csvfile
  "Appends CSV text to a file, encoded as UTF-8.

  Arguments:
    text - a sequence of strings; they are concatenated with `str`
           and written in one call.
    path - destination file (default \"out-file.csv\").

  The file is opened in append mode and closed again before returning."
  ([text]
   (to-csvfile text "out-file.csv"))
  ([text path]
   (with-open [out-file (io/writer path :encoding "utf-8" :append true)]
     (.write out-file (apply str text)))))
(defn to-csv
  "Serialises one tweet map into CSV text via `csv/write-csv`.

  Reads :screenName, :text and :createdAt from the map and emits them as
  a single row in that order. Other keys (such as :name) are ignored."
  [{text :text screen-name :screenName created-at :createdAt}]
  ;; write-csv takes a sequence of rows, so wrap the single row.
  (csv/write-csv [[screen-name text created-at]]))
(defn make-csv
  "Writes every tweet of one search result to the CSV file.

  `query` is a search result that responds to `getTweets`. Its tweets are
  turned into maps, each map into CSV text, and the lot is handed to
  `to-csvfile`."
  [query]
  (let [tweet-maps (tweets-to-map (.getTweets query))
        csv-rows   (map to-csv tweet-maps)]
    (to-csvfile csv-rows)))
(defn get-all-tweets
  "Fetches the pages that follow `query` and writes each one to CSV.

  `query` is the QueryResult of a search that has already been run; its
  own tweets are NOT written here. While the current result reports
  `hasNext`, this waits 5 seconds, runs the next query with a page size
  of 100, passes that page to `make-csv`, and continues from it.

  Returns nil once a result has no next page."
  [^QueryResult query]
  (loop [result query]
    (when (.hasNext result)
      ;; Pause between requests.
      ;; NOTE(review): presumably to stay under the API rate limit - confirm.
      (Thread/sleep 5000)
      (let [next-query  (doto (.nextQuery result)
                          (.setCount 100))
            next-result (.search twitter next-query)]
        (make-csv next-result)
        (recur next-result)))))
;; Collected search results. `get-first-page` refers to this var, but it was
;; never defined, which made the namespace fail to compile. `defonce` avoids
;; resetting the ref if the namespace is reloaded.
(defonce result-list (ref []))

(defn get-first-page
  "Runs `query` against the shared `twitter` client, records the result in
  the `result-list` ref inside a transaction, and returns the result."
  [query]
  (let [result (.search twitter query)]
    (dosync
      (alter result-list conj result))
    result))
(defn -main
  "Program entry point: exports the tweets for a hashtag to the CSV file.

  The first command-line argument, when given, is used as the search
  term; otherwise \"#zanmai\" is used. Extra arguments are ignored, so
  passing arguments no longer raises an ArityException.

  Writes the first page of results, then every following page."
  [& args]
  (let [hash-tag     (or (first args) "#zanmai")
        first-tweets (get-tweets hash-tag)]
    (make-csv first-tweets)
    (get-all-tweets first-tweets)))