Creating a word cloud from your own tweets with Python 3

Hit the Twitter API from Python 3,
run morphological analysis with MeCab,
and generate a word cloud.

This assumes the required libraries (requests_oauthlib, the MeCab bindings, wordcloud, matplotlib) are already installed.

Fetch tweets via the Twitter API and write them out to a CSV file

get_tweets.py
# coding: utf-8

from requests_oauthlib import OAuth1Session
import json
import csv

# OAuth credentials (placeholders): consumer key/secret and access token/secret
CK = "hhhhhhhhhhhhhhhhhhhhhh"
CS = "oooooooooooooooooooooooooooooooooooooooooo"
AT = "gggggggggggggggggggggggggggggggggggggggggggggggggg"
AS = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
oauth = OAuth1Session(CK, CS, AT, AS)

# url = "https://stream.twitter.com/1.1/statuses/filter.json" # get stream tweets # これうまくいかなかったかも
# url = "https://stream.twitter.com/1.1/statuses/sample.json" # get sample tweets # これうまくいかなかったかも
# url = "https://api.twitter.com/1.1/statuses/update.json" # post a tweet
# url = "https://api.twitter.com/1.1/search/tweets.json?" # search tweets
# 下のusernameを変えれば特定のpublic userのtweetを取得できるはず(フォローしてないとだめかな?)
url = "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=username" # @ は不要 

params = {
    # "track": "a"
    # "lang": "ja"
    "count": "200"
    # "status": "Hello, world!"
}
# First request: grab the newest tweets just to seed max_id for the paging loop
req = oauth.get(
    url,
    params=params
)
twitter = json.loads(req.text)
maxid = twitter[0]["id"] + 1  # seed pagination from the newest tweet (max_id is an inclusive upper bound)

c = 0
tweets_list = []
for i in range(3):  # 3 pages of up to 200 tweets each
    print(i)
    params = {
        "count": "200",
        "max_id": maxid
    }
    req = oauth.get(
        url,
        params=params
    )
    twitter = json.loads(req.text)
    for tweet in twitter:
        tweets_list.append([
            c,
            tweet["id"],
            tweet["created_at"],
            tweet["text"]
        ])
        maxid = tweet["id"] - 1  # next request fetches tweets older than this one
        c += 1

with open("tweets.csv", "w") as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(tweets_list)
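
The script above assumes every request succeeds. As a minimal sketch (not in the original script), you can check the status code before parsing, which also surfaces rate-limit errors (HTTP 429):

req = oauth.get(url, params=params)
if req.status_code != 200:
    # e.g. rate limited or bad credentials; req.text carries Twitter's error JSON
    raise RuntimeError("Twitter API returned HTTP %d: %s" % (req.status_code, req.text))
twitter = json.loads(req.text)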

Filter replies and retweets out of the fetched tweets and create the word cloud

A tweet is treated as a reply or retweet if its text contains "@" or "RT".
If this filtering is unnecessary, the CSV-reading step can be written as a list comprehension (see the sketch below).
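
For reference, a minimal sketch of that list-comprehension version (the filter can also be folded into it):

import csv

with open("./tweets.csv", "r") as f:
    # column 3 holds the tweet text; drop rows containing "@" or "RT"
    text_tweet = [row[3] for row in csv.reader(f)
                  if "@" not in row[3] and "RT" not in row[3]]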

wordcloud_tweets.py
import MeCab
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import csv

# Words to exclude from the cloud (common light verbs and fillers)
stop_words = ["する", "ない", "なる", "もう", "しよ", "でき", "なっ", "くっ", "やっ", "ある", "しれ", "思う", "今日"]
# Part-of-speech IDs to keep; these come from the dictionary's pos-id.def
# (content-word classes such as adjectives, verbs, adverbs, and nouns)
pos_list = [10, 11, 31, 32, 34]
pos_list.extend(list(range(36, 50)))
pos_list.extend([59, 60, 62, 67])
def create_mecab_list(text):
    mecab_list = []
    # -d points at mecab-ipadic-neologd; adjust the path to your installation
    mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
    mecab.parse("")  # workaround for a known mecab-python3 issue where node.surface comes back garbled
    node = mecab.parseToNode(text)
    while node:
        # keep tokens of two or more characters whose POS ID is in pos_list
        if len(node.surface) > 1 and node.posid in pos_list:
            mecab_list.append(node.surface)
        node = node.next
    return mecab_list

text_tweet = []
with open("./tweets.csv", "r") as file:
    reader = csv.reader(file)
    for row in reader:
        # row: [index, id, created_at, text]; skip replies and retweets
        if "@" in row[3]:
            continue
        if "RT" in row[3]:
            continue
        text_tweet.append(row[3])
text = "".join(text_tweet)
string = " ".join(create_mecab_list(text))

fpath = "/Library/Fonts/ヒラギノ丸ゴ ProN W4.ttc"  # Hiragino Maru Gothic; macOS-specific, adjust for your OS
wordcloud = WordCloud(
    background_color="black",
    stopwords=set(stop_words),
    max_font_size=56,
    relative_scaling=.4,
    width=500,
    height=300,
    font_path=fpath
    ).generate(string)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file("./wordcloud.png")
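
The numbers in pos_list come from the installed dictionary's pos-id.def, so they can differ between environments. To check which IDs your setup actually assigns, you can dump them for a sample sentence (a sketch under the same -d path assumption as above):

import MeCab

mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
mecab.parse("")
node = mecab.parseToNode("今日はPythonでワードクラウドを作った")
while node:
    # feature is a comma-separated string beginning with the part-of-speech name
    print(node.posid, node.surface, node.feature)
    node = node.next

Likewise, the font path is macOS-specific. If Hiragino is not installed, one way to look for a usable Japanese font is matplotlib's font manager (again just a sketch, not from the original article):

from matplotlib import font_manager

# list installed TrueType fonts whose names suggest Japanese support
for font in font_manager.fontManager.ttflist:
    if "Hiragino" in font.name or "Gothic" in font.name:
        print(font.name, font.fname)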