Creating a word cloud from your own tweets with Python 3

Hit the Twitter API from Python 3,
run morphological analysis with MeCab,
and generate a word cloud.

This assumes the required libraries (requests_oauthlib, the MeCab bindings, wordcloud, matplotlib) are already installed.

Fetch tweets via the Twitter API and write them out to a CSV file

get_tweets.py
# coding: utf-8

from requests_oauthlib import OAuth1Session
import json
import csv

# OAuth credentials (placeholders): consumer key/secret and access token/secret
CK = "hhhhhhhhhhhhhhhhhhhhhh"
CS = "oooooooooooooooooooooooooooooooooooooooooo"
AT = "gggggggggggggggggggggggggggggggggggggggggggggggggg"
AS = "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"
oauth = OAuth1Session(CK, CS, AT, AS)

# url = "https://stream.twitter.com/1.1/statuses/filter.json" # get stream tweets # これうまくいかなかったかも
# url = "https://stream.twitter.com/1.1/statuses/sample.json" # get sample tweets # これうまくいかなかったかも
# url = "https://api.twitter.com/1.1/statuses/update.json" # post a tweet
# url = "https://api.twitter.com/1.1/search/tweets.json?" # search tweets
# 下のusernameを変えれば特定のpublic userのtweetを取得できるはず(フォローしてないとだめかな?)
url = "https://api.twitter.com/1.1/statuses/user_timeline.json?screen_name=username" # @ は不要 

params = {
    # "track": "a"
    # "lang": "ja"
    "count": "200"
    # "status": "Hello, world!"
}
# First request: grab the newest tweets just to seed max_id for the paging loop
req = oauth.get(
    url,
    params=params
)
twitter = json.loads(req.text)
maxid = twitter[0]["id"] + 1  # seed pagination from the newest tweet (max_id is an inclusive upper bound)

c = 0
tweets_list = []
for i in range(3):  # 3 pages of up to 200 tweets each
    print(i)
    params = {
        "count": "200",
        "max_id": maxid
    }
    req = oauth.get(
        url,
        params=params
    )
    twitter = json.loads(req.text)
    for tweet in twitter:
        tweets_list.append([
            c,
            tweet["id"],
            tweet["created_at"],
            tweet["text"]
        ])
        maxid = tweet["id"] - 1  # next request fetches tweets older than this one
        c += 1

with open("tweets.csv", "w") as f:
    writer = csv.writer(f, lineterminator="\n")
    writer.writerow(tweets_list)
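
The script above assumes every request succeeds. As a minimal sketch (not in the original script), you can check the status code before parsing, which also surfaces rate-limit errors (HTTP 429):

req = oauth.get(url, params=params)
if req.status_code != 200:
    # e.g. rate limited or bad credentials; req.text carries Twitter's error JSON
    raise RuntimeError("Twitter API returned HTTP %d: %s" % (req.status_code, req.text))
twitter = json.loads(req.text)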

Filter replies and retweets out of the fetched tweets and create the word cloud

A tweet is treated as a reply or retweet if its text contains "@" or "RT".
If this filtering is unnecessary, the CSV-reading step can be written as a list comprehension (see the sketch below).
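
For reference, a minimal sketch of that list-comprehension version (the filter can also be folded into it):

import csv

with open("./tweets.csv", "r") as f:
    # column 3 holds the tweet text; drop rows containing "@" or "RT"
    text_tweet = [row[3] for row in csv.reader(f)
                  if "@" not in row[3] and "RT" not in row[3]]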

wordcloud_tweets.py
import MeCab
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import csv

# Words to exclude from the cloud (common light verbs and fillers)
stop_words = ["する", "ない", "なる", "もう", "しよ", "でき", "なっ", "くっ", "やっ", "ある", "しれ", "思う", "今日"]
# Part-of-speech IDs to keep; these come from the dictionary's pos-id.def
# (content-word classes such as adjectives, verbs, adverbs, and nouns)
pos_list = [10, 11, 31, 32, 34]
pos_list.extend(list(range(36, 50)))
pos_list.extend([59, 60, 62, 67])
def create_mecab_list(text):
    mecab_list = []
    # -d points at mecab-ipadic-neologd; adjust the path to your installation
    mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
    mecab.parse("")  # workaround for a known mecab-python3 issue where node.surface comes back garbled
    node = mecab.parseToNode(text)
    while node:
        # keep tokens of two or more characters whose POS ID is in pos_list
        if len(node.surface) > 1 and node.posid in pos_list:
            mecab_list.append(node.surface)
        node = node.next
    return mecab_list

text_tweet = []
with open("./tweets.csv", "r") as file:
    reader = csv.reader(file)
    for row in reader:
        # row: [index, id, created_at, text]; skip replies and retweets
        if "@" in row[3]:
            continue
        if "RT" in row[3]:
            continue
        text_tweet.append(row[3])
text = "".join(text_tweet)
string = " ".join(create_mecab_list(text))

fpath = "/Library/Fonts/ヒラギノ丸ゴ ProN W4.ttc"  # Hiragino Maru Gothic; macOS-specific, adjust for your OS
wordcloud = WordCloud(
    background_color="black",
    stopwords=set(stop_words),
    max_font_size=56,
    relative_scaling=.4,
    width=500,
    height=300,
    font_path=fpath
    ).generate(string)
plt.figure()
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file("./wordcloud.png")
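
The numbers in pos_list come from the installed dictionary's pos-id.def, so they can differ between environments. To check which IDs your setup actually assigns, you can dump them for a sample sentence (a sketch under the same -d path assumption as above):

import MeCab

mecab = MeCab.Tagger("-Ochasen -d /usr/local/lib/mecab/dic/mecab-ipadic-neologd")
mecab.parse("")
node = mecab.parseToNode("今日はPythonでワードクラウドを作った")
while node:
    # feature is a comma-separated string beginning with the part-of-speech name
    print(node.posid, node.surface, node.feature)
    node = node.next

Likewise, the font path is macOS-specific. If Hiragino is not installed, one way to look for a usable Japanese font is matplotlib's font manager (again just a sketch, not from the original article):

from matplotlib import font_manager

# list installed TrueType fonts whose names suggest Japanese support
for font in font_manager.fontManager.ttflist:
    if "Hiragino" in font.name or "Gothic" in font.name:
        print(font.name, font.fname)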