グダグダ書いてもしょうがないので簡潔に書きます
# 必要なもの
- tweepy
- MeCab
これらを予めインストールしてください。インストール方法は検索してください。
# これは何か
Twitter APIのpublic stream(sample)で全世界のツイートのサンプルを取得し、その中から日本語のツイートだけをふるいにかけ、余計な情報を削除した後、MeCabに渡して形態素解析するものです。
#! /usr/bin/env python3
# encoding=utf-8
import re
import tweepy
import unicodedata
import MeCab
from datetime import timedelta
# Twitter API credentials. They are empty here and must be filled in before
# running the script.
CK = ""  # consumer key: first argument to tweepy.OAuthHandler
CS = ""  # consumer secret: second argument to tweepy.OAuthHandler
AT = ""  # access token: first argument to auth.set_access_token
AS = ""  # access token secret: second argument to auth.set_access_token
class Listener(tweepy.StreamListener):
    """Stream listener that prints Japanese tweets and their MeCab analysis.

    Each incoming status is checked with ``is_japanese``; matching tweets are
    cleaned with ``format_text``, printed, and then printed again as parsed
    by MeCab.
    """

    # MeCab tagger shared by all callbacks. It is created on first use so
    # that a tagger is not rebuilt for every single tweet.
    _tagger = None

    def on_status(self, status):
        """Handle one received status.

        Prints the cleaned tweet text and its morphological analysis when
        the text contains Japanese kana. Always returns True.
        """
        # Shift the timestamp by +9 hours (presumably UTC -> JST). The
        # shifted value is not used anywhere else in this handler.
        status.created_at += timedelta(hours=9)
        if is_japanese(status.text):
            tweet = format_text(status.text)
            print(tweet)
            if Listener._tagger is None:
                Listener._tagger = MeCab.Tagger("")
            # NOTE(review): the text is passed as UTF-8 bytes, as in the
            # original code; some MeCab bindings for Python 3 expect str
            # here instead - confirm against the installed binding.
            print(Listener._tagger.parse(tweet.encode("utf-8")))
        return True

    def on_error(self, status_code):
        """Print the error status code and return True."""
        print('Error: ' + str(status_code))
        return True

    def on_timeout(self):
        """Print a timeout notice and return True."""
        print('Timeout...')
        return True
def format_text(text):
text = re.sub(r'https?://[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)
text = re.sub(r'@[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)
text = re.sub(r'&[\w/:%#\$&\?\(\)~\.=\+\-…]+', "", text)
text = re.sub('お気に入り', "", text)
text = re.sub('まとめ', "", text)
text = re.sub(r'[!-~]', "", text) # 半角記号,数字,英字
text = re.sub(r'[︰-@]', "", text) # 全角記号
text = re.sub('\n', " ", text) # 改行文字
text = re.sub(';', "", text)
text = re.sub('RT', "", text)
text = re.sub('\n', " ", text)
return text
def is_japanese(string):
    """Return True if the string contains at least one kana character.

    A character counts as kana when its Unicode name contains "HIRAGANA"
    or "KATAKANA". Text made only of CJK ideographs is not treated as
    Japanese, which matches the effective behavior of the previous
    condition.

    Characters without a Unicode name (for example control characters such
    as a newline) are skipped. Looking them up without a default raises
    ``ValueError: no such name``, which used to abort the whole stream.

    Args:
        string: The text to inspect.

    Returns:
        True if any character is hiragana or katakana, otherwise False.
    """
    for ch in string:
        # The "" default avoids ValueError for unnamed characters.
        ch_name = unicodedata.name(ch, "")
        if "HIRAGANA" in ch_name or "KATAKANA" in ch_name:
            return True
    return False
def name(ch):
    """Return the Unicode name of a character, or None if it has none.

    Args:
        ch: A single character.

    Returns:
        The name reported by ``unicodedata.name``, or None for characters
        without a name (for example control characters).
    """
    # Passing a default makes unicodedata.name return it instead of raising
    # ValueError for unnamed characters.
    return unicodedata.name(ch, None)
# Build the OAuth handler from the credential constants defined earlier in
# this file.
auth = tweepy.OAuthHandler(CK, CS)
auth.set_access_token(AT, AS)
# Create the listener and attach it to a stream.
listener = Listener()
stream = tweepy.Stream(auth, listener)
# Start receiving the sample stream; received statuses are handled by
# Listener.on_status.
stream.sample()  # Use filter instead to apply conditions (look up the details).
これを動かしているとたまに
Traceback (most recent call last):
File "Get_public_tweet.py", line 86, in <module>
stream.sample()
File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 419, in sample
self._start(async)
File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 361, in _start
self._run()
File "/Library/Python/2.7/site-packages/tweepy/streaming.py", line 294, in _run
raise exception
ValueError: no such name
こういうエラーが出てくるのですが、原因がわかりません。わかる方がいれば、コメント等で教えていただけると嬉しいです。