ツイートの取得
- 設定ファイル
setting.ini
[hoge]
consumer_key = gomurakamitennou
consumer_secret = tyoukeitennnou
access_key = gokameyamatennnnou
access_secret = godaigotennou
- 認証
tweetUtil.py
import configparser
import tweepy
def auth_api(envName):
    """Build an authenticated tweepy API client from ``setting.ini``.

    Args:
        envName: Name of the section in ``setting.ini`` that holds the
            ``consumer_key``, ``consumer_secret``, ``access_key`` and
            ``access_secret`` entries.

    Returns:
        A ``tweepy.API`` instance authenticated with those credentials.

    Raises:
        FileNotFoundError: If ``setting.ini`` could not be read from the
            current working directory.
        configparser.NoSectionError: If ``envName`` is not a section of the file.
        configparser.NoOptionError: If one of the four keys is missing.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() silently skips files it cannot open and returns the
    # list of files it did parse; without this check a missing file surfaces
    # later as a misleading NoSectionError.
    if not config.read('setting.ini'):
        raise FileNotFoundError("setting.ini was not found or could not be read")
    print("envName is " + envName)

    consumer_key = config.get(envName, 'consumer_key')
    consumer_secret = config.get(envName, 'consumer_secret')
    access_key = config.get(envName, 'access_key')
    access_secret = config.get(envName, 'access_secret')

    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_key, access_secret)
    return tweepy.API(auth)
- csv作成
initGetTweet3200.py
import tweepy
import csv
from tweetUtil import auth_api
import sys
# Command-line arguments: the setting.ini section name, then the account to fetch.
envName = sys.argv[1]
accountName = sys.argv[2]
print(sys.argv)

# Authenticate against Twitter.
api = auth_api(envName)

# Walk the account's timeline and flatten each tweet's text onto a single line
# so that every tweet occupies exactly one CSV row.
timeline = tweepy.Cursor(
    api.user_timeline, screen_name=accountName, exclude_replies=False
).items()
tweet_data = [
    [status.id, status.created_at, status.text.replace('\n', '').replace('\r', '')]
    for status in timeline
]

# Write the rows out as CSV: id, creation time, text.
with open(f'tweets_{accountName}.csv', 'w', newline='', encoding='utf-8') as out_file:
    csv.writer(out_file, lineterminator='\n').writerows(tweet_data)
差分ツイートの取得
getDiffWriteCsv.py
from tweetUtil import auth_api
from pprint import pprint
import csv
import tweepy
import sys
if __name__ == "__main__":
    # Command-line arguments: the setting.ini section name, then the account name.
    envName = sys.argv[1]
    accountName = sys.argv[2]
    print(sys.argv)

    # Twitter auth
    api = auth_api(envName)

    csv_path = f'tweets_{accountName}.csv'

    # Collect the tweet IDs already stored in the CSV.
    # The file is written as UTF-8 below, so it is read back as UTF-8 too rather
    # than with the platform default encoding. A set gives O(1) membership
    # tests, and blank rows are skipped instead of raising on row[0].
    with open(csv_path, newline='', encoding='utf-8') as f:
        known_ids = {int(row[0]) for row in csv.reader(f) if row}

    # Compare against the 100 most recent timeline items and keep the unseen ones.
    addCsv = []
    recent = tweepy.Cursor(
        api.user_timeline, screen_name=accountName, exclude_replies=False
    ).items(100)
    for tweet in recent:
        if tweet.id not in known_ids:
            addCsv.append([tweet.id, tweet.created_at,
                           tweet.text.replace('\n', '').replace('\r', '')])
    pprint(addCsv)

    # Append the new rows to the CSV.
    with open(csv_path, 'a', newline='', encoding='utf-8') as f:
        csv.writer(f, lineterminator='\n').writerows(addCsv)
ツイートの作成
-
csvからマルコフ連鎖に必要なベースのテキストを抜き出す
- マルコフ連鎖のgenerate_textとwakatiはこの記事に記載がある
-
余計な文字列の除去
tweetUtil.py
def getRawText(target):
    """Strip URLs, line breaks, reply mentions and marker characters from text.

    Args:
        target: The text to clean.

    Returns:
        ``target`` with http/https/ftp URLs, ``\\n``/``\\r``, ``@name``
        mentions and any remaining ``@``, ``#`` and ``&`` characters removed.
        If ``target`` is not a string, a message is printed and ``target`` is
        returned unchanged (best effort).
    """
    # Imported here so the function works even when the module's import block
    # does not provide `re`; without it every call failed with NameError.
    import re

    try:
        # Remove URLs and line breaks.
        removedTarget = re.sub(r"(https?|ftp)(:\/\/[-_\.!~*\'()a-zA-Z0-9;\/?:\@&=\+$,%#]+)",
                               "", target).replace('\n', '').replace('\r', '')
        # Remove reply targets (@screen_name).
        removedTarget = re.sub(r"@([A-Za-z0-9_]+)", "", removedTarget)
        # Remove leftover marker characters.
        removedTarget = removedTarget.replace(
            '@', '').replace('#', '').replace('&', '')
    except TypeError as e:
        # re.sub raises TypeError for non-string input; keep the original
        # best-effort behaviour of reporting it and returning the input as-is.
        print(f'{target} is {e}')
        removedTarget = target
    return removedTarget
- 本体
csvToGenerateText.py
from tweetUtil import auth_api, getRawText
import sys
import csv
from generate_text import wakati, generate_text
from tweetUtil import auth_api
if __name__ == "__main__":
    # Command-line arguments: the setting.ini section name, then the account name.
    envName = sys.argv[1]
    accountName = sys.argv[2]
    print(sys.argv)

    # Twitter auth
    api = auth_api(envName)

    # Read the CSV and collect the text column used as the Markov-chain corpus.
    # The CSV is written as UTF-8 by the fetch scripts, so it is read as UTF-8
    # here instead of with the platform default encoding.
    texts = []
    with open(f'tweets_{accountName}.csv', newline='', encoding='utf-8') as f:
        for row in csv.reader(f):
            try:
                text = row[2]
            except IndexError as e:
                # Row has fewer than three columns: report it and skip it.
                print(e)
                continue
            # Skip retweets.
            if "RT @" not in text:
                texts.append(text)
    # Join once instead of repeated += on a growing string.
    baseStr = "".join(texts)

    tweet_text = generate_text(wakati(getRawText(baseStr)))

    # Trim to 140 characters; the previous slice [0:139] kept only 139.
    tweet_text_140 = tweet_text[:140]
    print(tweet_text_140)
    api.update_status(tweet_text_140)
実行
フォルダ構造
tweetGenerate
| setting.ini
| tweetUtil.py
| initGetTweet3200.py
| getDiffWriteCsv.py
| csvToGenerateText.py
| tweets_$accountName.csv
Run
# ベーステキスト取得
python initGetTweet3200.py $envName $accountName
# 差分取得というか監視
python getDiffWriteCsv.py $envName $accountName
# ツイート作成
python csvToGenerateText.py $envName $accountName