LoginSignup
1
3

More than 1 year has passed since last update.

Python × Twitter API ツイート種類別にデータ取得する

Posted at

キーワードを含むツイート数

累計の場合

search.py
import tweepy
import datetime

# Script: print the cumulative number of tweets containing `keyword`,
# one line per day of the search period.

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through a long multi-day search.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]

# Run the search one day at a time.  tweets_data keeps growing across days,
# so its length after each day is the cumulative count up to that day.
# NOTE(review): since/until are passed as datetime objects exactly as in the
# original; confirm the installed Tweepy / Twitter API version honours them.
# NOTE(review): Tweepy 4 renamed API.search to API.search_tweets -- confirm
# which version this script targets.
tweets_data = []
total_tweets_data = []
for day_start, day_end in zip(since, until):
    for tweet in tweepy.Cursor(
            api.search,
            tweet_mode='extended',
            q=keyword,
            lang='ja',
            count=100,  # largest page size; fewer requests than the default
            since=day_start,
            until=day_end).items():
        # Flatten each tweet onto a single line.
        tweets_data.append(tweet.full_text.strip().replace('\n', '。') + '\n')
    total_tweets_data.append(len(tweets_data))

# Print the cumulative tweet count for each day.
for total in total_tweets_data:
    print('-------------------------------------------')
    print(total)

実行結果

Image from Gyazo

当日の場合

search.py
import tweepy
import datetime

# Script: print the number of tweets containing `keyword` posted on each
# individual day of the search period.

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through a long multi-day search.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]

# Run the search one day at a time.  tweets_data keeps growing across days,
# so its length after each day is the cumulative count up to that day.
# NOTE(review): since/until are passed as datetime objects exactly as in the
# original; confirm the installed Tweepy / Twitter API version honours them.
tweets_data = []
total_tweets_data = []
for day_start, day_end in zip(since, until):
    for tweet in tweepy.Cursor(
            api.search,
            tweet_mode='extended',
            q=keyword,
            lang='ja',
            count=100,  # largest page size; fewer requests than the default
            since=day_start,
            until=day_end).items():
        # Flatten each tweet onto a single line.
        tweets_data.append(tweet.full_text.strip().replace('\n', '。') + '\n')
    total_tweets_data.append(len(tweets_data))

# Per-day count = cumulative count minus the previous day's cumulative count
# (the first day is compared against zero).
day_tweets_data = [
    total - previous
    for previous, total in zip([0] + total_tweets_data, total_tweets_data)
]

# Print the tweet count for each day.
for day_count in day_tweets_data:
    print('-------------------------------------------')
    print(day_count)

実行結果

Image from Gyazo

キーワードを含むRT数

累計の場合

search.py
import tweepy
import datetime

# Script: print the cumulative number of retweets containing `keyword`,
# one line per day of the search period.  The retweet count is derived as
# (all matching tweets) - (matching tweets with retweets excluded).

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through; this script runs every search twice.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'
# Same keyword with the search operator that filters retweets out.
key = keyword + " exclude:retweets"

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]


def _collect_tweets(query):
    """Search every one-day window for *query*.

    Returns a ``(texts, totals)`` pair: ``texts`` holds the single-line text
    of every tweet found, and ``totals[i]`` is the cumulative number of
    tweets found up to and including day ``i``.
    """
    # NOTE(review): since/until are passed as datetime objects exactly as in
    # the original; confirm the installed Tweepy / Twitter API version
    # honours them.
    texts = []
    totals = []
    for day_start, day_end in zip(since, until):
        for tweet in tweepy.Cursor(
                api.search,
                tweet_mode='extended',
                q=query,
                lang='ja',
                count=100,  # largest page size; fewer requests than default
                since=day_start,
                until=day_end).items():
            # Flatten each tweet onto a single line.
            texts.append(tweet.full_text.strip().replace('\n', '。') + '\n')
        totals.append(len(texts))
    return texts, totals


# All tweets containing the keyword (cumulative per day).
tweets_data, total_tweets_data = _collect_tweets(keyword)

# Tweets containing the keyword with retweets excluded (cumulative per day).
exclude_RT_data, total_exclude_RT_data = _collect_tweets(key)

# tweets - tweets without retweets = retweets
total_RT_data = [
    all_count - non_rt_count
    for all_count, non_rt_count in zip(
        total_tweets_data, total_exclude_RT_data)
]

# Print the cumulative retweet count for each day.
for total in total_RT_data:
    print('-------------------------------------------')
    print(total)

実行結果

Image from Gyazo

当日の場合

search.py
import tweepy
import datetime

# Script: print the number of retweets containing `keyword` for each
# individual day of the search period.  The retweet count is derived as
# (all matching tweets) - (matching tweets with retweets excluded).

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through; this script runs every search twice.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'
# Same keyword with the search operator that filters retweets out.
key = keyword + " exclude:retweets"

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]


def _collect_tweets(query):
    """Search every one-day window for *query*.

    Returns a ``(texts, totals)`` pair: ``texts`` holds the single-line text
    of every tweet found, and ``totals[i]`` is the cumulative number of
    tweets found up to and including day ``i``.
    """
    # NOTE(review): since/until are passed as datetime objects exactly as in
    # the original; confirm the installed Tweepy / Twitter API version
    # honours them.
    texts = []
    totals = []
    for day_start, day_end in zip(since, until):
        for tweet in tweepy.Cursor(
                api.search,
                tweet_mode='extended',
                q=query,
                lang='ja',
                count=100,  # largest page size; fewer requests than default
                since=day_start,
                until=day_end).items():
            # Flatten each tweet onto a single line.
            texts.append(tweet.full_text.strip().replace('\n', '。') + '\n')
        totals.append(len(texts))
    return texts, totals


# All tweets containing the keyword (cumulative per day).
tweets_data, total_tweets_data = _collect_tweets(keyword)

# Tweets containing the keyword with retweets excluded (cumulative per day).
exclude_RT_data, total_exclude_RT_data = _collect_tweets(key)

# tweets - tweets without retweets = retweets
total_RT_data = [
    all_count - non_rt_count
    for all_count, non_rt_count in zip(
        total_tweets_data, total_exclude_RT_data)
]

# Per-day count = cumulative count minus the previous day's cumulative count
# (the first day is compared against zero).
day_RT_data = [
    total - previous
    for previous, total in zip([0] + total_RT_data, total_RT_data)
]

# Print the retweet count for each day.
for day_count in day_RT_data:
    print('-------------------------------------------')
    print(day_count)

実行結果

Image from Gyazo

キーワードを含む引用数

累計の場合

search.py
import tweepy
import datetime

# Script: print the cumulative number of quote tweets containing `keyword`,
# one line per day of the search period.

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through a long multi-day search.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]

# Run the search one day at a time, recording each tweet's is_quote_status
# flag.  quote_data keeps growing across days, so counting its True entries
# after each day gives the cumulative number of quote tweets.
# NOTE(review): since/until are passed as datetime objects exactly as in the
# original; confirm the installed Tweepy / Twitter API version honours them.
quote_data = []
total_quote_data = []
for day_start, day_end in zip(since, until):
    for tweet in tweepy.Cursor(
            api.search,
            tweet_mode='extended',
            q=keyword,
            lang='ja',
            count=100,  # largest page size; fewer requests than the default
            since=day_start,
            until=day_end).items():
        quote_data.append(tweet.is_quote_status)
    total_quote_data.append(quote_data.count(True))

# Print the cumulative quote-tweet count for each day.
for total in total_quote_data:
    print('-------------------------------------------')
    print(total)

実行結果

Image from Gyazo

当日の場合

search.py
import tweepy
import datetime

# Script: print the number of quote tweets containing `keyword` for each
# individual day of the search period.

# Twitter API credentials -- fill these in before running.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# wait_on_rate_limit makes Tweepy sleep until the rate-limit window resets
# instead of aborting part-way through a long multi-day search.
api = tweepy.API(auth, wait_on_rate_limit=True)

keyword = '五反田'

# Search period (both days included), written as YYYYMMDD.
sinceForm = '20210428'
untilForm = '20210501'
sinceDate = datetime.datetime.strptime(sinceForm, '%Y%m%d')
# One extra day so that the final day (untilForm) is itself searched.
untilDate = datetime.datetime.strptime(
    untilForm, '%Y%m%d') + datetime.timedelta(days=1)
diff = (untilDate - sinceDate).days

# One-day windows: since[i] is midnight of day i, until[i] is midnight of
# the following day.
one_day = datetime.timedelta(days=1)
since = [sinceDate + datetime.timedelta(days=i) for i in range(diff)]
until = [day_start + one_day for day_start in since]

# Run the search one day at a time, recording each tweet's is_quote_status
# flag.  quote_data keeps growing across days, so counting its True entries
# after each day gives the cumulative number of quote tweets.
# NOTE(review): since/until are passed as datetime objects exactly as in the
# original; confirm the installed Tweepy / Twitter API version honours them.
quote_data = []
total_quote_data = []
for day_start, day_end in zip(since, until):
    for tweet in tweepy.Cursor(
            api.search,
            tweet_mode='extended',
            q=keyword,
            lang='ja',
            count=100,  # largest page size; fewer requests than the default
            since=day_start,
            until=day_end).items():
        quote_data.append(tweet.is_quote_status)
    total_quote_data.append(quote_data.count(True))

# Per-day count = cumulative count minus the previous day's cumulative count
# (the first day is compared against zero).
day_quote_data = [
    total - previous
    for previous, total in zip([0] + total_quote_data, total_quote_data)
]

# Print the quote-tweet count for each day.
for day_count in day_quote_data:
    print('-------------------------------------------')
    print(day_count)

実行結果

Image from Gyazo

参考文献

1
3
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
3