More than 3 years have passed since last update.

Twitterのタイムライン取得

Last updated at 2020-12-18 / Posted at 2019-06-16

特に目新しいことでもないけど、忘れないように書いておきます。
※2020/05/03 いくつか修正しました。
※2020/05/28 動画に対応しました。

タイムラインの取得

TwitterのAPI Keyは別ファイルに記載しておくと使いまわすことができます。

(key.py)

from requests_oauthlib import OAuth1Session

# Credentials issued for the Twitter app (masked placeholders here).
API_KEY = "******************"
API_SECRET = "******************"
ACCESS_TOKEN = "******************"
ACCESS_SECRET = "******************"


# Import this function from other scripts to obtain a signed session.
def CreateOAuthSession():
    """Return an OAuth1Session built from the credentials in this module."""
    session = OAuth1Session(
        API_KEY,
        client_secret=API_SECRET,
        resource_owner_key=ACCESS_TOKEN,
        resource_owner_secret=ACCESS_SECRET,
    )
    return session

昔はConsumer Keyだったようですが、今はAPI Keyと記載されています。

タイムライン取得プログラム本体。

import key
import json

# Authenticate using the session factory from key.py
twitter = key.CreateOAuthSession()
url = "https://api.twitter.com/1.1/statuses/user_timeline.json"

# input() already takes a str prompt, so no str() wrapper is needed.
# The answer is converted to int so a non-numeric entry is rejected here
# instead of being sent to the API as-is.
try:
    num = int(input("何件のツイートを表示しますか？"))
except ValueError:
    raise SystemExit("ERROR: the number of tweets must be an integer")

params = {'count': num}
req = twitter.get(url, params=params)

print('----------------------------------------------------')

if req.status_code == 200:
    # A successful response body is a JSON array of tweet objects.
    timeline = json.loads(req.text)
    for tweet in timeline:
        print(tweet['user']['name'] + '::' + tweet['text'])
        print(tweet['created_at'])
        print('----------------------------------------------------')
else:
    # Anything other than 200 is reported by its HTTP status code.
    print("ERROR: %d" % req.status_code)

パラメータについて

params には、何を取得するのか指定できる。例えば以下のようにします。
なお、取得できるツイート数は、1回200件までで、15分毎に900回までの制限があります。

# Account to fetch, entered without the leading "@"
userID = input("userID:")
# Number of tweets requested per call (at most 200)
GET_AT_ONCE = int(input("Get at once(max:200):"))

params = dict(
    screen_name=userID,       # account name to fetch (without "@")
    count=GET_AT_ONCE,        # number of tweets to fetch
    include_entities=True,    # whether to include images and videos
    exclude_replies=False,    # whether to drop replies (False keeps replies)
    include_rts=True,         # whether to include retweets
)

投稿日時について

Twitterの時刻はUTCなのでJSTより9時間遅れる。時刻を補正する場合は以下のようにします。
フォーマットがdatetimeにはないので、time.struct_timeから変換します。
pytzはインストールが必要(pip install pytz)

import datetime, pytz, time
def change_time(created_at):
    """Convert a Twitter ``created_at`` timestamp to a JST string.

    Args:
        created_at: Timestamp in the form
            ``"Sun Jun 16 03:04:05 +0000 2019"``. Any numeric UTC offset
            is accepted, not only ``+0000``.

    Returns:
        The same instant in Japan Standard Time (UTC+9), formatted as
        ``"%Y-%m-%d_%H%M%S"``, e.g. ``"2019-06-16_120405"``.

    Raises:
        ValueError: If ``created_at`` does not match the expected format.
    """
    # %z parses the numeric offset and yields a timezone-aware datetime,
    # so no manual struct_time -> datetime reconstruction is needed.
    posted_at = datetime.datetime.strptime(created_at, '%a %b %d %H:%M:%S %z %Y')
    # A fixed +09:00 offset from the standard library is used for JST.
    jst = datetime.timezone(datetime.timedelta(hours=9), "JST")
    str_time = posted_at.astimezone(jst).strftime("%Y-%m-%d_%H%M%S")
    return str_time

画像の収集

方針
・アカウントを指定して画像を一括保存する
・オリジナル画質で取得する
・ファイル名は投稿日時を使用する

ソースコード全文は以下。
画像とリンクが同じツイートに含まれている場合に、画像の取得ができないという、そういう仕様みたいです。
extended_entitiesが出てこないんです。
違うAPIで取得できるのかもしれないけど、ご存じの方いらっしゃったら教えてくださいヾ('□'*)ﾉ
※2020/05/03 追記
本文やURLの合計が140文字を超えると、extended_entitiesが出力されなくなります。
これを回避するには、Resource URLに"tweet_mode=extended"を付けます。
得られるjsonのフォーマットが変わるようですが未確認(＾-＾；
詳細は下記。
https://developer.twitter.com/en/docs/tweets/tweet-updates

import key
import json
import os
import sys
import datetime, pytz, time
import urllib.request

# Resource URL; tweet_mode=extended is requested so that extended_entities
# is still returned for long tweets.
TL = (
    "https://api.twitter.com/1.1/statuses/user_timeline.json"
    "?tweet_mode=extended"
)

# Interactive input
userID = input("userID:")
GET_AT_ONCE = int(input("Get at once(max:200):"))
GET_COUNT = int(input("Get count(max:900/15min):"))

params = dict(
    screen_name=userID,       # account name to fetch (without "@")
    count=GET_AT_ONCE,        # number of tweets to fetch per request
    include_entities=True,    # whether to include images and videos
    exclude_replies=False,    # whether to drop replies (False keeps replies)
    include_rts=False,        # whether to include retweets
)

# Module-level state shared by getTL(), saveImg() and the main loop
timeline = ""
last_id = ""
old_id = "0"

# Directory the downloaded media is written to
foldername = "./images/{}".format(userID)

# Authenticate using the session factory from key.py
twitter = key.CreateOAuthSession()

# Fetch one page of the timeline
def getTL():
    """Request the timeline and store the decoded tweets in ``timeline``.

    Sends a GET to ``TL`` with the module-level ``params``. On HTTP 200 the
    decoded JSON body is stored in the global ``timeline``. On any other
    status the status code is printed and ``timeline`` is set to an empty
    list, so later processing never iterates over an error body.
    """
    global timeline
    req = twitter.get(TL, params=params)
    if req.status_code != 200:
        print("ERROR: %d" % req.status_code)
        timeline = []
        return
    timeline = json.loads(req.text)

def change_time(created_at):
    """Convert a Twitter ``created_at`` timestamp to a JST string.

    Args:
        created_at: Timestamp in the form
            ``"Sun Jun 16 03:04:05 +0000 2019"``. Any numeric UTC offset
            is accepted, not only ``+0000``.

    Returns:
        The same instant in Japan Standard Time (UTC+9), formatted as
        ``"%Y-%m-%d_%H%M%S"``, e.g. ``"2019-06-16_120405"``.

    Raises:
        ValueError: If ``created_at`` does not match the expected format.
    """
    # %z parses the numeric offset and yields a timezone-aware datetime,
    # so no manual struct_time -> datetime reconstruction is needed.
    posted_at = datetime.datetime.strptime(created_at, '%a %b %d %H:%M:%S %z %Y')
    # A fixed +09:00 offset from the standard library is used for JST.
    jst = datetime.timezone(datetime.timedelta(hours=9), "JST")
    return posted_at.astimezone(jst).strftime("%Y-%m-%d_%H%M%S")

def _pick_best_mp4(variants):
    """Return the URL of the highest-bitrate mp4 variant, or None if absent."""
    mp4_variants = [v for v in variants if "mp4" in v.get("url", "")]
    if not mp4_variants:
        return None
    # The variants are not guaranteed to be ordered by bitrate, so compare
    # explicitly; a missing bitrate is treated as 0.
    best = max(mp4_variants, key=lambda v: v.get("bitrate") or 0)
    return best["url"]


def _download(url, filename, success_message):
    """Download ``url`` to ``filename`` and print the outcome."""
    try:
        urllib.request.urlretrieve(url, filename)
    except (OSError, ValueError) as exc:
        # URLError/HTTPError are OSError subclasses; ValueError covers a
        # malformed URL. One failed download must not abort the whole run.
        print("error: " + str(exc))
    else:
        print(success_message)


def saveImg():
    """Save the media attached to every tweet in the global ``timeline``.

    For each media entry of a tweet's ``extended_entities``:

    * if it carries ``video_info``, the highest-bitrate mp4 variant is
      saved as ``<foldername>/<tweet_date>[_<n>].mp4``;
    * its image (``media_url_https`` when present, otherwise ``media_url``,
      with ``:orig`` appended) is saved as
      ``<foldername>/<tweet_date>[_<n>].jpg``.

    ``<n>`` is the index of the media entry within the tweet and is omitted
    for the first entry. The global ``last_id`` is updated for every tweet
    visited, with or without media, so that paging can continue past
    batches that contain no media.
    """
    global last_id
    if not isinstance(timeline, list):
        # Anything other than a list of tweets (e.g. an error body) is skipped.
        return

    for content in timeline:
        last_id = content["id"]     # remember the last tweet ID visited

        media_items = content.get("extended_entities", {}).get("media", [])
        if not media_items:
            continue

        # The posting time (JST) is used as the file name.
        tweet_date = change_time(content["created_at"])

        for count, media in enumerate(media_items):
            suffix = "" if count == 0 else "_" + str(count)
            basename = foldername + "/" + tweet_date + suffix

            if "video_info" in media:
                video_url = _pick_best_mp4(media["video_info"].get("variants", []))
                if video_url is None:
                    print("error: no mp4 variant (" + tweet_date + ")")
                else:
                    _download(video_url, basename + ".mp4",
                              "Video is saved successfully (" + tweet_date + ")")

            # ":orig" requests the original-quality image.
            image_url = (media.get("media_url_https") or media["media_url"]) + ":orig"
            _download(image_url, basename + ".jpg",
                      "Image is saved successfully (" + tweet_date + ")")
            time.sleep(0.1)

if __name__ == "__main__":
    # Create the destination directory (and parents) if it does not exist.
    os.makedirs(foldername, exist_ok=True)

    for i in range(GET_COUNT):
        if i > 0:
            # Stop when no tweet ID has been recorded yet, or when the
            # previous request did not advance it (nothing older to fetch).
            if not last_id or old_id == last_id:
                break
            old_id = last_id
            # max_id is inclusive, so subtract 1 to continue strictly below
            # the last tweet already handled instead of fetching it again.
            params["max_id"] = int(last_id) - 1

        getTL()
        saveImg()
        print("After waiting for 10 seconds, perform the next acquisition.")
        time.sleep(10)

sleepは怒られないために一応入れてある程度。数字に根拠はないです。

※コード内で、「bitrateが小さい順に並んでいるので上書きして最大のものを取得」としていましたが、必ずしも順番に並んでいるわけではないようです。もうひと工夫が必要。

おわりに

多くの方にストックされているようですが、ストックする際はぜひLGTMも一緒にクリックしていただけると励みになります。
ぜひお願いします(⌒▽⌒)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up