##説明
「PHPでInstagramの特定ユーザの写真をスクレイピングする 2018/3/20版(認証不要)」をPythonに移植したものです。
##クラスと実行例
###実行例
import sys
sys.path.append('./class')
import insta
if __name__ == "__main__":
    # Example: list the media ids / video URLs of one user's latest posts.
    ins = insta.insta()
    ins.setQuery_hash('XXXXXXXXXXXXXXXXXXXXXXXX')
    ins.get_media_id_user_feed("XXXXXXXXXXXXXXX", None, 12)
    medias = ins.get_media_by_user()
    # Number of posts fetched
    print(len(medias))
    # Walk through every fetched post
    for m in medias:
        node = m["node"]
        typename = node["__typename"]
        shortcode = node["shortcode"]
        if typename == "GraphImage":
            print(node["id"])
        elif typename == "GraphSidecar":
            ins.get_media_detail_by_shortcode(shortcode)
            # The accessor yields None when no children were stored (for
            # instance when the detail request failed), so fall back to an
            # empty list instead of iterating over None.
            for c in ins.get_media_childrens() or []:
                print(c["node"]["id"])
        elif typename == "GraphVideo":
            print(shortcode)
            ins.get_media_detail_by_shortcode(shortcode)
            video = ins.get_video_by_shortcode()
            # Same guard as above: no video details means nothing to print.
            if video is not None:
                print(video["video_url"])
        else:
            print(typename)
###クラス
import requests #https://github.com/kennethreitz/requests/
import json
class insta:
    """Small client for Instagram's public web endpoints (no login performed).

    Each fetch method stores its result on the instance; the matching
    ``get_*`` accessor returns whatever was stored last (``None`` if nothing
    has been stored).
    """

    url = 'https://www.instagram.com/'
    url_login = 'https://www.instagram.com/accounts/login/ajax/'
    api_user_detail = 'https://i.instagram.com/api/v1/users/%s/info/'
    url_user_detail = 'https://www.instagram.com/%s/?__a=1'
    url_media_detail = 'https://www.instagram.com/p/%s/?__a=1'
    # GraphQL endpoint without the query string; the query parameters are
    # passed separately so that they get URL-encoded.
    url_graphql_query = 'https://www.instagram.com/graphql/query/'

    def __init__(self):
        """Create the HTTP session and initialise all stored results."""
        self.s = requests.Session()
        self.query_hash = ''
        self.media_by_user = None
        self.media_childrens = None
        self.video_by_shortcode = None

    def setQuery_hash(self, query_hash):
        """Set the ``query_hash`` value sent with GraphQL feed requests."""
        self.query_hash = query_hash

    def get_media_id_user_feed(self, user_id, after=None, first=12):
        """Fetch one page of a user's timeline and store its ``edges`` list.

        Args:
            user_id: Sent as ``id`` in the GraphQL variables.
            after: Pagination cursor; left out of the request when falsy.
            first: Sent as ``first`` in the GraphQL variables.

        Raises:
            Errors from the HTTP request, from JSON decoding, or from a
            missing key in the response are not caught here.
        """
        variables = {"id": user_id, "first": first}
        if after:
            variables["after"] = after
        # Hand the query to requests as ``params`` instead of concatenating
        # it into the URL, so characters such as '&', '+' or '=' inside the
        # cursor or hash cannot corrupt the query string.
        params = {
            "query_hash": self.query_hash,
            "variables": json.dumps(variables),
        }
        # NOTE(review): headers is passed as an empty string rather than a
        # dict; kept unchanged to avoid altering the request -- confirm intent.
        r = self.s.get(self.url_graphql_query, params=params, headers="")
        all_data = json.loads(r.text)
        self.media_by_user = all_data["data"]["user"]["edge_owner_to_timeline_media"]["edges"]

    def get_media_by_user(self):
        """Return the edges stored by the last ``get_media_id_user_feed`` call."""
        return self.media_by_user

    def get_media_detail_by_shortcode(self, shortcode):
        """Fetch one post's details and store its children or video data.

        Both stored results are cleared first, so a failed request or a post
        of another type never leaves data from an earlier call behind.
        For a ``GraphSidecar`` post the child edges are stored; for a
        ``GraphVideo`` post the whole ``shortcode_media`` dict is stored.

        Args:
            shortcode: Post shortcode substituted into ``url_media_detail``.

        Returns:
            True when the response was fetched and parsed, False when the
            request failed or the response did not have the expected shape.
        """
        self.media_childrens = None
        self.video_by_shortcode = None
        try:
            media_url = self.url_media_detail % shortcode
            # NOTE(review): headers="" kept as in the feed request -- confirm.
            r = self.s.get(media_url, headers="")
        except (TypeError, requests.RequestException):
            return False
        try:
            media = json.loads(r.text)["graphql"]["shortcode_media"]
            typename = media["__typename"]
            if typename == "GraphSidecar" and "edge_sidecar_to_children" in media:
                self.media_childrens = media["edge_sidecar_to_children"]["edges"]
            elif typename == "GraphVideo":
                self.video_by_shortcode = media
        except (ValueError, KeyError, TypeError):
            # Invalid JSON, or a response without the expected keys/types.
            return False
        return True

    def get_media_childrens(self):
        """Return the sidecar child edges stored by the last detail fetch."""
        return self.media_childrens

    def get_video_by_shortcode(self):
        """Return the video ``shortcode_media`` dict stored by the last detail fetch."""
        return self.video_by_shortcode
##取得データ
配列構造を表現する良い方法を探していたところ、YAMLを見つけました。
記法として厳密に正しいかは分かりませんが、構造が読みやすいので以下ではYAML風に記載します。
###get_media_id_user_feedで取得するall_dataの配列構造
2018/03/23時点
data:
user:
edge_owner_to_timeline_media:
count: XXX #(全記事数)
edges:
0: #(一つ目の記事)
node:
comments_disabled: false
dimensions:
height: 587
width: 1080
display_url: "https://"
edge_media_to_preview_like:
count: XX #(いいね数)
edge_media_to_caption:
edges:
0:
node:
text: "キャプション"
edge_media_to_comment:
count: XX #(コメント数)
id: "XXXXXXXXXXXXXXXXXXX"
is_video: false
owner:
id: "XXXXXX"
shortcode: "XXXXXXXXX"
taken_at_timestamp: 1521733017
thumbnail_resources:
0:
config_height: 150
config_width: 150
src: "https://scontent-~~~~~~~~~"
1:
config_height: 240
config_width: 240
src: "https://scontent-~~~~~~~~~"
2:
config_height: 320
config_width: 320
src: "https://scontent-~~~~~~~~~"
3:
config_height: 480
config_width: 480
src: "https://scontent-~~~~~~~~~"
4:
config_height: 640
config_width: 640
src: "https://scontent-~~~~~~~~~"
thumbnail_src: "https://scontent-~~~~~~~~~"
__typename: "GraphImage"
page_info:
end_cursor: "AQCeVVi1uheNSqSK85hsk4K1ljSZkjK1ZY2AC~~~~~~~~"
has_next_page: true
###get_media_detail_by_shortcodeで取得するall_dataの配列構造(GraphSidecar)
graphql:
shortcode_media:
caption_is_edited: false
comments_disabled: false
dimensions:
height: 937
width: 750
display_resources:
0:
config_height: 800
config_width: 640
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
1:
config_height: 973
config_width: 750
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
2:
config_height: 1350
config_width: 1080
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
display_url:
edge_media_preview_like:
count: 8755
edges:
0:
node:
id: "XXXXXXXX"
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
username: "xxxxxxxxxx"
9:
edge_media_to_caption:
edges:
0:
node:
text: "キャプション"
edge_media_to_comment:
count: 284
edges:
0:
node:
created_at: 1518973441
id: "XXXXXXXXXXXXXX"
owner:
id: "XXXXXXXX"
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
username: "xxxxxxxxxx"
text: "コメント"
38:
page_info:
end_cursor: "AQC4-dKEHCd1J1CeoS58T6ifFDoxqjO22MT3LaxwbpIGtGCXD_E~~~~~"
has_next_page: true
edge_media_to_sponsor_user:
edges: []
edge_media_to_tagged_user:
edges: []
edge_sidecar_to_children:
edges:
0:
node:
dimensions:
height: 937
width: 750
display_resources:
display_url: ""
edge_media_to_tagged_user:
edges: []
gating_info: null
is_video: false
media_preview: null
shortcode: "XXXXXX"
should_log_client_event: false
tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGljc190cmFj~~~~"
__typename: "GraphImage"
1:
edge_web_media_to_related_media:
edges: []
gating_info: null
id: "XXXXXXXXXXXXXXXXXXX"
is_ad: false
is_video: false
location: null
media_preview: null
owner:
blocked_by_viewer: false
followed_by_viewer: false
full_name: "名前"
has_blocked_viewer: false
id: "XXXXXXXXX"
is_private: false
is_unpublished: false
is_verified: true
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
requested_by_viewer: false
username: "xxxxxxx"
shortcode: "XXXXXXX"
should_log_client_event: false
taken_at_timestamp: 1518938669
tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
video_view_count: 24095
viewer_has_liked: false
viewer_has_saved: false
viewer_has_saved_to_collection: false
__typename: "GraphSidecar"
###get_media_detail_by_shortcodeで取得するall_dataの配列構造(video)
graphql:
shortcode_media:
caption_is_edited: true
comments_disabled: false
dash_info:
is_dash_eligible: true
number_of_qualities: 4
video_dash_manifest: '<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTi~~~~~'
dimensions:
height: 937
width: 750
display_resources:
0:
config_height: 800
config_width: 640
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
1:
config_height: 973
config_width: 750
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
2:
config_height: 1350
config_width: 1080
src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
display_url:
edge_media_preview_like:
count: 8755
edges:
0:
node:
id: "XXXXXXXX"
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
username: "xxxxxxxxxx"
9:
edge_media_to_caption:
edges:
0:
node:
text: "キャプション"
edge_media_to_comment:
count: 284
edges:
0:
node:
created_at: 1518973441
id: "XXXXXXXXXXXXXX"
owner:
id: "XXXXXXXX"
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
username: "xxxxxxxxxx"
text: "コメント"
38:
edge_media_to_sponsor_user:
edges: []
edge_media_to_tagged_user:
edges: []
edge_web_media_to_related_media:
edges:
0:
node:
shortcode: "XXXXXXX"
thumbnail_src: "https://scontent-nrt1-1.cdninstagram.com/~~~~~"
19:
gating_info: null
id: "XXXXXXXXXXXXXXXXXXX"
is_ad: false
is_video: true
location:
has_public_page: true
id: "XXXXXXXXXXXXXXXXX"
name: "ロケーション名"
slug: ""
media_preview:
owner:
blocked_by_viewer: false
followed_by_viewer: true
full_name: "名前"
has_blocked_viewer: false
id: "XXXXXXXXX"
is_private: false
is_unpublished: false
is_verified: true
profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
requested_by_viewer: false
username: "xxxxxxx"
shortcode: "XXXXXXX"
should_log_client_event: false
taken_at_timestamp: 1518938669
tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
video_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
video_view_count: 24095
viewer_has_liked: false
viewer_has_saved: false
viewer_has_saved_to_collection: false
__typename: "GraphVideo"
##参考