LoginSignup
12
19

More than 5 years have passed since last update.

PythonでInstagramの特定ユーザの写真をスクレイピングする2018/3/22版(認証不要)

Last updated at Posted at 2018-03-22

説明

PHPでInstagramの特定ユーザの写真をスクレイピングする2018/3/20版(認証不要)のPython版

クラスと実行例

実行例

import sys
sys.path.append('./class')
import insta


if __name__ == "__main__":
  # Example driver: fetch one page of a user's timeline and print an
  # identifier (or the video URL) for every post on it.
  ins = insta.insta()

  # query_hash is sent along with the GraphQL timeline request.
  ins.setQuery_hash('XXXXXXXXXXXXXXXXXXXXXXXX')

  # Arguments: user id, pagination cursor (None = first page), page size.
  ins.get_media_id_user_feed("XXXXXXXXXXXXXXX", None, 12)

  medias = ins.get_media_by_user()

  # Number of posts fetched.
  print(len(medias))

  # Walk every fetched post and dispatch on its type.
  for m in medias:
    node = m["node"]
    typename = node["__typename"]
    shortcode = node["shortcode"]

    if typename == "GraphImage":
      print(node["id"])
    elif typename == "GraphSidecar":
      # A failed detail fetch returns False; skip the post rather than
      # printing children that belong to an earlier post.
      if ins.get_media_detail_by_shortcode(shortcode) is False:
        continue
      # The getter may return None when no children were stored.
      for c in ins.get_media_childrens() or []:
        print(c["node"]["id"])
    elif typename == "GraphVideo":
      print(shortcode)
      if ins.get_media_detail_by_shortcode(shortcode) is False:
        continue
      video = ins.get_video_by_shortcode()
      # None means no video detail was stored for this shortcode.
      if video:
        print(video["video_url"])
    else:
      # Unknown post type: print it so it can be handled later.
      print(typename)

クラス


import requests #https://github.com/kennethreitz/requests/
import json


class insta:
  """Small unauthenticated client for Instagram's public web endpoints.

  Each fetch method stores its result on the instance; the matching
  ``get_*`` accessor returns whatever the most recent fetch stored.
  """

  url = 'https://www.instagram.com/'
  url_login = 'https://www.instagram.com/accounts/login/ajax/'
  api_user_detail = 'https://i.instagram.com/api/v1/users/%s/info/'
  url_user_detail = 'https://www.instagram.com/%s/?__a=1'
  url_media_detail = 'https://www.instagram.com/p/%s/?__a=1'
  # Endpoint queried by get_media_id_user_feed.
  url_graphql_query = 'https://www.instagram.com/graphql/query/'

  def __init__(self):
    """Create the HTTP session and initialise all cached results to empty."""
    self.s = requests.Session()
    self.query_hash = ''
    self.media_by_user = None
    self.media_childrens = None
    self.video_by_shortcode = None

  def setQuery_hash(self, query_hash):
    """Store the ``query_hash`` value sent with the timeline query."""
    self.query_hash = query_hash

  def get_media_id_user_feed(self, user_id, after=None, first=12):
    """Fetch one page of a user's timeline and store its ``edges`` list.

    Args:
      user_id: Value sent as ``id`` in the query variables.
      after: Pagination cursor; omitted from the request when falsy.
      first: Value sent as ``first`` in the query variables.

    Raises:
      ValueError: The response body is not valid JSON.
      KeyError, TypeError: The JSON lacks the expected
        ``data.user.edge_owner_to_timeline_media.edges`` path.
    """
    variables = {
      "id": user_id,
      "first": first,
    }
    if after:
      variables["after"] = after

    # Hand the query string to requests as ``params`` so it is URL-encoded;
    # a cursor containing '&', '+' or '=' would otherwise corrupt the query.
    params = {
      "query_hash": self.query_hash,
      "variables": json.dumps(variables),
    }
    r = self.s.get(self.url_graphql_query, params=params)
    all_data = json.loads(r.text)
    self.media_by_user = all_data["data"]["user"]["edge_owner_to_timeline_media"]["edges"]

  def get_media_by_user(self):
    """Return the edges stored by the last ``get_media_id_user_feed`` call."""
    return self.media_by_user

  def get_media_detail_by_shortcode(self, shortcode):
    """Fetch the detail of one post and store its children or video data.

    For a ``GraphSidecar`` post the ``edge_sidecar_to_children`` edges are
    stored; for a ``GraphVideo`` post the whole ``shortcode_media`` dict is
    stored. Both stored values are cleared first, so after a failure or an
    unhandled post type the accessors return ``None`` instead of data left
    over from an earlier shortcode.

    Args:
      shortcode: Post shortcode substituted into ``url_media_detail``.

    Returns:
      True if the response was fetched and parsed, False otherwise.
    """
    self.media_childrens = None
    self.video_by_shortcode = None

    try:
      r = self.s.get(self.url_media_detail % shortcode)
      media = json.loads(r.text)["graphql"]["shortcode_media"]
      typename = media["__typename"]
      if typename == "GraphSidecar":
        if "edge_sidecar_to_children" in media:
          self.media_childrens = media["edge_sidecar_to_children"]["edges"]
      elif typename == "GraphVideo":
        self.video_by_shortcode = media
    except (OSError, ValueError, KeyError, TypeError):
      # OSError: transport errors (requests' exceptions derive from IOError).
      # ValueError: body is not JSON. KeyError/TypeError: unexpected shape.
      return False
    return True

  def get_media_childrens(self):
    """Return the sidecar children stored by the last detail fetch, or None."""
    return self.media_childrens

  def get_video_by_shortcode(self):
    """Return the video media dict stored by the last detail fetch, or None."""
    return self.video_by_shortcode


取得データ

配列構造表現するのに何かいいのないかなって思ってたらyamlっての発見
使い方正しいのかわからないが、いい感じなので使う

get_media_id_user_feedで取得するall_dataの配列構造

2018/03/23時点




data:
  user:
    edge_owner_to_timeline_media:
      count: XXX #(全記事数)
      edges:
        0: #(一つ目の記事)
          node:
            comments_disabled: false
            dimensions:
              height: 587
              width: 1080
            display_url: "https://"
            edge_media_to_preview_like:
              count: XX #(いいね数)
            edge_media_to_caption:
              edges:
                0:
                  node:
                    text: "キャプション"
            edge_media_to_comment:
              count: XX #(コメント数)
            id: "XXXXXXXXXXXXXXXXXXX"
            is_video: false
            owner:
              id: "XXXXXX"
            shortcode: "XXXXXXXXX"
            taken_at_timestamp: 1521733017
            thumbnail_resources:
              0:
                config_height: 150
                config_width: 150
                src: "https://scontent-~~~~~~~~~"
              1:
                config_height: 240
                config_width: 240
                src: "https://scontent-~~~~~~~~~"
              2:
                config_height: 320
                config_width: 320
                src: "https://scontent-~~~~~~~~~"
              3:
                config_height: 480
                config_width: 480
                src: "https://scontent-~~~~~~~~~"
              4:
                config_height: 640
                config_width: 640
                src: "https://scontent-~~~~~~~~~"
            thumbnail_src: "https://scontent-~~~~~~~~~"
            __typename: "GraphImage"
      page_info:
        end_cursor: "AQCeVVi1uheNSqSK85hsk4K1ljSZkjK1ZY2AC~~~~~~~~"
        has_next_page: true

get_media_detail_by_shortcodeで取得するall_dataの配列構造(GraphSidecar)


graphql:
  shortcode_media:
    caption_is_edited: false
    comments_disabled: false
    dimensions:
      height: 937
      width: 750
    display_resources:
      0:
        config_height: 800
        config_width: 640
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      1:
        config_height: 973
        config_width: 750
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      2:
        config_height: 1350
        config_width: 1080
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
    display_url:
    edge_media_preview_like:
      count: 8755
      edges:
        0:
          node:
            id: "XXXXXXXX"
            profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
            username: "xxxxxxxxxx"
        9:
    edge_media_to_caption:
      edges:
        0:
          node:
            text: "キャプション"
    edge_media_to_comment:
      count: 284
      edges:
        0:
          node:
            created_at: 1518973441
            id: "XXXXXXXXXXXXXX"
            owner:
              id: "XXXXXXXX"
              profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
              username: "xxxxxxxxxx"
            text: "コメント"
        38:
      page_info:
        end_cursor: "AQC4-dKEHCd1J1CeoS58T6ifFDoxqjO22MT3LaxwbpIGtGCXD_E~~~~~"
        has_next_page: true
    edge_media_to_sponsor_user:
      edges: []
    edge_media_to_tagged_user:
      edges: []
    edge_sidecar_to_children:
      edges:
        0:
          node:
            dimensions:
              height: 937
              width: 750
            display_resources:
            display_url: ""
            edge_media_to_tagged_user:
              edges: []
            gating_info: null
            is_video: false
            media_preview: null
            shortcode: "XXXXXX"
            should_log_client_event: false
            tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGljc190cmFj~~~~"
            __typename: "GraphImage"
        1:
    edge_web_media_to_related_media:
      edges: []
    gating_info: null
    id: "XXXXXXXXXXXXXXXXXXX"
    is_ad: false
    is_video: false
    location: null
    media_preview: null
    owner:
      blocked_by_viewer: false
      followed_by_viewer: false
      full_name: "名前"
      has_blocked_viewer: false
      id: "XXXXXXXXX"
      is_private: false
      is_unpublished: false
      is_verified: true
      profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
      requested_by_viewer: false
      username: "xxxxxxx"
    shortcode: "XXXXXXX"
    should_log_client_event: false
    taken_at_timestamp: 1518938669
    tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
    video_view_count: 24095
    viewer_has_liked: false
    viewer_has_saved: false
    viewer_has_saved_to_collection: false
    __typename: "GraphSidecar"

get_media_detail_by_shortcodeで取得するall_dataの配列構造(video)

graphql:
  shortcode_media:
    caption_is_edited: true
    comments_disabled: false
    dash_info:
      is_dash_eligible: true
      number_of_qualities: 4
      video_dash_manifest: '<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTi~~~~~'
    dimensions:
      height: 937
      width: 750
    display_resources:
      0:
        config_height: 800
        config_width: 640
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      1:
        config_height: 973
        config_width: 750
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      2:
        config_height: 1350
        config_width: 1080
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
    display_url:
    edge_media_preview_like:
      count: 8755
      edges:
        0:
          node:
            id: "XXXXXXXX"
            profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
            username: "xxxxxxxxxx"
        9:
    edge_media_to_caption:
      edges:
        0:
          node:
            text: "キャプション"
    edge_media_to_comment:
      count: 284
      edges:
        0:
          node:
            created_at: 1518973441
            id: "XXXXXXXXXXXXXX"
            owner:
              id: "XXXXXXXX"
              profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
              username: "xxxxxxxxxx"
            text: "コメント"
        38:
    edge_media_to_sponsor_user:
      edges: []
    edge_media_to_tagged_user:
      edges: []
    edge_web_media_to_related_media:
      edges:
        0:
          node:
            shortcode: "XXXXXXX"
            thumbnail_src: "https://scontent-nrt1-1.cdninstagram.com/~~~~~"
        19:
    gating_info: null
    id: "XXXXXXXXXXXXXXXXXXX"
    is_ad: false
    is_video: true
    location:
      has_public_page: true
      id: "XXXXXXXXXXXXXXXXX"
      name: "ロケーション名"
      slug: ""
    media_preview:
    owner:
      blocked_by_viewer: false
      followed_by_viewer: true
      full_name: "名前"
      has_blocked_viewer: false
      id: "XXXXXXXXX"
      is_private: false
      is_unpublished: false
      is_verified: true
      profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
      requested_by_viewer: false
      username: "xxxxxxx"
    shortcode: "XXXXXXX"
    should_log_client_event: false
    taken_at_timestamp: 1518938669
    tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
    video_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
    video_view_count: 24095
    viewer_has_liked: false
    viewer_has_saved: false
    viewer_has_saved_to_collection: false
    __typename: "GraphVideo"


参考

12
19
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
12
19