Python
Instagram
python3

PythonでInstagramの特定ユーザの写真をスクレイピングする2018/3/22版(認証不要)

説明

PHPでInstagramの特定ユーザの写真をスクレイピングする2018/3/20版(認証不要)のPython版

クラスと実行例

実行例

import sys
sys.path.append('./class')
import insta


if __name__ == "__main__":
  # Example run: fetch one page of a user's timeline and print an identifier
  # for every media item (or the video URL for video posts).
  ins = insta.insta()

  # The query_hash is sent with the timeline request (placeholder value here).
  ins.setQuery_hash('XXXXXXXXXXXXXXXXXXXXXXXX')

  # Request 12 timeline entries for the given user id, without a paging cursor.
  ins.get_media_id_user_feed("XXXXXXXXXXXXXXX", None, 12)

  # get_media_by_user() returns None until a feed has been stored; fall back
  # to an empty list so the count and the loop below stay valid.
  medias = ins.get_media_by_user() or []

  # Number of posts fetched
  print(len(medias))

  # Walk through every fetched post
  for m in medias:
    node = m["node"]
    typename = node["__typename"]
    shortcode = node["shortcode"]

    if typename == "GraphImage":
      print(node["id"])
    elif typename == "GraphSidecar":
      # The detail lookup reports failure with False; skip the post then
      # instead of printing children that belong to an earlier lookup.
      if ins.get_media_detail_by_shortcode(shortcode) is False:
        continue
      # The getter returns None when no children were stored.
      for c in ins.get_media_childrens() or []:
        print(c["node"]["id"])
    elif typename == "GraphVideo":
      print(shortcode)
      if ins.get_media_detail_by_shortcode(shortcode) is False:
        continue
      video = ins.get_video_by_shortcode()
      # None means no video detail was stored for this shortcode.
      if video:
        print(video["video_url"])
    else:
      print(typename)

クラス

import requests #https://github.com/kennethreitz/requests/
import json


class insta:
  """Fetch a user's timeline and per-post details from Instagram web URLs.

  All requests go through one ``requests.Session`` stored in ``self.s``.
  Fetch methods store their result on the instance; the matching
  ``get_*`` methods hand the stored result back.
  """

  url = 'https://www.instagram.com/'
  url_login = 'https://www.instagram.com/accounts/login/ajax/'
  api_user_detail = 'https://i.instagram.com/api/v1/users/%s/info/'
  url_user_detail = 'https://www.instagram.com/%s/?__a=1'
  url_media_detail = 'https://www.instagram.com/p/%s/?__a=1'
  # Endpoint used by get_media_id_user_feed.
  url_graphql = 'https://www.instagram.com/graphql/query/'

  def __init__(self):
    """Create the HTTP session and start with no stored results."""
    self.s = requests.Session()
    self.query_hash = ''
    self.media_by_user = None
    self.media_childrens = None
    self.video_by_shortcode = None

  def setQuery_hash(self, query_hash):
    """Store the query_hash sent by get_media_id_user_feed."""
    self.query_hash = query_hash

  def get_media_id_user_feed(self, user_id, after=None, first=12):
    """Fetch timeline entries of a user and store their ``edges`` list.

    Args:
      user_id: Sent as the ``id`` query variable.
      after: Optional value sent as the ``after`` query variable; omitted
        when falsy.
      first: Sent as the ``first`` query variable.

    Raises:
      ValueError: If the response body is not valid JSON.
      KeyError: If the response lacks
        ``data.user.edge_owner_to_timeline_media.edges``.

    The result is read back with get_media_by_user().
    """
    variables = {
      "id": user_id,
      "first": first,
    }
    if after:
      variables["after"] = after

    # Hand the query to requests as params so it is percent-encoded; pasting
    # the raw JSON into the URL breaks once a value contains '&' or '#'.
    params = {
      "query_hash": self.query_hash,
      "variables": json.dumps(variables),
    }

    # NOTE(review): headers="" is kept unchanged from the earlier
    # implementation - confirm whether an explicit headers dict is wanted.
    r = self.s.get(self.url_graphql, params=params, headers="")
    all_data = json.loads(r.text)
    self.media_by_user = all_data["data"]["user"]["edge_owner_to_timeline_media"]["edges"]

  def get_media_by_user(self):
    """Return the edges stored by get_media_id_user_feed (None before that)."""
    return self.media_by_user

  def get_media_detail_by_shortcode(self, shortcode):
    """Fetch the detail of one post and store its children or video data.

    For a ``GraphSidecar`` post the ``edge_sidecar_to_children`` edges are
    stored (read with get_media_childrens()); for a ``GraphVideo`` post the
    whole ``shortcode_media`` dict is stored (read with
    get_video_by_shortcode()). Both stored values are cleared at the start
    of every call, so they are None for any other typename and after a
    failure.

    Returns:
      True when the response was fetched and parsed, False when the request
      or the parsing failed.
    """
    # Clear earlier results first: otherwise a failed lookup would leave the
    # previous post's data in place and the getters would return it.
    self.media_childrens = None
    self.video_by_shortcode = None

    try:
      media_url = self.url_media_detail % shortcode
      r = self.s.get(media_url, headers="")
      media = json.loads(r.text)["graphql"]["shortcode_media"]
      typename = media["__typename"]

      children = None
      video = None
      if typename == "GraphSidecar":
        if "edge_sidecar_to_children" in media:
          children = media["edge_sidecar_to_children"]["edges"]
      elif typename == "GraphVideo":
        video = media
    except Exception:
      # Best effort: any request/parse problem is reported as False.
      # Exception (not a bare except) lets KeyboardInterrupt/SystemExit pass.
      return False

    self.media_childrens = children
    self.video_by_shortcode = video
    return True

  def get_media_childrens(self):
    """Return the sidecar children stored by the last detail lookup, or None."""
    return self.media_childrens

  def get_video_by_shortcode(self):
    """Return the video detail stored by the last detail lookup, or None."""
    return self.video_by_shortcode


取得データ

配列構造を表現するのに何か良い方法はないかと探していたところ、YAMLを見つけた。
使い方が正しいのかはわからないが、見やすいのでYAML風の表記で記載する。

get_media_id_user_feedで取得するall_dataの配列構造

2018/03/23時点



data:
  user:
    edge_owner_to_timeline_media:
      count: XXX #(全記事数)
      edges:
        0: #(一つ目の記事)
          node:
            comments_disabled: false
            dimensions:
              height: 587
              width: 1080
            display_url: "https://"
            edge_media_to_preview_like:
              count: XX #(いいね数)
            edge_media_to_caption:
              edges:
                0:
                  node:
                    text: "キャプション"
            edge_media_to_comment:
              count: XX #(コメント数)
            id: "XXXXXXXXXXXXXXXXXXX"
            is_video: false
            owner:
              id: "XXXXXX"
            shortcode: "XXXXXXXXX"
            taken_at_timestamp: 1521733017
            thumbnail_resources:
              0:
                config_height: 150
                config_width: 150
                src: "https://scontent-~~~~~~~~~"
              1:
                config_height: 240
                config_width: 240
                src: "https://scontent-~~~~~~~~~"
              2:
                config_height: 320
                config_width: 320
                src: "https://scontent-~~~~~~~~~"
              3:
                config_height: 480
                config_width: 480
                src: "https://scontent-~~~~~~~~~"
              4:
                config_height: 640
                config_width: 640
                src: "https://scontent-~~~~~~~~~"
            thumbnail_src: "https://scontent-~~~~~~~~~"
            __typename: "GraphImage"
      page_info:
        end_cursor: "AQCeVVi1uheNSqSK85hsk4K1ljSZkjK1ZY2AC~~~~~~~~"
        has_next_page: true

get_media_detail_by_shortcodeで取得するall_dataの配列構造(GraphSidecar)

graphql:
  shortcode_media:
    caption_is_edited: false
    comments_disabled: false
    dimensions:
      height: 937
      width: 750
    display_resources:
      0:
        config_height: 800
        config_width: 640
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      1:
        config_height: 973
        config_width: 750
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      2:
        config_height: 1350
        config_width: 1080
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
    display_url:
    edge_media_preview_like:
      count: 8755
      edges:
        0:
          node:
            id: "XXXXXXXX"
            profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
            username: "xxxxxxxxxx"
        9:
    edge_media_to_caption:
      edges:
        0:
          node:
            text: "キャプション"
    edge_media_to_comment:
      count: 284
      edges:
        0:
          node:
            created_at: 1518973441
            id: "XXXXXXXXXXXXXX"
            owner:
              id: "XXXXXXXX"
              profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
              username: "xxxxxxxxxx"
            text: "コメント"
        38:
      page_info:
        end_cursor: "AQC4-dKEHCd1J1CeoS58T6ifFDoxqjO22MT3LaxwbpIGtGCXD_E~~~~~"
        has_next_page: true
    edge_media_to_sponsor_user:
      edges: []
    edge_media_to_tagged_user:
      edges: []
    edge_sidecar_to_children:
      edges:
        0:
          node:
            dimensions:
              height: 937
              width: 750
            display_resources:
            display_url: ""
            edge_media_to_tagged_user:
              edges: []
            gating_info: null
            is_video: false
            media_preview: null
            shortcode: "XXXXXX"
            should_log_client_event: false
            tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGljc190cmFj~~~~"
            __typename: "GraphImage"
        1:
    edge_web_media_to_related_media:
      edges: []
    gating_info: null
    id: "XXXXXXXXXXXXXXXXXXX"
    is_ad: false
    is_video: false
    location: null
    media_preview: null
    owner:
      blocked_by_viewer: false
      followed_by_viewer: false
      full_name: "名前"
      has_blocked_viewer: false
      id: "XXXXXXXXX"
      is_private: false
      is_unpublished: false
      is_verified: true
      profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
      requested_by_viewer: false
      username: "xxxxxxx"
    shortcode: "XXXXXXX"
    should_log_client_event: false
    taken_at_timestamp: 1518938669
    tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
    video_view_count: 24095
    viewer_has_liked: false
    viewer_has_saved: false
    viewer_has_saved_to_collection: false
    __typename: "GraphSidecar"

get_media_detail_by_shortcodeで取得するall_dataの配列構造(GraphVideo)

graphql:
  shortcode_media:
    caption_is_edited: true
    comments_disabled: false
    dash_info:
      is_dash_eligible: true
      number_of_qualities: 4
      video_dash_manifest: '<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" minBufferTi~~~~~'
    dimensions:
      height: 937
      width: 750
    display_resources:
      0:
        config_height: 800
        config_width: 640
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      1:
        config_height: 973
        config_width: 750
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
      2:
        config_height: 1350
        config_width: 1080
        src: "https://scontent-nrt1-1.cdninstagram.com~~~~~~~~~~"
    display_url:
    edge_media_preview_like:
      count: 8755
      edges:
        0:
          node:
            id: "XXXXXXXX"
            profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
            username: "xxxxxxxxxx"
        9:
    edge_media_to_caption:
      edges:
        0:
          node:
            text: "キャプション"
    edge_media_to_comment:
      count: 284
      edges:
        0:
          node:
            created_at: 1518973441
            id: "XXXXXXXXXXXXXX"
            owner:
              id: "XXXXXXXX"
              profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/~~~~~~~~~"
              username: "xxxxxxxxxx"
            text: "コメント"
        38:
    edge_media_to_sponsor_user:
      edges: []
    edge_media_to_tagged_user:
      edges: []
    edge_web_media_to_related_media:
      edges:
        0:
          node:
            shortcode: "XXXXXXX"
            thumbnail_src: "https://scontent-nrt1-1.cdninstagram.com/~~~~~"
        19:
    gating_info: null
    id: "XXXXXXXXXXXXXXXXXXX"
    is_ad: false
    is_video: true
    location:
      has_public_page: true
      id: "XXXXXXXXXXXXXXXXX"
      name: "ロケーション名"
      slug: ""
    media_preview:
    owner:
      blocked_by_viewer: false
      followed_by_viewer: true
      full_name: "名前"
      has_blocked_viewer: false
      id: "XXXXXXXXX"
      is_private: false
      is_unpublished: false
      is_verified: true
      profile_pic_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
      requested_by_viewer: false
      username: "xxxxxxx"
    shortcode: "XXXXXXX"
    should_log_client_event: false
    taken_at_timestamp: 1518938669
    tracking_token: "eyJ2ZXJzaW9uIjo1LCJwYXlsb2FkIjp7ImlzX2FuYWx5dGl~~~~~~~~"
    video_url: "https://scontent-nrt1-1.cdninstagram.com/vp/~~~~~~~~~~~"
    video_view_count: 24095
    viewer_has_liked: false
    viewer_has_saved: false
    viewer_has_saved_to_collection: false
    __typename: "GraphVideo"


参考