Scraping financial results from SBI with Python

Posted at 2018-08-02

Pass the account ID and password as strings when creating an instance of the scraper class (SBI_Scraper); call its method with a ticker code and it returns the financial results as a pandas DataFrame.

Development environment

Python 3.6.4
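
The script depends on requests, Beautiful Soup 4 and pandas; assuming pip is available, they can be installed with:

pip install requests beautifulsoup4 pandas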

Usage

 >>> d=SBI_Scraper('user_id','password')
 >>> d.get_fi_param('6050')
 flag 証券コード 期末期    売上高       営業益        経常益     最終益   1株益  
   S  6050   連13. 9*   2487000000   188000000   228000000  129000000  13.3   
   S  6050   連14. 9*   2471000000   200000000   235000000  132000000  13.7   
   S  6050   連15. 9*   3018000000   328000000   350000000  192000000  19.8   
   S  6050   連16. 9   3813000000   562000000   554000000  350000000  35.3   
   S  6050   連17. 9   5067000000   811000000   840000000  572000000  56.6   
   S  6050   連18. 9予  6100000000   960000000   970000000  640000000  62.6   
   S  6050   連19. 9予  7400000000  1120000000  1130000000  720000000  70.4  
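
Because the return value is a plain pandas DataFrame, derived figures are easy to add. A small sketch computing the operating margin from the columns above (the 営業利益率 column name is just an example; pd.to_numeric is needed because placeholder cells such as '-' or '‥' come back as strings):

 >>> import pandas as pd
 >>> df = d.get_fi_param('6050')
 >>> op = pd.to_numeric(df['営業益'], errors='coerce')
 >>> sales = pd.to_numeric(df['売上高'], errors='coerce')
 >>> df['営業利益率'] = op / sales  # operating margin per fiscal period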

Code

import re

import pandas as pd
import requests
from bs4 import BeautifulSoup

class SBI_Scraper():

    def __init__(self, user_id, password):
        self.base_url = "https://site1.sbisec.co.jp/ETGate/"
        self.user_id = user_id
        self.password = password
        self.login()

    def login(self):
        post = {
                'JS_FLG': "0",
                'BW_FLG': "0",
                "_ControlID": "WPLETlgR001Control",
                "_DataStoreID": "DSWPLETlgR001Control",
                "_PageID": "WPLETlgR001Rlgn20",
                "_ActionID": "login",
                "getFlg": "on",
                "allPrmFlg": "on",
                "_ReturnPageInfo": "WPLEThmR001Control/DefaultPID/DefaultAID/DSWPLEThmR001Control",
                "user_id": self.user_id,
                "user_password": self.password
                }
        # A requests.Session keeps the login cookies for the requests that follow
        self.session = requests.Session()
        res = self.session.post(self.base_url, data=post)
        res.encoding = res.apparent_encoding

    def int_float_multiply(self, int_, num):
        # Scale a numeric value by 10**num (num=6 turns the million-yen
        # figures on the page into yen); non-numeric placeholders pass through
        if isinstance(int_, (int, float)):
            return int_ * 10 ** num
        return int_

    def int_converter(self, str_):
        # Convert a numeric string to int or float; '-' and '‥' mark
        # missing values on the page, so leave those cells as strings
        if isinstance(str_, str):
            if re.search('-|‥', str_):
                return str_
            elif '.' in str_:
                return float(str_.replace(',', ''))
            else:
                return int(str_.replace(',', ''))
        return str_

    def dividend_converter(self, str_):
        # Strip kanji annotations and '*' marks from the dividend cell;
        # for a range like '10〜12', keep only the lower bound
        str_ = re.sub('[一-龥]', '', str_)
        str_ = re.sub(r'\*', '', str_)
        if '〜' in str_:
            return str_[:str_.find('〜')]
        return str_


    def financePage_html(self, ticker):
        # Fetch the Shikiho (company report) page for the ticker; the hidden
        # form parameters below mirror what the site's own form posts
        post = {
                "_ControlID": "WPLETsiR001Control",
                "_DataStoreID": "DSWPLETsiR001Control",
                "_PageID": "WPLETsiR001Idtl50",
                "getFlg": "on",
                "_ActionID": "goToSeasonReportOfFinanceStatus",
                "s_rkbn": "2",
                "s_btype": "",
                "i_stock_sec": str(ticker),
                "i_dom_flg": "1",
                "i_exchange_code": "JPN",
                "i_output_type": "4",
                "exchange_code": "TKY",
                "stock_sec_code_mul": str(ticker),
                "ref_from": "1",
                "ref_to": "20",
                "wstm4130_sort_id": "" ,
                "wstm4130_sort_kbn":  "",
                "qr_keyword": "1",
                "qr_suggest": "1",
                "qr_sort": "1"
                }
        html = self.session.post(self.base_url, data=post)
        html.encoding = html.apparent_encoding
        return html

    def get_fi_param(self, ticker):
        columns = ['flag', '証券コード', '期末期', '売上高', '営業益', '経常益', '最終益', '1株益', '1株配']
        pd_data_all = pd.DataFrame(columns=columns)
        dict_ = {}
        html = self.financePage_html(ticker)
        soup = BeautifulSoup(html.text, 'html.parser')
        # The performance table lives inside the 'shikihouBox01' div
        div_shikihou = soup.find_all('div', {'class': 'shikihouBox01'})[0]
        table = div_shikihou.find_all('table')[1]
        gyousyu_str = table.find_all('tr')[1].string  # sector label (not used below)
        tr_list = table.tr.td.table.find_all('tr', {'align': 'right'})
        for i in tr_list:
            # Keep only rows whose first cell looks like a fiscal-period label
            # (連 = consolidated, 単 = non-consolidated, ...)
            if re.compile("連|単|◎|◇|□").search(str(i.td.string)):
                dict_['証券コード'] = ticker
                dict_['flag'] = 'S'
                dict_['期末期'] = i.td.string.replace('\n', '')
                td_list = i.contents
                # Figures on the page are in millions of yen, hence the 10**6 scaling
                dict_['売上高'] = self.int_float_multiply(self.int_converter(td_list[3].string.replace('\n', '')), 6)
                dict_['営業益'] = self.int_float_multiply(self.int_converter(td_list[5].string.replace('\n', '')), 6)
                dict_['経常益'] = self.int_float_multiply(self.int_converter(td_list[7].string.replace('\n', '')), 6)
                dict_['最終益'] = self.int_float_multiply(self.int_converter(td_list[9].string.replace('\n', '')), 6)
                # Per-share figures are already in yen
                dict_['1株益'] = self.int_converter(td_list[11].string.replace('\n', ''))
                dict_['1株配'] = self.int_converter(self.dividend_converter(td_list[13].string.replace('\n', '')))
                pd_data = pd.DataFrame(dict_, index=['1'])
                # DataFrame.append and .ix are gone from current pandas; use concat
                pd_data_all = pd.concat([pd_data_all, pd_data])
        return pd_data_all[columns]
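
Pulling several tickers in one run is just a loop over the method. A minimal sketch, reusing the imports from the listing above (the ticker list is only an example, and the one-second pause is a courtesy to the server rather than anything the site requires):

import time

d = SBI_Scraper('user_id', 'password')
frames = []
for ticker in ['6050', '7203', '9984']:  # example tickers
    frames.append(d.get_fi_param(ticker))
    time.sleep(1)  # pause between requests
pd.concat(frames).to_csv('shikihou.csv', index=False)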

In closing

This will stop working if the layout of SBI's website changes.
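
One way to soften that failure mode is to check for the expected markup before parsing and fail with a readable message. A minimal sketch of such a guard (get_fi_param could call it instead of indexing the find_all result directly):

def find_shikihou_box(soup):
    # Raise a clear error instead of an obscure IndexError when the
    # 'shikihouBox01' div disappears or is renamed
    boxes = soup.find_all('div', {'class': 'shikihouBox01'})
    if not boxes:
        raise RuntimeError('shikihouBox01 not found: the SBI page layout may have changed')
    return boxes[0]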
