
Scraping company financials from SBI with Python



Create an instance of the class (SBI_Scraper) with your account user ID and password as strings, then call its method with a securities code; it returns the financial results as a pandas DataFrame.

Development environment

Python 3.6.4

Usage

 >>> d=SBI_Scraper('user_id','password')
 >>> d.get_fi_param('6050')
 flag 証券コード 期末期    売上高       営業益        経常益     最終益   1株益  
   S  6050   連13. 9*   2487000000   188000000   228000000  129000000  13.3   
   S  6050   連14. 9*   2471000000   200000000   235000000  132000000  13.7   
   S  6050   連15. 9*   3018000000   328000000   350000000  192000000  19.8   
   S  6050   連16. 9   3813000000   562000000   554000000  350000000  35.3   
   S  6050   連17. 9   5067000000   811000000   840000000  572000000  56.6   
   S  6050   連18. 9予  6100000000   960000000   970000000  640000000  62.6   
   S  6050   連19. 9予  7400000000  1120000000  1130000000  720000000  70.4  
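
Since get_fi_param returns an ordinary pandas DataFrame, the result can be post-processed with the usual pandas API. A minimal sketch that collects a few tickers and writes everything to CSV (the ticker list and file name are only examples):

import pandas as pd

d = SBI_Scraper('user_id', 'password')
# Stack the results for several tickers into one DataFrame.
frames = [d.get_fi_param(code) for code in ['6050', '7203']]
results = pd.concat(frames, ignore_index=True)
# utf-8-sig lets Excel open the Japanese column headers correctly.
results.to_csv('sbi_results.csv', index=False, encoding='utf-8-sig')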

Code

import requests
from bs4 import BeautifulSoup
import re
import pandas as pd

class SBI_Scraper():

    def __init__(self, user_id, password):
        self.base_url = "https://site1.sbisec.co.jp/ETGate/"
        self.user_id = user_id
        self.password = password
        # Log in right away so later requests reuse the authenticated session.
        self.login()

    def login(self):
        # POST the login form to ETGate; the session keeps the auth cookies.
        post = {
                'JS_FLG': "0",
                'BW_FLG': "0",
                "_ControlID": "WPLETlgR001Control",
                "_DataStoreID": "DSWPLETlgR001Control",
                "_PageID": "WPLETlgR001Rlgn20",
                "_ActionID": "login",
                "getFlg": "on",
                "allPrmFlg": "on",
                "_ReturnPageInfo": "WPLEThmR001Control/DefaultPID/DefaultAID/DSWPLEThmR001Control",
                "user_id": self.user_id,
                "user_password": self.password
                }
        self.session = requests.Session()
        res = self.session.post(self.base_url, data=post)
        res.encoding = res.apparent_encoding  # let requests guess the page encoding

    def int_float_multiply(self, int_, num):
        # Scale a parsed number by 10**num; non-numeric values pass through.
        if isinstance(int_, (int, float)):
            return int_ * 10 ** num
        return int_

    def int_converter(self, str_):
        # Turn a comma-separated figure like '2,487' into an int (or a float
        # if it has a decimal point); leave placeholders such as '-' alone.
        if isinstance(str_, str):
            if re.compile('-|‥').search(str_):
                return str_
            elif str_.find('.') != -1:
                return float(str_.replace(',', ''))
            else:
                return int(str_.replace(',', ''))
        return str_

    def dividend_converter(self, str_):
        # Strip kanji annotations and asterisks from the dividend cell;
        # for a range like '10〜12' keep only the lower bound.
        str_ = re.sub(u'[一-龥]', "", str_)
        str_ = re.sub(r'\*', "", str_)
        if str_.find('〜') != -1:
            return str_[:str_.find('〜')]
        return str_


    def financePage_html(self, ticker):
        # Request the 四季報 (company report) finance page for the ticker.
        post = {
                "_ControlID": "WPLETsiR001Control",
                "_DataStoreID": "DSWPLETsiR001Control",
                "_PageID": "WPLETsiR001Idtl50",
                "getFlg": "on",
                "_ActionID": "goToSeasonReportOfFinanceStatus",
                "s_rkbn": "2",
                "s_btype": "",
                "i_stock_sec": str(ticker),
                "i_dom_flg": "1",
                "i_exchange_code": "JPN",
                "i_output_type": "4",
                "exchange_code": "TKY",
                "stock_sec_code_mul": str(ticker),
                "ref_from": "1",
                "ref_to": "20",
                "wstm4130_sort_id": "" ,
                "wstm4130_sort_kbn":  "",
                "qr_keyword": "1",
                "qr_suggest": "1",
                "qr_sort": "1"
                }
        html = self.session.post(self.base_url,data=post)
        html.encoding = html.apparent_encoding
        return html

    def get_fi_param(self, ticker):
        # Build one DataFrame row per fiscal period found in the report table.
        pd_data_all = pd.DataFrame(columns=['flag','証券コード','期末期','売上高','営業益','経常益','最終益','1株益','1株配'])
        dict_={}
        html=self.financePage_html(ticker)
        soup = BeautifulSoup(html.text, 'html.parser')
        div_shikihou = soup.find_all('div',{'class':'shikihouBox01'})[0]
        table = div_shikihou.find_all('table')[1]  # the second table holds the figures
        gyousyu_str = table.find_all('tr')[1].string  # industry label (currently unused)
        tr_list = table.tr.td.table.find_all('tr',{'align':'right'})
        for i in tr_list:
            # Data rows are flagged 連/単/◎/◇/□ in their first cell.
            if re.compile("連|単|◎|◇|□").search(str(i.td.string)):
                dict_['証券コード'] = ticker
                dict_['flag'] = 'S'
                dict_['期末期'] = i.td.string.replace('\n','')
                td_list = i.contents
                # The money figures are in units of one million yen, so scale
                # them by 10**6; the per-share columns (1株益/1株配) stay unscaled.
                dict_['売上高'] = self.int_float_multiply(self.int_converter(td_list[3].string.replace('\n','')),6)
                dict_['営業益'] = self.int_float_multiply(self.int_converter(td_list[5].string.replace('\n','')),6)
                dict_['経常益'] = self.int_float_multiply(self.int_converter(td_list[7].string.replace('\n','')),6)
                dict_['最終益'] = self.int_float_multiply(self.int_converter(td_list[9].string.replace('\n','')),6)
                dict_['1株益'] = self.int_float_multiply(self.int_converter(td_list[11].string.replace('\n','')),0)
                dict_['1株配'] = self.int_float_multiply(self.int_converter(self.dividend_converter(td_list[13].string.replace('\n',''))),0)
                pd_data = pd.DataFrame(dict_, index=['1'])
                pd_data_all = pd.concat([pd_data_all, pd_data])
                pd_data_all = pd_data_all.loc[:, ['flag','証券コード','期末期','売上高','営業益','経常益','最終益','1株益','1株配']]
        return pd_data_all
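
For reference, this is what the helper methods do to raw table cells (the values here are typed in by hand for illustration). The report table lists money figures in units of one million yen, which is why get_fi_param scales them with 10**6:

 >>> d = SBI_Scraper('user_id', 'password')
 >>> d.int_converter('2,487')
 2487
 >>> d.int_float_multiply(2487, 6)
 2487000000
 >>> d.int_converter('-')
 '-'
 >>> d.dividend_converter('10〜12特')
 '10'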

Closing remarks

This will stop working if SBI changes its website layout.
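
One way to fail fast when that happens is to check for the expected page block before parsing. A minimal sketch, assuming the class above (the wrapper name and error message are my own, and it fetches the page twice for simplicity):

from bs4 import BeautifulSoup

def get_fi_param_safe(scraper, ticker):
    # Hypothetical wrapper: return None instead of raising IndexError when
    # the 'shikihouBox01' block is missing (layout change or failed login).
    html = scraper.financePage_html(ticker)
    soup = BeautifulSoup(html.text, 'html.parser')
    if not soup.find_all('div', {'class': 'shikihouBox01'}):
        print('unexpected page layout for ' + str(ticker))
        return None
    return scraper.get_fi_param(ticker)  # re-fetches the page internally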
