import requests
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import slackweb
import io

def data_analysis(url):
    """Scan a JPX archive page and process every matching Excel link.

    Fetches ``url``, takes the first ``<table>`` of the document and looks at
    the links inside its first two ``<tr>`` elements.  Each link whose
    ``href`` contains all of the substrings ``equities``, ``xls`` and ``val``
    is passed to ``Investor_data`` once for each sheet index 0, 1, 2 and 3.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        None.

    Raises:
        IndexError: If the page contains no ``<table>`` element.
    """
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen(url) as html:
        bsObj = BeautifulSoup(html, "html.parser")

    table = bsObj.findAll("table")[0]
    # Only the leading two <tr> elements are scanned; the intent is to target
    # the first row of the table (presumably a header row plus the newest
    # data row -- confirm against the page layout).
    rows = table.findAll("tr")[0:2]

    required_tokens = ('equities', 'xls', 'val')
    for r in rows:
        # The first <td> of every row is skipped.
        for td in r.findAll("td")[1:]:
            anchor = td.a
            if anchor is None:
                # Cell without a link: nothing to download.
                continue
            lnk = anchor.get("href")
            if lnk is None:
                continue
            # Membership test instead of ``find(...) > 0`` so that a keyword
            # at the very start of the href is not missed.
            if all(token in str(lnk) for token in required_tokens):
                for si in range(4):
                    Investor_data(lnk, si)

def Investor_data(pth, si):
    """Read one sheet of a JPX Excel file, net buys against sells, notify Slack.

    The workbook at ``https://www.jpx.co.jp/`` + ``pth`` is loaded with
    ``pandas.read_excel``.  Rows 11-62 of the sheet are reduced to three
    columns (investor type, buy/sell flag, amount), pivoted so that each
    investor type becomes one row, and the difference ``買い - 売り`` is stored
    in a ``売買金額`` column.  The result is sent to Slack as tab-separated
    text, preceded by the file URL and a caption line.

    Notes:
        * To send the notification to your own Slack workspace, change
          ``slack_url`` below.
        * To also save the retrieved data, add a
          ``df3.to_csv("buysell_data{}.csv".format(si))`` call once ``df3``
          has been built.

    Args:
        pth: Path of the Excel file relative to ``https://www.jpx.co.jp/``.
        si: Sheet index, forwarded to ``pandas.read_excel`` as ``sheet_name``.

    Returns:
        None.
    """
    # Strip a leading slash so the joined address does not contain "//".
    url = 'https://www.jpx.co.jp/' + str(pth).lstrip('/')
    print(url, si)

    df = pd.read_excel(url, sheet_name=si)

    # Header cells used for the caption line.  They are coerced to str before
    # any string operation so a non-text cell cannot raise here.
    # NOTE(review): the cell positions presumably hold market name, period
    # and unit -- confirm against the workbook layout.
    mkt = str(df.iloc[4, 0]).replace('総売買代金', '')
    ymw = str(df.iloc[2, 0])[:10]
    unit = str(df.iloc[3, 10]).split(',')[0]
    reference = ymw + "/" + mkt + "/" + "単位" + "(" + unit + ")"

    # Keep the data rows and only columns a, b and i.
    df = df[11:63]
    df.columns = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k']
    df = df.drop(['c', 'd', 'e', 'f', 'g', 'h', 'j', 'k'], axis=1)
    df = df[df['b'] != '合計']
    df = df.dropna(subset=['i'])
    df = df.dropna(how='all')

    # Replace the English investor labels with their Japanese names.
    dct = {'Proprietary': '自己計', 'Brokerage': '委託計', 'Total':'総 計', 'Institutions': '法 人'
          , 'Individuals': '個 人', 'Foreigners':'海外投資家','Securities Cos.':'証券会社'
          , 'Investment': '投資信託',  'Business Cos.':'事業法人', 'Other Cos.': 'その他法人等'
          , 'Financial': '金融機関', 'Life & Non-Life':'生保・損保', 'City & Regional BK':'都銀・地銀等'
          , 'Trust BK': '信託銀行',  'Other Financials':'その他金融機関'}
    df["a"] = df["a"].replace(dct)
    # Drop the four labels excluded from the report.
    df = df.query("a not in ['委託計', '総 計', '法 人', '金融機関']")
    df.columns = ['投資主体', '売買', '金額']
    df = df.reset_index(drop=True)

    # Aggregate: convert to wide form (one row per investor type).
    df3 = df.pivot(index='投資主体', columns='売買', values='金額')
    # Convert comma-formatted strings to integers.  The builtin ``int`` is
    # used because the ``np.int`` alias was removed in NumPy 1.24.
    df3 = df3.apply(lambda x: x.str.replace(',', '')).astype(int)
    df3['売買金額'] = df3['買い'] - df3['売り']  # buy amount minus sell amount

    # Fixed presentation order of the investor types.
    d_order = {'自己計': 0, '個 人': 1, '海外投資家': 2, '証券会社': 3
           , '投資信託': 4, '事業法人': 5, 'その他法人等': 6, '生保・損保': 7
           , '都銀・地銀等': 8, '信託銀行': 9, 'その他金融機関': 10}
    df3['order'] = df3.index.map(d_order)
    df3 = df3.sort_values('order')
    df3 = df3.drop(["売り", "買い", "order"], axis=1)

    # Placeholder webhook address: replace with a real incoming-webhook URL.
    slack_url = "https://hooks.slack.com/services/xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

    # Serialise the table as tab-separated text for the message body.
    tsv = io.StringIO()
    df3.to_csv(tsv, sep='\t')

    slack = slackweb.Slack(url=slack_url)
    slack.notify(text=url + "\n" + reference + "\n" + tsv.getvalue())

# Script entry point: scan the JPX investor-type archive page.
if __name__ == '__main__':
    url = 'https://www.jpx.co.jp/markets/statistics-equities/investor-type/00-00-archives-00.html'
    # Without this call the script would only assign ``url`` and exit.
    data_analysis(url)

