Python
pandas
stock
Finance

Pythonで米国株のTicker Symbol一覧取得

目的・要件

  • 米国の上場会社一覧(Ticker Symbol)を取得し、pandasのdataframeとして保持すること(ついでにcsvに吐いておく)
  • 上場市場(NYSE/NASDAQ/AMEX)を特定できること
  • S&P 500 指数採用銘柄を特定できること
  • ダウ平均採用銘柄を特定できること
  • ETF銘柄を特定できること (←怪しいので削除)
  • 日本ADR銘柄を特定できること
  • 中国ADR銘柄を特定できること

データ取得元

環境

  • python 3.6.4
  • Anaconda (conda ver.4.4.10):ライブラリ等は全部予め入っているかな?と思います。
  • windows 10

注意

  • どうもNASDAQ公式サイトからダウンロードしたCSVのなかにはETF銘柄が足りないようです。。。対策考え中。
  • コーディングすること自体あまり慣れていないので、ググりながらとりあえず動くものを書いたまで。ご指摘、アドバイス等があればコメントください。

ソースコード

なお、個々のgetなんとかの関数は独立して動くようにしていますので、importして必要に応じてお使いください。

getCompanyList.py
"""
Get all US listing companies ticker symbols
"""

from io import StringIO
import requests
import pandas as pd
import bs4
import re


def get_company_list():
    """[Get the list of US listing companies (stock ticker symbols) from www.nasdaq.com]

    Returns:
        [pandas data frame]
            Columns:
                -Symbol: ex. AAPL
                -Name
                -LastSale
                -MarketCap
                -IPOyear
                -Sector
                -industry
                -Summary Quote
                -exchange
    """

    target_url_nasdaq = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download'
    target_url_nyse = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nyse&render=download'
    target_url_amex = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=amex&render=download'

    r_nasdaq = requests.get(target_url_nasdaq)
    r_nyse = requests.get(target_url_nyse)
    r_amex = requests.get(target_url_amex)

    df_companies_nasdaq = pd.read_csv(StringIO(r_nasdaq.text))
    # 整形:最終列名を'exchange'に変更し、値に'NASDAQ'を代入
    df_companies_nasdaq = df_companies_nasdaq.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_nasdaq["exchange"] = 'NASDAQ'

    df_companies_nyse = pd.read_csv(StringIO(r_nyse.text))
    # 整形:最終列名を'exchange'に変更し、値に'NYSE'を代入
    df_companies_nyse = df_companies_nyse.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_nyse["exchange"] = 'NYSE'

    df_companies_amex = pd.read_csv(StringIO(r_amex.text))
    # 整形:最終列名を'exchange'に変更し、値に'AMEX'を代入
    df_companies_amex = df_companies_amex.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_amex["exchange"] = 'AMEX'

    # 3市場のデータを結合して一つにする
    df_companies = pd.concat(
        [df_companies_nasdaq, df_companies_nyse, df_companies_amex])

    return df_companies


def get_sp500_company_list():
    """[Get the list of s&p 500 companies (stock ticker symbols) from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(target_url_wikipedia)
    # 対象のhtml中に2つの表が存在するが、最初の表のみを使用
    df_companies_in_sp500 = tables[0]

    # ヘッダーの整形(1行目をヘッダー(Columns)にする)
    header = df_companies_in_sp500[0][:]
    df_companies_in_sp500 = df_companies_in_sp500[1:].rename(columns=header)

    # 1列目(Ticker Symbol)を抜き出し(series型)returnする
    sr_companies_in_sp500 = df_companies_in_sp500.loc[:]['Ticker symbol']
    return sr_companies_in_sp500


def get_dow_company_list():
    """[Get the list of Dow Jones Industrial Average 30 companies (stock ticker symbols) from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
    tables = pd.read_html(target_url_wikipedia)
    # 対象html中に複数の表が存在するが、2個目(つまり[1])の表のみ使用
    df_companies_in_dow = tables[1]

    # ヘッダーの整形(1行目をヘッダー(Columns)にする)
    header = df_companies_in_dow.iloc[0][:]
    df_companies_in_dow = df_companies_in_dow[1:].rename(columns=header)

    # 列(Ticker Symbol)を抜き出し(series型)returnする
    sr_companies_in_dow = df_companies_in_dow.Symbol
    return sr_companies_in_dow


def get_etf_list():
    """[Get the list of ETF tickers from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'https://en.wikipedia.org/wiki/List_of_American_exchange-traded_funds'
    res = requests.get(target_url_wikipedia)
    res.raise_for_status()
    # html平文をsoupに代入
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    # 特徴に該当する要素(outerHTML)を抽出
    elems_nyse = soup.find_all("a", rel="nofollow", class_="external text",
                               href=re.compile("https://www.nyse.com/quote/"))
    elems_nasdaq = soup.find_all("a", rel="nofollow", class_="external text",
                                 href=re.compile("http://www.nasdaq.com/symbol/"))
    elems = elems_nyse + elems_nasdaq
    # elems_textという空のlistを宣言しておく
    elems_text = []
    # 要素(outerHTML)の中のテキストを一つずつ抽出して、上で宣言したelems_textというリストに代入
    for elem in elems:
        elems_text.append(elem.getText())
    # リストをseriesにする
    sr_etf = pd.Series(elems_text)
    # 重複するもの削除
    sr_etf = sr_etf.drop_duplicates()
    return sr_etf


def get_jp_company_list():
    """[Get the list of Japanese companies (stock ticker symbols) from topforeignstocks.com]

    Returns:
        [pandas series]
    """
    target_url = 'http://topforeignstocks.com/foreign-adrs-list/the-full-list-of-japanese-adrs/'
    tables = pd.read_html(target_url)
    # 対象html中に複数の表が存在するが、1個目(つまり[0])の表のみ使用
    df_jp_company_list = tables[0]

    # "Ticker"列を抜き出し(series型)returnする
    sr_jp_company_list = df_jp_company_list.Ticker
    return sr_jp_company_list


def get_cn_company_list():
    """[Get the list of Chinese companies (stock ticker symbols) from topforeignstocks.com]

    Returns:
        [pandas series]
    """
    target_url = 'http://topforeignstocks.com/foreign-adrs-list/the-full-list-of-chinese-adrs/'
    tables = pd.read_html(target_url)
    # 対象html中に複数の表が存在するが、1個目(つまり[0])の表のみ使用
    df_cn_company_list = tables[0]

    # "Ticker"列を抜き出し(series型)returnする
    sr_cn_company_list = df_cn_company_list.Ticker
    return sr_cn_company_list


def add_sp500_col(arg):
    """[Add a column to the argument that shows wether the ticker is a element of s&p500 index]

        Returns:
            [pandas dataframe]
    """
    arg['Is_SP500'] = ""
    df_with_sp500 = arg
    sr_companies_in_sp500 = get_sp500_company_list()
    for elem in sr_companies_in_sp500:
        if elem in df_with_sp500.Symbol.values:
            df_with_sp500.loc[df_with_sp500.Symbol == elem, 'Is_SP500'] = 'Y'
    return df_with_sp500


def add_dow_col(arg):
    """[Add a column to the argument that shows wether the ticker is a element of Dow]

        Returns:
            [pandas dataframe]
    """
    arg['Is_Dow'] = ""
    df_with_dow = arg
    sr_companies_in_dow = get_dow_company_list()
    for elem in sr_companies_in_dow:
        if elem in df_with_dow.Symbol.values:
            df_with_dow.loc[df_with_dow.Symbol == elem, 'Is_Dow'] = 'Y'
    return df_with_dow


def add_etf_col(arg):
    """[Add a column to the argument that shows wether the ticker is an etf]

        Returns:
            [pandas dataframe]
    """
    arg['Is_etf'] = ""
    df_with_etf = arg
    sr_etf = get_etf_list()
    for elem in sr_etf:
        if elem in df_with_etf.Symbol.values:
            df_with_etf.loc[df_with_etf.Symbol == elem, 'Is_etf'] = 'Y'
    return df_with_etf


def add_jp_col(arg):
    """[Add a column to the argument that shows wether the ticker is a Japan ADR]

        Returns:
            [pandas dataframe]
    """
    arg['Is_JP_ADR'] = ""
    df_with_jp_adr = arg
    sr_jp_adr = get_jp_company_list()
    for elem in sr_jp_adr:
        if elem in df_with_jp_adr.Symbol.values:
            df_with_jp_adr.loc[df_with_jp_adr.Symbol ==
                               elem, 'Is_JP_ADR'] = 'Y'
    return df_with_jp_adr


def add_cn_col(arg):
    """[Add a column to the argument that shows wether the ticker is a China ADR]

        Returns:
            [pandas dataframe]
    """
    arg['Is_CN_ADR'] = ""
    df_with_cn_adr = arg
    sr_cn_adr = get_cn_company_list()
    for elem in sr_cn_adr:
        if elem in df_with_cn_adr.Symbol.values:
            df_with_cn_adr.loc[df_with_cn_adr.Symbol ==
                               elem, 'Is_CN_ADR'] = 'Y'
    return df_with_cn_adr


def get_full_company_list():
    full_company_list = get_company_list()
    full_company_list = add_sp500_col(full_company_list)
    full_company_list = add_dow_col(full_company_list)
    full_company_list = add_etf_col(full_company_list)
    full_company_list = add_jp_col(full_company_list)
    full_company_list = add_cn_col(full_company_list)
    # index振り直し
    full_company_list = full_company_list.reset_index(drop=True)
    return full_company_list


if __name__ == '__main__':
    full_company_list = get_full_company_list()

    # print(full_company_list)
    # import os
    # os.chdir('G:\\dev\\code')

    # CSV出力
    full_company_list.to_csv("full_company_list.csv")