LoginSignup
4
15

More than 5 years have passed since last update.

Pythonで米国株のTicker Symbol一覧取得

Last updated at Posted at 2018-03-14

目的・要件

  • 米国の上場会社一覧(Ticker Symbol)を取得し、pandasのdataframeとして保持すること(ついでにcsvに吐いておく)
  • 上場市場(NYSE/NASDAQ/AMEX)を特定できること
  • S&P 500 指数採用銘柄を特定できること
  • ダウ平均採用銘柄を特定できること
  • ETF銘柄を特定できること (←怪しいので削除)
  • 日本ADR銘柄を特定できること
  • 中国ADR銘柄を特定できること

データ取得元

環境

  • python 3.6.4
  • Anaconda (conda ver.4.4.10):ライブラリ等は全部予め入っているかな?と思います。
  • windows 10

注意

  • どうもNASDAQ公式サイトからダウンロードしたCSVのなかにはETF銘柄が足りないようです。。。対策考え中。
  • コーディングすること自体あまり慣れていないので、ググりながらとりあえず動くものを書いたまで。ご指摘、アドバイス等があればコメントください。

ソースコード

なお、個々のgetなんとかの関数は独立して動くようにしていますので、importして必要に応じてお使いください。

getCompanyList.py
"""
Get all US listing companies ticker symbols
"""

from io import StringIO
import requests
import pandas as pd
import bs4
import re


def get_company_list():
    """[Get the list of US listing companies (stock ticker symbols) from www.nasdaq.com]

    Returns:
        [pandas data frame]
            Columns:
                -Symbol: ex. AAPL
                -Name
                -LastSale
                -MarketCap
                -IPOyear
                -Sector
                -industry
                -Summary Quote
                -exchange
    """

    target_url_nasdaq = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nasdaq&render=download'
    target_url_nyse = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=nyse&render=download'
    target_url_amex = 'https://www.nasdaq.com/screening/companies-by-name.aspx?letter=0&exchange=amex&render=download'

    r_nasdaq = requests.get(target_url_nasdaq)
    r_nyse = requests.get(target_url_nyse)
    r_amex = requests.get(target_url_amex)

    df_companies_nasdaq = pd.read_csv(StringIO(r_nasdaq.text))
    # 整形:最終列名を'exchange'に変更し、値に'NASDAQ'を代入
    df_companies_nasdaq = df_companies_nasdaq.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_nasdaq["exchange"] = 'NASDAQ'

    df_companies_nyse = pd.read_csv(StringIO(r_nyse.text))
    # 整形:最終列名を'exchange'に変更し、値に'NYSE'を代入
    df_companies_nyse = df_companies_nyse.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_nyse["exchange"] = 'NYSE'

    df_companies_amex = pd.read_csv(StringIO(r_amex.text))
    # 整形:最終列名を'exchange'に変更し、値に'AMEX'を代入
    df_companies_amex = df_companies_amex.rename(
        columns={'Unnamed: 8': 'exchange'})
    df_companies_amex["exchange"] = 'AMEX'

    # 3市場のデータを結合して一つにする
    df_companies = pd.concat(
        [df_companies_nasdaq, df_companies_nyse, df_companies_amex])

    return df_companies


def get_sp500_company_list():
    """[Get the list of s&p 500 companies (stock ticker symbols) from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'http://en.wikipedia.org/wiki/List_of_S%26P_500_companies'
    tables = pd.read_html(target_url_wikipedia)
    # 対象のhtml中に2つの表が存在するが、最初の表のみを使用
    df_companies_in_sp500 = tables[0]

    # ヘッダーの整形(1行目をヘッダー(Columns)にする)
    header = df_companies_in_sp500[0][:]
    df_companies_in_sp500 = df_companies_in_sp500[1:].rename(columns=header)

    # 1列目(Ticker Symbol)を抜き出し(series型)returnする
    sr_companies_in_sp500 = df_companies_in_sp500.loc[:]['Ticker symbol']
    return sr_companies_in_sp500


def get_dow_company_list():
    """[Get the list of Dow Jones Industrial Average 30 companies (stock ticker symbols) from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average'
    tables = pd.read_html(target_url_wikipedia)
    # 対象html中に複数の表が存在するが、2個目(つまり[1])の表のみ使用
    df_companies_in_dow = tables[1]

    # ヘッダーの整形(1行目をヘッダー(Columns)にする)
    header = df_companies_in_dow.iloc[0][:]
    df_companies_in_dow = df_companies_in_dow[1:].rename(columns=header)

    # 列(Ticker Symbol)を抜き出し(series型)returnする
    sr_companies_in_dow = df_companies_in_dow.Symbol
    return sr_companies_in_dow


def get_etf_list():
    """[Get the list of ETF tickers from wikipedia.com]

    Returns:
        [pandas series]
    """
    target_url_wikipedia = 'https://en.wikipedia.org/wiki/List_of_American_exchange-traded_funds'
    res = requests.get(target_url_wikipedia)
    res.raise_for_status()
    # html平文をsoupに代入
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    # 特徴に該当する要素(outerHTML)を抽出
    elems_nyse = soup.find_all("a", rel="nofollow", class_="external text",
                               href=re.compile("https://www.nyse.com/quote/"))
    elems_nasdaq = soup.find_all("a", rel="nofollow", class_="external text",
                                 href=re.compile("http://www.nasdaq.com/symbol/"))
    elems = elems_nyse + elems_nasdaq
    # elems_textという空のlistを宣言しておく
    elems_text = []
    # 要素(outerHTML)の中のテキストを一つずつ抽出して、上で宣言したelems_textというリストに代入
    for elem in elems:
        elems_text.append(elem.getText())
    # リストをseriesにする
    sr_etf = pd.Series(elems_text)
    # 重複するもの削除
    sr_etf = sr_etf.drop_duplicates()
    return sr_etf


def get_jp_company_list():
    """[Get the list of Japanese companies (stock ticker symbols) from topforeignstocks.com]

    Returns:
        [pandas series]
    """
    target_url = 'http://topforeignstocks.com/foreign-adrs-list/the-full-list-of-japanese-adrs/'
    tables = pd.read_html(target_url)
    # 対象html中に複数の表が存在するが、1個目(つまり[0])の表のみ使用
    df_jp_company_list = tables[0]

    # "Ticker"列を抜き出し(series型)returnする
    sr_jp_company_list = df_jp_company_list.Ticker
    return sr_jp_company_list


def get_cn_company_list():
    """[Get the list of Chinese companies (stock ticker symbols) from topforeignstocks.com]

    Returns:
        [pandas series]
    """
    target_url = 'http://topforeignstocks.com/foreign-adrs-list/the-full-list-of-chinese-adrs/'
    tables = pd.read_html(target_url)
    # 対象html中に複数の表が存在するが、1個目(つまり[0])の表のみ使用
    df_cn_company_list = tables[0]

    # "Ticker"列を抜き出し(series型)returnする
    sr_cn_company_list = df_cn_company_list.Ticker
    return sr_cn_company_list


def add_sp500_col(arg):
    """[Add a column to the argument that shows wether the ticker is a element of s&p500 index]

        Returns:
            [pandas dataframe]
    """
    arg['Is_SP500'] = ""
    df_with_sp500 = arg
    sr_companies_in_sp500 = get_sp500_company_list()
    for elem in sr_companies_in_sp500:
        if elem in df_with_sp500.Symbol.values:
            df_with_sp500.loc[df_with_sp500.Symbol == elem, 'Is_SP500'] = 'Y'
    return df_with_sp500


def add_dow_col(arg):
    """[Add a column to the argument that shows wether the ticker is a element of Dow]

        Returns:
            [pandas dataframe]
    """
    arg['Is_Dow'] = ""
    df_with_dow = arg
    sr_companies_in_dow = get_dow_company_list()
    for elem in sr_companies_in_dow:
        if elem in df_with_dow.Symbol.values:
            df_with_dow.loc[df_with_dow.Symbol == elem, 'Is_Dow'] = 'Y'
    return df_with_dow


def add_etf_col(arg):
    """[Add a column to the argument that shows wether the ticker is an etf]

        Returns:
            [pandas dataframe]
    """
    arg['Is_etf'] = ""
    df_with_etf = arg
    sr_etf = get_etf_list()
    for elem in sr_etf:
        if elem in df_with_etf.Symbol.values:
            df_with_etf.loc[df_with_etf.Symbol == elem, 'Is_etf'] = 'Y'
    return df_with_etf


def add_jp_col(arg):
    """[Add a column to the argument that shows wether the ticker is a Japan ADR]

        Returns:
            [pandas dataframe]
    """
    arg['Is_JP_ADR'] = ""
    df_with_jp_adr = arg
    sr_jp_adr = get_jp_company_list()
    for elem in sr_jp_adr:
        if elem in df_with_jp_adr.Symbol.values:
            df_with_jp_adr.loc[df_with_jp_adr.Symbol ==
                               elem, 'Is_JP_ADR'] = 'Y'
    return df_with_jp_adr


def add_cn_col(arg):
    """[Add a column to the argument that shows wether the ticker is a China ADR]

        Returns:
            [pandas dataframe]
    """
    arg['Is_CN_ADR'] = ""
    df_with_cn_adr = arg
    sr_cn_adr = get_cn_company_list()
    for elem in sr_cn_adr:
        if elem in df_with_cn_adr.Symbol.values:
            df_with_cn_adr.loc[df_with_cn_adr.Symbol ==
                               elem, 'Is_CN_ADR'] = 'Y'
    return df_with_cn_adr


def get_full_company_list():
    full_company_list = get_company_list()
    full_company_list = add_sp500_col(full_company_list)
    full_company_list = add_dow_col(full_company_list)
    full_company_list = add_etf_col(full_company_list)
    full_company_list = add_jp_col(full_company_list)
    full_company_list = add_cn_col(full_company_list)
    # index振り直し
    full_company_list = full_company_list.reset_index(drop=True)
    return full_company_list


if __name__ == '__main__':
    full_company_list = get_full_company_list()

    # print(full_company_list)
    # import os
    # os.chdir('G:\\dev\\code')

    # CSV出力
    full_company_list.to_csv("full_company_list.csv")


4
15
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
4
15