AtCoder コンテストの入力例一覧を取得するPythonスクリプト #Python

ソースコード

Gistはこちら https://gist.github.com/029969994cb444bef7ed

# -*- coding: utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup
import sys
import os
import re
# Given the URL of a contest top page, this script creates one directory per
# task under the current working directory and writes the sample inputs found
# on each task page to a file inside that directory.

# Name of the file written into each task directory.
INPUT_FILE_NAME = "input.txt"
# Line placed between consecutive sample inputs inside the output file.
DELIMITER = "---"


def main():
    """Download the sample inputs of every task of a contest.

    The contest top-page URL (for example
    ``http://arc001.contest.atcoder.jp/``) is taken from the first
    command-line argument, or read from standard input when no argument is
    given.  A directory named after the contest is created under the current
    working directory, with one sub-directory per task that holds an
    ``INPUT_FILE_NAME`` file containing that task's sample inputs.  Files
    that already exist are left untouched.

    Raises:
        SystemExit: if the contest name cannot be extracted from the URL or
            the task table is not found on the assignments page.
    """
    try:
        url = sys.argv[1]
    except IndexError:
        print("input contest url:")
        url = input()

    # Page paths are appended directly to the URL, so it must end with "/".
    url = url.strip()
    if not url.endswith("/"):
        url += "/"

    m = re.search(r'(https?://)(?P<name>.*)\.contest?', url)
    if m is None:
        sys.exit("cannot extract contest name from url: " + url)
    contest_name = m.group('name')
    os.makedirs(contest_name, exist_ok=True)

    with urlopen(url + "assignments") as page:
        soup = BeautifulSoup(page, "html.parser")
    table = soup.find('table', attrs={'class': 'table-wb'})
    if table is None:
        sys.exit("task table not found: " + url + "assignments")
    # Some parsers do not synthesize <tbody>; fall back to the table itself.
    rows = table.find('tbody') or table

    for tr in rows.find_all('tr'):
        cells = tr.find_all('td', limit=2)
        if len(cells) < 2:
            # Not a task row (e.g. a header row made of <th> cells).
            continue
        sharptd, qlinktd = cells
        index = sharptd.text.strip()
        q_path = os.path.join(contest_name, index)
        os.makedirs(q_path, exist_ok=True)

        f_path = os.path.join(q_path, INPUT_FILE_NAME)
        if os.path.exists(f_path):
            print("already exists: " + f_path)
            continue

        link = qlinktd.find('a')
        if link is None or not link.get('href'):
            continue
        url_tail = link.get('href')
        # Fetch before opening the file so that a failed download does not
        # leave an empty file behind that later runs would skip.
        # The href starts with "/", which the URL already ends with.
        text = soup_prets(url + url_tail[1:])
        with open(f_path, "w", encoding="utf-8") as f:
            f.write(text)
        print("generate input > " + f_path)


def soup_prets(url):
    """Fetch a task page and return its sample inputs as a single string.

    Args:
        url: URL of the task page to download.

    Returns:
        The stripped text of every other ``<pre class="prettyprint">``
        element (starting with the first), joined by a line containing
        ``DELIMITER``.  An empty string is returned when no such element
        exists.
    """
    with urlopen(url) as page:
        soup = BeautifulSoup(page, "html.parser")
    pretags = soup.find_all("pre", attrs={"class": "prettyprint"})
    # Only every other block is kept; presumably sample inputs and outputs
    # alternate on the page -- verify against the page layout.
    # get_text() is used instead of .string because .string is None when the
    # element contains nested tags, which would make .strip() fail.
    inputs = [pretag.get_text().strip() for pretag in pretags[::2]]
    return ("\n" + DELIMITER + "\n").join(inputs)

# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()

用意

Python 3 と BeautifulSoup4 が必要です。BeautifulSoup4 は次のコマンドでインストールできます。

pip install beautifulsoup4

実行例

http://arc001.contest.atcoder.jp/ のコンテストで試す

> ls
start.py

スクリプトを実行(シバン行や実行権限がない場合は `python3 start.py <URL>` で実行する)

> ./start.py http://arc001.contest.atcoder.jp/
generate input > arc001/A/input.txt
generate input > arc001/B/input.txt
generate input > arc001/C/input.txt
generate input > arc001/D/input.txt

実行後のディレクトリ構成

> tree
.
├── arc001
│   ├── A
│   │   └── input.txt
│   ├── B
│   │   └── input.txt
│   ├── C
│   │   └── input.txt
│   └── D
│       └── input.txt
└── start.py

5 directories, 5 files

生成されたファイルの内容を確認

> cat arc001/A/input.txt
9
131142143
---
20
12341234123412341234
---
4
1111