More than 5 years have passed since last update.

東京大学大学院情報理工学系研究科創造情報学専攻 2007年度夏プログラミング試験

Posted at 2020-01-05

2007年度夏の院試の解答例です
※記載の内容は筆者が個人的に解いたものであり、正答を保証するものではなく、また東京大学及び本試験内容の提供に関わる組織とは無関係です。

出題テーマ

符号化

問題文

※ 東京大学側から指摘があった場合は問題文を削除いたします。

配布ファイル

q1.txt(実際の配布ファイルです)

Sxx, jzf dppx ez slgp qzfyo esp qzwwzhtyr dpypepynpd.
Zyp zq esp dtxawpde pilxawpd zq l dfmdetefetzy ntaspc td esp Nlpdlc
ntaspc, hstns td dlto ez slgp mppy fdpo mj Ufwtfd Nlpdlc ez
nzxxfytnlep htes std lcxj. Nlpdlc td nzydtopcpo ez mp zyp zq esp qtcde
apcdzyd ez slgp pgpc pxawzjpo pyncjaetzy qzc esp dlvp zq dpnfctyr
xpddlrpd. Nlpdlc opntopo esle dstqetyr plns wpeepc ty esp xpddlrp
hzfwo mp std delyolco lwrzctesx, lyo dz sp tyqzcxpo lww zq std
rpypclwd zq std opntdtzy, lyo hld es

(1)

以下をq1.txtで実行するとわかるのですがkey=11のとき、英語の文章となります

from CharacterCode import *

def isSmall(num):
    return num >= 97 and num <= 122

def isBig(num):
    return num >= 65 and num <= 90

def decodeChar1(ch, key):
    if ch.isalpha():
        int_num = AsciiStr2Int(ch)
        if isSmall(int_num):
            order = int_num - 97
            return Hexbytes2Str(Int2hexbytes((order - key) % 26 + 97))
        if isBig(int_num):
            order = int_num - 65
            return Hexbytes2Str(Int2hexbytes((order - key) % 26 + 65))
    return ch
       
def decodeText1(text, key):
    ret = ''
    for ch in text:
        ret += decodeChar1(ch, key)
    return ret

def solve1(file_path):
    with open(file_path, 'r', encoding='ascii') as f:
        tmp = f.readlines()
        txt_array = []
        for txt in tmp:
            if txt[-1] == '\n':
                txt_array.append(txt[:-1])
            else:
                txt_array.append(txt)
        for key in range(1, 26):
            print('key: ' + str(key))
            for txt in txt_array:
                print(decodeText1(txt, key))
            print('---end---\n')

(2)

2-1

from CharacterCode import *

def isSmall(num):
    return num >= 97 and num <= 122

def isBig(num):
    return num >= 65 and num <= 90

def decodeChar1(ch, key):
    if ch.isalpha():
        int_num = AsciiStr2Int(ch)
        if isSmall(int_num):
            order = int_num - 97
            return Hexbytes2Str(Int2hexbytes((order - key) % 26 + 97))
        if isBig(int_num):
            order = int_num - 65
            return Hexbytes2Str(Int2hexbytes((order - key) % 26 + 65))
    return ch
       
def decodeText1(text, key):
    ret = ''
    for ch in text:
        ret += decodeChar1(ch, key)
    return ret

class Tmp(object):
    def __init__(self, al, cnt):
        self.al = al
        self.cnt = cnt
    def __repr__(self):
        return '{0}: {1}'.format(self.al, self.cnt)
    def __lt__(self, tmp):
        return self.cnt < tmp.cnt    

def solve2_1(file_path):
    with open(file_path, 'r', encoding='ascii') as f:
        text = f.read().lower()
        freq = []
        for i in range(26):
            order = i + 97
            al = Hexbytes2Str(Int2hexbytes(order))
            cnt = 0
            tmp = Tmp(al, cnt)
            freq.append(tmp)
        for ch in text:
            if ch.isalpha():
                idx = AsciiStr2Int(ch) - 97
                freq[idx].cnt += 1
        freq.sort()
        freq = freq[::-1]
        for tmp in freq:
            print(tmp)

2-2

さすがにq22.txtがないので解くことができませんでしたが、やり方としてはq1.txtでアルファベットごとの頻度を見るとわかるのですが母音の頻度は高いので、replaceなどを使って色々と試して見ると見えてくるはずです。

(3)

3-1

from CharacterCode import *
from PriorityQueue import PriorityQueue

def isSmall(num):
    return num >= 97 and num <= 122

def isBig(num):
    return num >= 65 and num <= 90

freq = []

class Node(object):
    # 文字の場合node_idはasciiコード
    def __init__(self, al, cnt, node_id):
        self.al = al
        self.cnt = cnt
        self.node_id = node_id
        # parent, left, rightのnode_id 
        self.p = None
        self.r = None
        self.l = None
    def __repr__(self):                           
        return '{0}: {1}, ID: {2}\n'.format(self.al, self.cnt, self.node_id) + \
               '親ノード: {0}\n'.format(self.p) + \
               '左ノード: {0}, 右ノード: {1}'.format(self.l, self.r)
    def __lt__(self, node):
        if self.cnt == node.cnt:
            return self.node_id < node.node_id
        else:
            return self.cnt < node.cnt 

def encode3(ch):
    ret = ''
    global freq
    node = freq[AsciiStr2Int(ch)]
    while True:
        if node.p == None:
            break
        p = freq[node.p]
        if p.l == node.node_id:
            ret += '0'
        elif p.r == node.node_id:
            ret += '1'
        node = p
    return ret[::-1]        
        
def solve3_1(file_path):
    with open(file_path, 'r', encoding='ascii') as f:
        text = f.read().lower()
        global freq
        freq = [None for _ in range(127)]
        for i in range(127):
            if i == 10:
                node = Node('LF', 0, i)
                freq[i] = node
            elif i == 13:
                node = Node('CR', 0, i)
                freq[i] = node
            elif i == 32:
                node = Node('SP', 0, i)
            elif i >= 33 and i <= 126:
                al = Hexbytes2Str(Int2hexbytes(i))
                cnt = 0
                node = Node(al, cnt, i)            
                freq[i] = node
        for ch in text:
            if ch.isalpha():
                idx = AsciiStr2Int(ch)
                freq[idx].cnt += 1
        pq = PriorityQueue([])
        for node in freq:
            if node != None:
                pq.push(node)
        while len(pq) > 1:
            new_node_id = len(freq)
            r_node = pq.pop()
            l_node = pq.pop()
            new_node = Node('{0},{1}'.format(r_node.al, l_node.al), r_node.cnt + l_node.cnt, new_node_id)
            r_node.p = new_node_id
            l_node.p = new_node_id
            new_node.r = r_node.node_id
            new_node.l = l_node.node_id
            freq.append(new_node)
            pq.push(new_node)
        for i in range(127):
            node = freq[i]
            if node != None:
                code = None
                if node.al == 'LF':
                    code = encode3('\n')
                elif node.al == 'CR':
                    code = encode3('\r')
                elif node.al == 'SP':
                    code = encode3(' ')
                else:
                    code = encode3(node.al)
                print('{0}: {1}'.format(node.al, code))

3-2

from CharacterCode import *
from PriorityQueue import PriorityQueue

def isSmall(num):
    return num >= 97 and num <= 122

def isBig(num):
    return num >= 65 and num <= 90

freq = []

class Node(object):
    # 文字の場合node_idはasciiコード
    def __init__(self, al, cnt, node_id):
        self.al = al
        self.cnt = cnt
        self.node_id = node_id
        # parent, left, rightのnode_id 
        self.p = None
        self.r = None
        self.l = None
    def __repr__(self):                           
        return '{0}: {1}, ID: {2}\n'.format(self.al, self.cnt, self.node_id) + \
               '親ノード: {0}\n'.format(self.p) + \
               '左ノード: {0}, 右ノード: {1}'.format(self.l, self.r)
    def __lt__(self, node):
        if self.cnt == node.cnt:
            return self.node_id < node.node_id
        else:
            return self.cnt < node.cnt 

def encode3(ch):
    ret = ''
    global freq
    node = freq[AsciiStr2Int(ch)]
    while True:
        if node.p == None:
            break
        p = freq[node.p]
        if p.l == node.node_id:
            ret += '0'
        elif p.r == node.node_id:
            ret += '1'
        node = p
    return ret[::-1]        
        
def solve3_2(file_path):
    with open(file_path, 'r', encoding='ascii') as f:
        text = f.read().lower()
        global freq
        freq = [None for _ in range(127)]
        for i in range(127):
            if i == 10:
                node = Node('LF', 0, i)
                freq[i] = node
            elif i == 13:
                node = Node('CR', 0, i)
                freq[i] = node
            elif i == 32:
                node = Node('SP', 0, i)
            elif i >= 33 and i <= 126:
                al = Hexbytes2Str(Int2hexbytes(i))
                cnt = 0
                node = Node(al, cnt, i)            
                freq[i] = node
        for ch in text:
            if ch.isalpha():
                idx = AsciiStr2Int(ch)
                freq[idx].cnt += 1
        pq = PriorityQueue([])
        for node in freq:
            if node != None:
                pq.push(node)
        while len(pq) > 1:
            new_node_id = len(freq)
            r_node = pq.pop()
            l_node = pq.pop()
            new_node = Node('{0},{1}'.format(r_node.al, l_node.al), r_node.cnt + l_node.cnt, new_node_id)
            r_node.p = new_node_id
            l_node.p = new_node_id
            new_node.r = r_node.node_id
            new_node.l = l_node.node_id
            freq.append(new_node)
            pq.push(new_node)
        dic = {}    
        for i in range(127):
            node = freq[i]
            if node != None:
                code = None
                if node.al == 'LF':
                    code = encode3('\n')
                elif node.al == 'CR':
                    code = encode3('\r')
                elif node.al == 'SP':
                    code = encode3(' ')
                else:
                    code = encode3(node.al)
                dic[node.al] = len(code)
        total = 0
        mom = 0
        for i in range(127):
            node = freq[i]
            if node != None:
                cnt = node.cnt
                size = dic[node.al]
                total += (cnt * size)
                mom += cnt
        print('{0:.2f}'.format(total / mom))

感想

僕個人としては好きなタイプの方の問題でした。
c, c++はchar型で直接足し引き算ができるのですがpythonではそれができないので、独自のモジュール(CharacterCode)を事前に作成してあったのでそちらを使って解いています。（binasciiとかで調べるといいかも）
3-1が山場ですが木を作る際にc, c++ならポインタが使えてもう少し楽にかけるのですが、pythonで今回やる場合は配列を作り、そのindexを指すような形にしました。
余談ですが留学生向けに完全同様な英語の試験を行うのでおそらく文字エンコードはasciiの範囲を越すことはないと思います。（要はutf-8encodingなどして日本語の文字を扱うことがないということです。）

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

東京大学大学院情報理工学系研究科 創造情報学専攻 2007年度夏 プログラミング試験

出題テーマ

問題文

配布ファイル

q1.txt(実際の配布ファイルです)

(1)

(2)

2-1

2-2

(3)

3-1

3-2

感想

東京大学大学院情報理工学系研究科創造情報学専攻 2007年度夏プログラミング試験