問題4
import re
# Problem 4: build a mapping from 1-based word position to a chemical-symbol-like
# prefix taken from each word of the sentence.
string = "Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can."

# re.sub returns a new string (str is immutable); the result must be assigned,
# otherwise the punctuation is never actually removed. The pattern is the same
# character class as before, written as a raw string so the backslashes reach
# the regex engine without invalid-escape warnings.
string = re.sub(r'[-=+,#/\?:^.@*"※~ㆍ!』‘|\(\)\[\]`\'…》\”\“\’·]', "", string)
strarr = string.split(" ")

# 1-based positions of the words that contribute only their first character.
arr = [1, 5, 6, 7, 8, 9, 15, 16, 19]
single_char_positions = set(arr)  # O(1) membership test for the second pass

# First the single-character entries (in the order listed in `arr`) ...
dict_res = {position: strarr[position - 1][0] for position in arr}
# ... then every remaining word contributes its first two characters.
dict_res.update(
    (position, word[:2])
    for position, word in enumerate(strarr, start=1)
    if position not in single_char_positions
)
print(dict_res)
""" 実行結果:{'Al': 15,
'Ar': 18,
'B': 1,
'Be': 11,
'C': 2,
'Cl': 17,
'F': 5,
'H': 0,
'He': 9,
'K': 8,
'Li': 10,
'Mi': 14,
'N': 3,
'Na': 13,
'Ne': 12,
'O': 4,
'P': 6,
'S': 7,
'Si': 16}"""
見た目が悪いので、collections.OrderedDict を使ってキー順に並べ替える。
import collections
# Rebuild the mapping as an OrderedDict whose entries follow ascending key order.
# The value is not bound to a name; it is only visible when the expression is
# evaluated interactively (e.g. in a REPL or notebook cell).
collections.OrderedDict((number, dict_res[number]) for number in sorted(dict_res))
"""実行結果:OrderedDict([(1, 'H'),
(2, 'He'),
(3, 'Li'),
(4, 'Be'),
(5, 'B'),
(6, 'C'),
(7, 'N'),
(8, 'O'),
(9, 'F'),
(10, 'Ne'),
(11, 'Na'),
(12, 'Mi'),
(13, 'Al'),
(14, 'Si'),
(15, 'P'),
(16, 'S'),
(17, 'Cl'),
(18, 'Ar'),
(19, 'K'),
(20, 'Ca')])"""
問題5
input_str = "I am an NLPer"


def bigram(string):
    """Return the character bigrams and word bigrams of *string*.

    Args:
        string: The text to process.

    Returns:
        A tuple ``(alphabet, word)`` where ``alphabet`` is the list of
        2-character substrings of the text with space characters removed, and
        ``word`` is the list of 2-element lists of adjacent whitespace-separated
        words. Either list is empty when there are fewer than two items.
    """
    # Slice and measure the SAME space-stripped string; using the length of the
    # original string produced trailing truncated/empty entries.
    letters = string.replace(" ", "")
    # Split once instead of re-splitting on every iteration.
    words = string.split()
    alphabet = [letters[i:i + 2] for i in range(len(letters) - 1)]
    word = [words[i:i + 2] for i in range(len(words) - 1)]
    return alphabet, word


a, w = bigram(input_str)
print(a, w)
# Output: ['Ia', 'am', 'ma', 'an', 'nN', 'NL', 'LP', 'Pe', 'er'] [['I', 'am'], ['am', 'an'], ['an', 'NLPer']]