# はじめに

これから、数週間に分けて 自然言語処理100ノック 2015の問題を解いていきます。

また、Pythonは「入門 Python 3」と CodingBat で簡単に勉強した程度なので、おかしな処理や「こうした方がいいよ」ということがあればコメントください。

## 0. 文字列の逆順

ex0.py
```python
# -*- coding: utf-8 -*-
if __name__ == '__main__':
    # Walk the string from its last character to its first and glue the
    # characters back together.
    original = "stressed"
    print(''.join(reversed(original)))  # => desserts
```

## 1. 「パタトクカシーー」

ex1.py
```python
# -*- coding: utf-8 -*-

if __name__ == '__main__':
    # Unicode literal: on Python 2 a plain '...' literal is a UTF-8 byte
    # string, so slicing it would pick individual bytes instead of characters.
    str_ = u'パタトクカシーー'

    # Every other character starting at index 0 (the 1st, 3rd, 5th and 7th
    # characters); equivalent to str_[0:len(str_):2].
    print(str_[0::2])  # => パトカー
```

## 2. 「パトカー」＋「タクシー」＝「パタトクカシーー」

ex2.py
```python
# -*- coding: utf-8 -*-
def interleave(first, second):
    """Return the characters of ``first`` and ``second`` taken alternately.

    Characters are paired up position by position (first[0], second[0],
    first[1], second[1], ...).  When the strings differ in length, the
    unpaired tail of the longer one is appended unchanged instead of
    indexing past the end of the shorter one.
    """
    common = min(len(first), len(second))
    paired = ''.join(a + b for a, b in zip(first, second))
    return paired + first[common:] + second[common:]


if __name__ == '__main__':
    # Unicode literals so that pairing works on characters rather than on
    # UTF-8 bytes when run under Python 2.
    str1 = u"パトカー"
    str2 = u"タクシー"

    print(interleave(str1, str2))  # => パタトクカシーー

```

## 3. 円周率

ex3.py
```python
# -*- coding: utf-8 -*-

if __name__ == '__main__':
    sentence = 'Now I need a drink, alcoholic of course, after the heavy lectures involving quantum mechanics.'

    # Strip the commas and the period so punctuation is not counted as part
    # of a word.
    for mark in (',', '.'):
        sentence = sentence.replace(mark, '')

    # Length of each space-separated word, in order of appearance.
    lengths = []
    for token in sentence.split(' '):
        lengths.append(len(token))

    print(lengths)  # => [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 8, 9, 7, 9]

```

## 4. 元素記号

ex4.py
```python
# -*- coding: utf-8 -*-

def element_symbols(sentence, single_letter_positions):
    """Map the leading letters of each word to the word's 1-based position.

    Periods are removed and the sentence is split on single spaces.  A word
    whose 1-based position is in ``single_letter_positions`` contributes its
    first character as the key; every other word contributes its first two
    characters.  The value stored for each key is the word's 1-based position.
    """
    words = sentence.replace('.', '').split(' ')

    symbols = {}
    for position, word in enumerate(words, 1):
        width = 1 if position in single_letter_positions else 2
        symbols[word[:width]] = position
    return symbols


if __name__ == '__main__':
    str_ = 'Hi He Lied Because Boron Could Not Oxidize Fluorine. New Nations Might Also Sign Peace Security Clause. Arthur King Can.'
    # 1-based positions of the words that contribute only their first letter.
    # Position 19 ("King") is included so that it yields 'K' rather than 'Ki'.
    one = {1, 5, 6, 7, 8, 9, 15, 16, 19}

    result = element_symbols(str_, one)

    print(result)
    # => (key order may differ) {'H': 1, 'He': 2, 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10, 'Na': 11, 'Mi': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18, 'K': 19, 'Ca': 20}
```

## 5. n-gram

ex5.py
```python
# -*- coding: utf-8 -*-

import sys

def get_word_ngram(sentence, n):
    """Return the word n-grams of ``sentence``.

    The sentence is split on whitespace and every run of ``n`` consecutive
    words is joined with '-'.  A sentence with fewer than ``n`` words yields
    an empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer')

    # split() without an argument collapses repeated whitespace, so empty
    # strings never show up as words.
    words = sentence.split()

    return ['-'.join(words[i:i + n]) for i in range(len(words) - n + 1)]

def get_char_ngram(sentence, n):
    """Return the character n-grams of ``sentence``, ignoring spaces.

    Space characters are removed first, then every run of ``n`` consecutive
    characters is collected.  Text shorter than ``n`` characters yields an
    empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer')

    # Spaces are not part of any n-gram.
    text = sentence.replace(' ', '')

    return [text[i:i + n] for i in range(len(text) - n + 1)]

if __name__ == '__main__':
    # Command line: <script> <n> <type>, where <type> is 'word' or 'char'.
    usage = 'usage: ' + sys.argv[0] + ' n-gram type(word or char)'

    # sys.exit() with a string writes it to stderr and exits with status 1,
    # so a wrong invocation is reported as a failure to the caller.
    if len(sys.argv) < 3:
        sys.exit(usage)

    try:
        n = int(sys.argv[1])
    except ValueError:
        sys.exit(usage)
    if n < 1:
        sys.exit(usage)

    sentence = 'This is apple computer'
    ngram_type = sys.argv[2]
    if ngram_type == 'word':
        # n = 2 => ['This-is', 'is-apple', 'apple-computer']
        print(get_word_ngram(sentence, n))
    elif ngram_type == 'char':
        # n = 2 => ['Th', 'hi', 'is', 'si', 'is', 'sa', 'ap', 'pp', 'pl', 'le', 'ec', 'co', 'om', 'mp', 'pu', 'ut', 'te', 'er']
        print(get_char_ngram(sentence, n))
    else:
        sys.exit('type error')

```

## 6. 集合

ex6.py
```python
# -*- coding: utf-8 -*-
import sys

def get_char_ngram(sentence, n):
    """Return the character n-grams of ``sentence``, ignoring spaces.

    Space characters are removed first, then every run of ``n`` consecutive
    characters is collected.  Text shorter than ``n`` characters yields an
    empty list.

    Raises:
        ValueError: if ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError('n must be a positive integer')

    # Spaces are not part of any n-gram.
    text = sentence.replace(' ', '')

    return [text[i:i + n] for i in range(len(text) - n + 1)]

if __name__ == '__main__':

sentence1 = 'paraparaparadise'
sentence2 = 'paragraph'

X = set( get_char_ngram(sentence1, 2) )
Y = set( get_char_ngram(sentence2, 2) )

print '和集合 = ',
print X | Y  # => set(['ad', 'ag', 'di', 'is', 'ap', 'pa', 'ra', 'ph', 'ar', 'se', 'gr'])

print '積集合 = ',
print X & Y  # => set(['ap', 'pa', 'ar', 'ra'])

print '差集合(X - Y) = ',
print X - Y  # => set(['is', 'ad', 'se', 'di'])

print '差集合(Y - X) = ',
print Y - X  # => set(['ph', 'gr', 'ag'])

if 'se' in X:
print '\'se\' in X' # 表示される

if 'se' in Y:
print '\'se\' in Y' # 表示されない
```

## 7. テンプレートによる文生成

ex7.py
```python
# -*- coding: utf-8 -*-

def get_template(x, y, z):
    """Return the sentence "<x>時の<y>は<z>" built from the three arguments.

    ``x`` is formatted as an integer; ``y`` and ``z`` are inserted using their
    ordinary string form.
    """
    # "%s" keeps z as written (22.4 -> "22.4").  The former "%lf" behaved like
    # "%f" and always padded the value to six decimals ("22.400000").
    return '%d時の%sは%s' % (x, y, z)


if __name__ == '__main__':
    print(get_template(12, '気温', 22.4))  # => 12時の気温は22.4
```

## 8. 暗号文

ex8.py
```python
# -*- coding: utf-8 -*-

def cipher(plain_sentence):
    """Return ``plain_sentence`` with each ASCII lowercase letter mirrored.

    A letter in 'a'..'z' is replaced by the character whose code is
    219 - ord(letter), which maps 'a' <-> 'z', 'b' <-> 'y', and so on.  Every
    other character is copied unchanged.  Applying the function twice gives
    back the original text.
    """
    converted = []
    for char in plain_sentence:
        # Restrict the substitution to ASCII 'a'..'z'.  str.islower() is also
        # true for non-ASCII lowercase letters, for which 219 - ord(char) is
        # negative and chr() raises ValueError.
        if 'a' <= char <= 'z':
            converted.append(chr(219 - ord(char)))
        else:
            converted.append(char)

    return ''.join(converted)

if __name__ == '__main__':
    # Encrypt once, then run the result through the same function again.
    original = 'aAaAaA'
    encoded = cipher(original)
    decoded = cipher(encoded)

    report = [
        ('plain sentence     = ', original),  # => aAaAaA
        ('encrypted sentence = ', encoded),   # => zAzAzA
        ('decrypted sentence = ', decoded),   # => aAaAaA
    ]
    for label, text in report:
        print(label + text)

```

## 9. Typoglycemia

ex9.py
```python
# -*- coding: utf-8 -*-
import random

def typoglycemia(sentence):
    """Shuffle the inner letters of every word longer than four characters.

    Args:
        sentence: an iterable of words (the script below passes the sentence
            already split on spaces).

    Returns:
        A list with one entry per input word.  Words of four characters or
        fewer are returned unchanged; longer words keep their first and last
        characters while the characters in between are randomly reordered.
    """
    result = []
    for word in sentence:
        if len(word) > 4:
            # Strings cannot be shuffled in place, so shuffle a list of the
            # inner characters and stitch the word back together.
            middle = list(word[1:-1])
            random.shuffle(middle)
            result.append(word[0] + ''.join(middle) + word[-1])
        else:
            result.append(word)
    return result


if __name__ == '__main__':
    sentence = 'I couldn\'t believe that I could actually understand what I was reading : the phenomenal power of the human mind .'
    words = sentence.split(' ')

    # Use a separate name for the result so the function is not rebound.
    shuffled = typoglycemia(words)

    print(shuffled)   # => e.g. ['I', "cdlu'not", 'bleivee', 'that', 'I', 'cluod', 'atcually', 'urnsdetnad', 'what', 'I', 'was', 'rnadieg', ':', 'the', 'pnhoeenmal', 'pewor', 'of', 'the', 'hmaun', 'mind', '.']
```

# 感想

Sign up for free and join this conversation.
If you already have a Qiita account log in.