More than 5 years have passed since the last update.

Pythonの正規表現が覚えられない

Last updated at 2020-01-21 / Posted at 2020-01-21

使い方のかんたんなメモ

import re
# Quick reference for the `re` module: each section runs one operation on
# the same sample text and shows the printed result in a "# =>" comment.
ptn = re.compile(r"hoge+")  # compile once when the pattern is reused
ptn_with_capture = re.compile(r"(hoge)e*")  # parentheses create a capture group
string = r"hogee_qwerty_hogeeeeee"

# search: only the first match in the text
first_matched_string = ptn.search(string).group(0)
print(first_matched_string)
# => hogee
# With a capture group, group(<number>) returns just that group's text
first_matched_string = ptn_with_capture.search(string).group(1)
print(first_matched_string)
# => hoge

# findall: every match, collected into a list
matched_string_list = ptn.findall(string)
print(matched_string_list)
# => ['hogee', 'hogeeeeee']
# When the pattern has a capture group, only the captured text is returned
matched_string_list = ptn_with_capture.findall(string)
print(matched_string_list)
# => ['hoge', 'hoge']

# finditer: every match, produced lazily as match objects
matched_string_iter = ptn.finditer(string)
print([match.group(0) for match in matched_string_iter])
# => ['hogee', 'hogeeeeee']

# split: cut the text at every match
split_strings = ptn.split(string)
print(split_strings)
# => ['', '_qwerty_', '']

# sub: replace every match with another string
replace_with = r"→\1←"  # backslash + number inserts the captured group
substituted_string = ptn_with_capture.sub(replace_with, string)
print(substituted_string)
# => →hoge←_qwerty_→hoge←

# Non-greedy (minimal) matching
# Appending ? to a repetition operator (*, ?, +, ...) makes it match as little as possible.
minimal_ptn = re.compile(r"h.*?e")
minimal_matched_string = minimal_ptn.search(string)
print(minimal_matched_string.group(0))
# => hoge

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up