6
10

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

Rのstringiパッケージによる文字列の変換についてメモ

Last updated at Posted at 2021-04-05

はじめに

Rのstringiパッケージを用いた文字列の変換についてまとめます。

目次

Rのstringiパッケージによる文字列の変換

まず、使用する文字列ベクトルを用意しておきます。

R
library(tidyverse)
library(stringi)

# Sample character vector used by every example below. Each entry pairs an
# ordinary form with a width variant so that the conversions have a visible
# effect: elements 3, 4, 6 and 12 are fullwidth, element 9 is halfwidth
# katakana, element 13 combines fullwidth katakana with fullwidth digits and
# letters, and element 14 combines halfwidth ASCII with halfwidth katakana.
# NOTE(review): the width variants were reconstructed from the printed column
# widths and the stri_trans_isnfkc() results recorded further down; confirm
# against the original post.
str <- c("alphabet",
         "ALPHABET",
         "ａｌｐｈａｂｅｔ",
         "ＡＬＰＨＡＢＥＴ",
         ".,;:+-*/^_~&| ",
         "．，；：＋－＊／＾＿～＆｜　",
         "ひらがな",
         "カタカナ",
         "ｶﾀｶﾅ",
         "漢字",
         "0123456789",
         "０１２３４５６７８９",
         "エタノール水溶液１，０００ｍＬ",
         "50%のｴﾀﾉｰﾙ水溶液1,000mg")
str
#  [1] "alphabet"                       "ALPHABET"
#  [3] "ａｌｐｈａｂｅｔ"               "ＡＬＰＨＡＢＥＴ"
#  [5] ".,;:+-*/^_~&| "                 "．，；：＋－＊／＾＿～＆｜　"
#  [7] "ひらがな"                       "カタカナ"
#  [9] "ｶﾀｶﾅ"                           "漢字"
# [11] "0123456789"                     "０１２３４５６７８９"
# [13] "エタノール水溶液１，０００ｍＬ" "50%のｴﾀﾉｰﾙ水溶液1,000mg"

# The full Latin alphabet, lowercase and uppercase, used to show how every
# letter is mapped by a given conversion.
str_alphabet <- c("abcdefghijklmnopqrstuvwxyz",
                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
str_alphabet
# [1] "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Hiragana test data: five strings of plain, voiced and semi-voiced kana,
# three strings of contracted sounds (small ya/yu/yo), and the single kana "ん".
str_hiragana <- c("あかさたなはまやらわがざだばぱ",
                  "いきしちにひみいりゐぎじぢびぴ",
                  "うくすつぬふむゆるうぐずづぶぷ",
                  "えけせてねへめえれゑげぜでべぺ",
                  "おこそとのほもよろをごぞどぼぽ",
                  "きゃぎゃしゃじゃちゃぢゃにゃひゃびゃぴゃみゃりゃ",
                  "きゅぎゅしゅじゅちゅぢゅにゅひゅびゅぴゅみゅりゅ",
                  "きょぎょしょじょちょぢょにょひょびょぴょみょりょ",
                  "ん")
str_hiragana
# [1] "あかさたなはまやらわがざだばぱ"                  
# [2] "いきしちにひみいりゐぎじぢびぴ"                  
# [3] "うくすつぬふむゆるうぐずづぶぷ"                  
# [4] "えけせてねへめえれゑげぜでべぺ"                  
# [5] "おこそとのほもよろをごぞどぼぽ"                  
# [6] "きゃぎゃしゃじゃちゃぢゃにゃひゃびゃぴゃみゃりゃ"
# [7] "きゅぎゅしゅじゅちゅぢゅにゅひゅびゅぴゅみゅりゅ"
# [8] "きょぎょしょじょちょぢょにょひょびょぴょみょりょ"
# [9] "ん" 

stri_trans_general():一般的な変換

大文字・小文字

R
# Any -> uppercase.
# The ID is written with and without the "Any-" prefix and in both letter
# cases; the article records a single shared result for all four calls.
stri_trans_general(str, "Upper")
stri_trans_general(str, "upper")
stri_trans_general(str, "Any-Upper")
stri_trans_general(str, "any-upper")
#  [1] "ALPHABET"                       "ALPHABET"                      
#  [3] "ALPHABET"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ML" "50%のエタノール水溶液1,000MG"       

# Any -> lowercase.
# Same four spellings of the ID as for the uppercase conversion; one shared
# result is recorded below.
stri_trans_general(str, "Lower")
stri_trans_general(str, "Any-Lower")
stri_trans_general(str, "lower")
stri_trans_general(str, "any-lower")
#  [1] "alphabet"                       "alphabet"                      
#  [3] "alphabet"               "alphabet"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ml" "50%のエタノール水溶液1,000mg" 

半角・全角

R
# Halfwidth -> fullwidth (the ID is given in two letter-case spellings).
stri_trans_general(str, "Halfwidth-Fullwidth")
stri_trans_general(str, "halfwidth-fullwidth")
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"                       "ALPHABET"                      
#  [5] ".,;:+-*/^_~&| "           ".,;:+-*/^_~&| "          
#  [7] "ひらがな"                               "カタカナ"                              
#  [9] "カタカナ"                               "漢字"                                  
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000mL"         "50%のエタノール水溶液1,000mg"

# Halfwidth -> fullwidth applied to the complete alphabet.
stri_trans_general(str_alphabet, "halfwidth-fullwidth")
# [1] "abcdefghijklmnopqrstuvwxyz"
# [2] "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Fullwidth -> halfwidth (the ID is given in two letter-case spellings).
stri_trans_general(str, "Fullwidth-Halfwidth")
stri_trans_general(str, "fullwidth-halfwidth")
#  [1] "alphabet"                "ALPHABET"               
#  [3] "alphabet"                "ALPHABET"               
#  [5] ".,;:+-*/^_~&| "          ".,;:+-*/^_~&| "         
#  [7] "ひらがな"                "カタカナ"                   
#  [9] "カタカナ"                    "漢字"                   
# [11] "0123456789"              "0123456789"             
# [13] "エタノール水溶液1,000mL"      "50%のエタノール水溶液1,000mg"

# Round trip: convert the alphabet to fullwidth, print that intermediate
# value, then convert it back to halfwidth (two results are printed).
stri_trans_general(str_alphabet, "halfwidth-fullwidth") %>% print() %>%
  stri_trans_general("fullwidth-halfwidth")
# [1] "abcdefghijklmnopqrstuvwxyz"
# [2] "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
# [1] "abcdefghijklmnopqrstuvwxyz"
# [2] "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Hiragana -> katakana, print the intermediate value, then convert the
# katakana to halfwidth (two results are printed).
stri_trans_general(str_hiragana, "Hiragana-Katakana") %>% print() %>%
  stri_trans_general("fullwidth-halfwidth")
# [1] "アカサタナハマヤラワガザダバパ"                  
# [2] "イキシチニヒミイリヰギジヂビピ"                  
# [3] "ウクスツヌフムユルウグズヅブプ"                  
# [4] "エケセテネヘメエレヱゲゼデベペ"                  
# [5] "オコソトノホモヨロヲゴゾドボポ"                  
# [6] "キャギャシャジャチャヂャニャヒャビャピャミャリャ"
# [7] "キュギュシュジュチュヂュニュヒュビュピュミュリュ"
# [8] "キョギョショジョチョヂョニョヒョビョピョミョリョ"
# [9] "ン"                                              
# [1] "アカサタナハマヤラワガザダバパ"          "イキシチニヒミイリヰギジヂビピ"        
# [3] "ウクスツヌフムユルウグズヅブプ"          "エケセテネヘメエレヱゲゼデベペ"        
# [5] "オコソトノホモヨロヲゴゾドボポ"          "キャギャシャジャチャヂャニャヒャビャピャミャリャ"
# [7] "キュギュシュジュチュヂュニュヒュビュピュミュリュ" "キョギョショジョチョヂョニョヒョビョピョミョリョ"
# [9] "ン"                            

ひらがな・カタカナ

R
# Hiragana -> katakana (the ID is given in two letter-case spellings).
stri_trans_general(str, "Hiragana-Katakana")
stri_trans_general(str, "hiragana-katakana")
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ヒラガナ"                       "カタカナ"                      
#  [9] "カタカナ"                       "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%ノエタノール水溶液1,000mg" 

# Hiragana -> katakana applied to the full hiragana test data.
stri_trans_general(str_hiragana, "Hiragana-Katakana")
stri_trans_general(str_hiragana, "hiragana-katakana")
# [1] "アカサタナハマヤラワガザダバパ"                  
# [2] "イキシチニヒミイリヰギジヂビピ"                  
# [3] "ウクスツヌフムユルウグズヅブプ"                  
# [4] "エケセテネヘメエレヱゲゼデベペ"                  
# [5] "オコソトノホモヨロヲゴゾドボポ"                  
# [6] "キャギャシャジャチャヂャニャヒャビャピャミャリャ"
# [7] "キュギュシュジュチュヂュニュヒュビュピュミュリュ"
# [8] "キョギョショジョチョヂョニョヒョビョピョミョリョ"
# [9] "ン"                                              

# Katakana -> hiragana (the ID is given in two letter-case spellings).
stri_trans_general(str, "Katakana-Hiragana")
stri_trans_general(str, "katakana-hiragana")
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "かたかな"                      
#  [9] "かたかな"                       "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "えたのおる水溶液1,000mL" "50%のえたのおる水溶液1,000mg" 

英字・ギリシャ文字・キリル文字・ハングル

R
# Latin letters -> Greek letters.
stri_trans_general(str, "Latin-Greek")
stri_trans_general(str, "latin-greek")
#  [1] "ἀλφαβετ"                  "ἈΛΦΑΒΕΤ"                 
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;·+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000μγ"    

# Latin -> Greek applied to the complete alphabet. The a-z / A-Z vector is
# named str_alphabet; no object called str_latin is defined in this script.
stri_trans_general(str_alphabet, "latin-greek")
# [1] "ἀβκδεφγἱικλμνοπκρστυυυξυζ"
# [2] "ἈΒΚΔΕΦΓἹΙΚΛΜΝΟΠΚΡΣΤΥΥΥΞΥΖ"

# Latin letters -> Cyrillic letters.
stri_trans_general(str, "Latin-Cyrillic")
stri_trans_general(str, "latin-cyrillic")
#  [1] "алпхабет"               "АЛПХАБЕТ"              
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000мг"    

# Latin -> Cyrillic applied to the complete alphabet. The a-z / A-Z vector is
# named str_alphabet; no object called str_latin is defined in this script.
stri_trans_general(str_alphabet, "latin-cyrillic")
# [1] "абцдефгхийклмнопкрстувуксыз"
# [2] "АБЦДЕФГХИЙКЛМНОПКРСТУВУКСЫЗ"

# Latin letters -> Hangul.
stri_trans_general(str, "Latin-Hangul")
stri_trans_general(str, "latin-hangul")
#  [1] "앒하벹"                         "앒하벹"                        
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+*/^_~&| "                  ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000믁"      

# Latin letters -> Indic (the "Interindic" ID).
# No printed result is recorded for these two calls.
stri_trans_general(str, "Latin-Interindic")
stri_trans_general(str, "latin-interindic")

ひらがな・カタカナ・英字

R
# Hiragana -> Latin letters (romanization).
stri_trans_general(str, "Hiragana-Latin")
stri_trans_general(str, "hiragana-latin")
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"             "ALPHABET"            
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "
#  [7] "hiragana"                     "カタカナ"                    
#  [9] "カタカナ"                         "漢字"                        
# [11] "0123456789"                   "0123456789"        
# [13] "エタノ̄ル水溶液1,000mL" "50%noエタノール水溶液1,000mg"    

# Hiragana -> Latin applied to the full hiragana test data.
stri_trans_general(str_hiragana, "Hiragana-Latin")
# [1] "akasatanahamayarawagazadabapa"       "ikishichinihimiiriwigijidjibipi"    
# [3] "ukusutsunufumuyuruuguzudzubupu"      "ekesetenehemeerewegezedebepe"       
# [5] "okosotonohomoyorowogozodobopo"       "kyagyashajachadjanyahyabyapyamyarya"
# [7] "kyugyushujuchudjunyuhyubyupyumyuryu" "kyogyoshojochodjonyohyobyopyomyoryo"
# [9] "n" 

# Katakana -> Latin letters (romanization).
stri_trans_general(str, "Katakana-Latin")
stri_trans_general(str, "katakana-latin")
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"             "ALPHABET"            
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "
#  [7] "ひらがな"                     "katakana"                    
#  [9] "katakana"                     "漢字"                        
# [11] "0123456789"                   "0123456789"        
# [13] "etanōru水溶液1,000mL" "50%のetanōru水溶液1,000mg" 

# Latin letters -> hiragana.
stri_trans_general(str, "Latin-Hiragana")
stri_trans_general(str, "latin-hiragana")
#  [1] "あるぷはべて"                   "あるぷはべて"                  
#  [3] "alphabet"               "ALPHABET"              
#  [5] "。、;:+-*/^_&| "                ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1、000むぐ"   

# Latin -> hiragana applied to the complete alphabet.
stri_trans_general(str_alphabet, "Latin-Hiragana")
# [1] "あぶくでふぐひじくるむのぷくるすてぅゔうくせぃず"
# [2] "あぶくでふぐひじくるむのぷくるすてぅゔうくせぃず"

# Latin letters -> katakana.
stri_trans_general(str, "Latin-Katakana")
stri_trans_general(str, "latin-katakana")
#  [1] "アルプハベテ"                   "アルプハベテ"                  
#  [3] "アルプハベテ"                   "アルプハベテ"                  
#  [5] "。、;:+-*/^_&| "                ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ムル" "50%のエタノール水溶液1、000ムグ"   

# Latin -> katakana applied to the complete alphabet.
stri_trans_general(str_alphabet, "Latin-Katakana")
# [1] "アブクデフグヒジクルムノプクルステゥヴウクセィズ"
# [2] "アブクデフグヒジクルムノプクルステゥヴウクセィズ"

# Any script -> Latin letters. In the recorded result the kanji are also
# romanized (as pinyin with tone marks).
stri_trans_general(str, "Any-Latin")
stri_trans_general(str, "any-latin")
#  [1] "alphabet"                               "ALPHABET"                              
#  [3] "alphabet"                       "ALPHABET"                      
#  [5] ".,;:+-*/^_~&| "                         ".,;:+-*/^_~&| "          
#  [7] "hiragana"                               "katakana"                              
#  [9] "katakana"                               "hàn zì"                              
# [11] "0123456789"                             "0123456789"                  
# [13] "etanōru shuǐ róng yè1,000mL" "50%noetanōru shuǐ róng yè1,000mg" 

# Any script -> hiragana.
stri_trans_general(str, "Any-Hiragana")
stri_trans_general(str, "any-hiragana")
#  [1] "あるぷはべて"                     "あるぷはべて"                    
#  [3] "alphabet"                 "ALPHABET"                
#  [5] ".,;:+-*/^_~&| "                   ".,;:+-*/^_~&| "    
#  [7] "ひらがな"                         "かたかな"                        
#  [9] "かたかな"                         "漢字"                            
# [11] "0123456789"                       "0123456789"            
# [13] "えたのおる水溶液1,000mL"   "50%のえたのおる水溶液1、000むぐ"

# Any script -> katakana.
stri_trans_general(str, "Any-Katakana")
stri_trans_general(str, "any-katakana")
#  [1] "アルプハベテ"                   "アルプハベテ"                  
#  [3] "アルプハベテ"                   "アルプハベテ"                  
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ヒラガナ"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ムル" "50%ノエタノール水溶液1、000ムグ"  

Unicode Normalization

互換正規化（'NFKC'・'NFKD'）は、全角英数字を半角に、半角カタカナを全角に、というように “まとも” な形に変換します（'NFC'・'NFD'ではこれらの文字は変換されません）。実用上は'NFKC'が一番まともそうです。

R
# NFC normalization through stri_trans_general(); the ID is given with and
# without the "Any-" prefix.
stri_trans_general(str, 'NFC')
stri_trans_general(str, 'Any-NFC')
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"   

# NFD normalization through stri_trans_general().
stri_trans_general(str, 'NFD')
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"      

# NFKD normalization through stri_trans_general().
stri_trans_general(str, 'NFKD')
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"                     "ALPHABET"                    
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "              
#  [7] "ひらがな"                     "カタカナ"                    
#  [9] "カタカナ"                     "漢字"                        
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000mL"      "50%のエタノール水溶液1,000mg"

# NFKC normalization through stri_trans_general().
stri_trans_general(str, 'NFKC')
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"                     "ALPHABET"                    
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "              
#  [7] "ひらがな"                     "カタカナ"                    
#  [9] "カタカナ"                     "漢字"                        
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000mL"      "50%のエタノール水溶液1,000mg"

複数の変換

変換IDをセミコロン（;）で区切って並べると、複数の変換をまとめて適用することもできます。

R
# Compound ID: katakana -> hiragana, then halfwidth -> fullwidth.
# The individual IDs are joined with ";" in a single string.
stri_trans_general(str, "katakana-hiragana; halfwidth-fullwidth")
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"                       "ALPHABET"                      
#  [5] ".,;:+-*/^_~&| "           ".,;:+-*/^_~&| "          
#  [7] "ひらがな"                               "かたかな"                              
#  [9] "かたかな"                               "漢字"                                  
# [11] "0123456789"                   "0123456789"                  
# [13] "えたのおる水溶液1,000mL"         "50%のえたのおる水溶液1,000mg"

# Compound ID: hiragana -> katakana, then fullwidth -> halfwidth.
stri_trans_general(str, "hiragana-katakana; fullwidth-halfwidth")
#  [1] "alphabet"               "ALPHABET"              
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "         ".,;:+-*/^_~&| "        
#  [7] "ヒラガナ"                  "カタカナ"                  
#  [9] "カタカナ"                   "漢字"                  
# [11] "0123456789"             "0123456789"            
# [13] "エタノール水溶液1,000mL"     "50%ノエタノール水溶液1,000mg"

# Compound ID: halfwidth -> fullwidth, then any -> uppercase.
# The uppercase step is written both as "upper" and as "Any-Upper"; one
# shared result is recorded below.
stri_trans_general(str, "halfwidth-fullwidth; upper")
stri_trans_general(str, "halfwidth-fullwidth; Any-Upper")
#  [1] "ALPHABET"                       "ALPHABET"                      
#  [3] "ALPHABET"                       "ALPHABET"                      
#  [5] ".,;:+-*/^_~&| "           ".,;:+-*/^_~&| "          
#  [7] "ひらがな"                               "カタカナ"                              
#  [9] "カタカナ"                               "漢字"                                  
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000ML"         "50%のエタノール水溶液1,000MG"

stri_trans_toupper(), stri_trans_tolower(), stri_trans_totitle():大文字・小文字変換

それぞれstringrパッケージのstr_to_upper(), str_to_lower(), str_to_title()と同じ結果になるようです。str_to_sentence()は、stri_trans_totitle()でtype='sentence'を指定した場合に対応します。

R
# Uppercase every element with stringi.
stri_trans_toupper(str)
#  [1] "ALPHABET"                       "ALPHABET"                      
#  [3] "ALPHABET"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ML" "50%のエタノール水溶液1,000MG"  
# stringr counterpart, shown for comparison with stri_trans_toupper().
str_to_upper(str)
#  [1] "ALPHABET"                       "ALPHABET"                      
#  [3] "ALPHABET"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ML" "50%のエタノール水溶液1,000MG"     

# Lowercase every element with stringi.
stri_trans_tolower(str)
#  [1] "alphabet"                       "alphabet"                      
#  [3] "alphabet"               "alphabet"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ml" "50%のエタノール水溶液1,000mg"  
# stringr counterpart, shown for comparison with stri_trans_tolower().
str_to_lower(str)
#  [1] "alphabet"                       "alphabet"                      
#  [3] "alphabet"               "alphabet"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000ml" "50%のエタノール水溶液1,000mg"   

# Title case with stringi.
stri_trans_totitle(str)
#  [1] "Alphabet"                       "Alphabet"                      
#  [3] "Alphabet"               "Alphabet"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000Ml" "50%のエタノール水溶液1,000mg"   
# stringr counterpart, shown for comparison with stri_trans_totitle().
str_to_title(str)
#  [1] "Alphabet"                       "Alphabet"                      
#  [3] "Alphabet"               "Alphabet"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000Ml" "50%のエタノール水溶液1,000mg"      

# Title case on two-word input: each element is joined to itself with a
# space. In the recorded result both words are capitalized.
stri_trans_totitle(str_c(str, " ", str))
#  [1] "Alphabet Alphabet"                                             "Alphabet Alphabet"                                            
#  [3] "Alphabet Alphabet"                             "Alphabet Alphabet"                            
#  [5] ".,;:+-*/^_~&|  .,;:+-*/^_~&| "                                 ".,;:+-*/^_~&|  .,;:+-*/^_~&| "    
#  [7] "ひらがな ひらがな"                                             "カタカナ カタカナ"                                            
#  [9] "カタカナ カタカナ"                                                     "漢字 漢字"                                                    
# [11] "0123456789 0123456789"                                         "0123456789 0123456789"                    
# [13] "エタノール水溶液1,000Ml エタノール水溶液1,000Ml" "50%のエタノール水溶液1,000mg 50%のエタノール水溶液1,000mg"     
# stringr counterpart on the same two-word input, for comparison.
str_to_title(str_c(str, " ", str))
#  [1] "Alphabet Alphabet"                                             "Alphabet Alphabet"                                            
#  [3] "Alphabet Alphabet"                             "Alphabet Alphabet"                            
#  [5] ".,;:+-*/^_~&|  .,;:+-*/^_~&| "                                 ".,;:+-*/^_~&|  .,;:+-*/^_~&| "    
#  [7] "ひらがな ひらがな"                                             "カタカナ カタカナ"                                            
#  [9] "カタカナ カタカナ"                                                     "漢字 漢字"                                                    
# [11] "0123456789 0123456789"                                         "0123456789 0123456789"                    
# [13] "エタノール水溶液1,000Ml エタノール水溶液1,000Ml" "50%のエタノール水溶液1,000mg 50%のエタノール水溶液1,000mg"            

# Sentence-style casing: with type='sentence' the recorded result capitalizes
# only the first word of each element.
stri_trans_totitle(str_c(str, " ", str), type='sentence')
#  [1] "Alphabet alphabet"                                             "Alphabet alphabet"                                            
#  [3] "Alphabet alphabet"                             "Alphabet alphabet"                            
#  [5] ".,;:+-*/^_~&|  .,;:+-*/^_~&| "                                 ".,;:+-*/^_~&|  .,;:+-*/^_~&| "    
#  [7] "ひらがな ひらがな"                                             "カタカナ カタカナ"                                            
#  [9] "カタカナ カタカナ"                                                     "漢字 漢字"                                                    
# [11] "0123456789 0123456789"                                         "0123456789 0123456789"                    
# [13] "エタノール水溶液1,000ml エタノール水溶液1,000ml" "50%のエタノール水溶液1,000mg 50%のエタノール水溶液1,000mg"            
# stringr counterpart of the type='sentence' call, for comparison.
str_to_sentence(str_c(str, " ", str))
#  [1] "Alphabet alphabet"                                             "Alphabet alphabet"                                            
#  [3] "Alphabet alphabet"                             "Alphabet alphabet"                            
#  [5] ".,;:+-*/^_~&|  .,;:+-*/^_~&| "                                 ".,;:+-*/^_~&|  .,;:+-*/^_~&| "    
#  [7] "ひらがな ひらがな"                                             "カタカナ カタカナ"                                            
#  [9] "カタカナ カタカナ"                                                     "漢字 漢字"                                                    
# [11] "0123456789 0123456789"                                         "0123456789 0123456789"                    
# [13] "エタノール水溶液1,000ml エタノール水溶液1,000ml" "50%のエタノール水溶液1,000mg 50%のエタノール水溶液1,000mg"       

stri_trans_nfc(), stri_trans_nfd() 等:Unicode Normalization

NFC, NFKC, NFD, NFKD, or NFKC_Casefold Unicode Normalization Form
互換正規化（NFKC・NFKD）は、全角英数字を半角に、半角カタカナを全角に、というように “まとも” な形に変換します（NFC・NFDではこれらの文字は変換されません）。実用上は'NFKC'が一番まともそうです。

R
# Unicode normalization with the dedicated stri_trans_nf*() functions.
# NFC: canonical decomposition followed by canonical composition.
stri_trans_nfc(str)
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"      

# NFD: canonical decomposition only.
stri_trans_nfd(str)
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"     

# NFKC: compatibility decomposition followed by canonical composition.
stri_trans_nfkc(str)
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"                     "ALPHABET"                    
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "              
#  [7] "ひらがな"                     "カタカナ"                    
#  [9] "カタカナ"                     "漢字"                        
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000mL"      "50%のエタノール水溶液1,000mg"

# NFKD: compatibility decomposition only.
stri_trans_nfkd(str)
#  [1] "alphabet"                     "ALPHABET"                    
#  [3] "alphabet"                     "ALPHABET"                    
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "              
#  [7] "ひらがな"                     "カタカナ"                    
#  [9] "カタカナ"                     "漢字"                        
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000mL"      "50%のエタノール水溶液1,000mg"

# NFKC_Casefold: NFKC combined with case folding and removal of ignorable
# characters (introduced with Unicode 5.2).
stri_trans_nfkc_casefold(str)
#  [1] "alphabet"                     "alphabet"                    
#  [3] "alphabet"                     "alphabet"                    
#  [5] ".,;:+-*/^_~&| "               ".,;:+-*/^_~&| "              
#  [7] "ひらがな"                     "カタカナ"                    
#  [9] "カタカナ"                     "漢字"                        
# [11] "0123456789"                   "0123456789"                  
# [13] "エタノール水溶液1,000ml"      "50%のエタノール水溶液1,000mg"

# Check, element by element, whether each string is already in the given
# normalization form. Results are printed two per row, in the order of str.

# Already NFC?
stri_trans_isnfc(str)
#  [1] TRUE TRUE
#  [3] TRUE TRUE
#  [5] TRUE TRUE
#  [7] TRUE TRUE
#  [9] TRUE TRUE
# [11] TRUE TRUE
# [13] TRUE TRUE

# Already NFD? Only element 7 ("ひらがな") is reported as not NFD.
stri_trans_isnfd(str)
#  [1]  TRUE  TRUE
#  [3]  TRUE  TRUE
#  [5]  TRUE  TRUE
#  [7] FALSE  TRUE
#  [9]  TRUE  TRUE
# [11]  TRUE  TRUE
# [13]  TRUE  TRUE

# Already NFKD?
stri_trans_isnfkd(str)
#  [1]  TRUE  TRUE
#  [3] FALSE FALSE
#  [5]  TRUE FALSE
#  [7] FALSE  TRUE
#  [9] FALSE  TRUE
# [11]  TRUE FALSE
# [13] FALSE FALSE

# Already NFKC?
stri_trans_isnfkc(str)
#  [1]  TRUE  TRUE
#  [3] FALSE FALSE
#  [5]  TRUE FALSE
#  [7]  TRUE  TRUE
#  [9] FALSE  TRUE
# [11]  TRUE FALSE
# [13] FALSE FALSE

# Already NFKC_Casefold? Compared with the NFKC check, element 2 ("ALPHABET")
# additionally fails.
stri_trans_isnfkc_casefold(str)
#  [1]  TRUE FALSE
#  [3] FALSE FALSE
#  [5]  TRUE FALSE
#  [7]  TRUE  TRUE
#  [9] FALSE  TRUE
# [11]  TRUE FALSE
# [13] FALSE FALSE

NFKCとNFKDの結果は見た目には同じですが、内部の文字列は異なります。たとえば「ひらがな」は、NFKDでは濁点付きの「が」が「か」と結合用の濁点に分解されるため、文字数が4から5に増えます。

R
# Element-wise comparison of the NFKC and NFKD results: only element 7
# ("ひらがな") differs.
stri_trans_nfkc(str) == stri_trans_nfkd(str)
#  [1]  TRUE  TRUE
#  [3]  TRUE  TRUE
#  [5]  TRUE  TRUE
#  [7] FALSE  TRUE
#  [9]  TRUE  TRUE
# [11]  TRUE  TRUE
# [13]  TRUE  TRUE
# The difference shows up in the length: 4 characters after NFKC, 5 after NFKD.
str_length(stri_trans_nfkc("ひらがな"))
# [1] 4
str_length(stri_trans_nfkd("ひらがな"))
# [1] 5

stri_trans_char():対応する1文字ずつの置換

対応する1文字ずつを置換します。

R
# Lowercase Latin letters -> uppercase Latin letters.
# Each character of the second argument is replaced by the character at the
# same position in the third argument.
stri_trans_char(str, "abcdefghijklmnopqrstuvwxyz", "ABCDEFGHIJKLMNOPQRSTUVWXYZ")
#  [1] "ALPHABET"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000MG"      

# Halfwidth uppercase Latin letters -> fullwidth uppercase Latin letters.
# The replacement set must be the fullwidth letters; with identical pattern
# and replacement strings the call would leave every element unchanged.
stri_trans_char(str, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺ")
#  [1] "alphabet"                       "ALPHABET"              
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"      

# Lowercase a/i/u/e/o -> fullwidth uppercase Ａ/Ｉ/Ｕ/Ｅ/Ｏ.
# The first call spells the fullwidth replacement set out literally; the
# second call derives the same set with stri_trans_general().
stri_trans_char(str, "aiueo", "ＡＩＵＥＯ")
stri_trans_char(str, "aiueo", stri_trans_general("aiueo", "halfwidth-fullwidth; upper"))
#  [1] "AlphAbEt"                    "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1,000mL" "50%のエタノール水溶液1,000mg"   

# Halfwidth and fullwidth comma -> halfwidth and fullwidth underscore.
# The pattern lists both comma widths, and each maps to the underscore of
# the same width.
stri_trans_char(str, ",，", "_＿")
#  [1] "alphabet"                       "ALPHABET"                      
#  [3] "alphabet"               "ALPHABET"              
#  [5] "._;:+-*/^_~&| "                 "._;:+-*/^_~&| "  
#  [7] "ひらがな"                       "カタカナ"                      
#  [9] "カタカナ"                           "漢字"                          
# [11] "0123456789"                     "0123456789"          
# [13] "エタノール水溶液1_000mL" "50%のエタノール水溶液1_000mg"      

# Halfwidth digits and comma -> fullwidth digits and comma.
# The first call spells the fullwidth replacement set out literally; the
# second call derives the same set with stri_trans_general().
stri_trans_char(str, "0123456789,", "０１２３４５６７８９，")
stri_trans_char(str, "0123456789,", stri_trans_general("0123456789,", "halfwidth-fullwidth"))
#  [1] "alphabet"                        "ALPHABET"                       
#  [3] "alphabet"                "ALPHABET"               
#  [5] ".,;:+-*/^_~&| "                 ".,;:+-*/^_~&| "   
#  [7] "ひらがな"                        "カタカナ"                       
#  [9] "カタカナ"                            "漢字"                           
# [11] "0123456789"            "0123456789"           
# [13] "エタノール水溶液1,000mL"  "50%のエタノール水溶液1,000mg"

# Arabic digits -> kanji numerals (and comma -> "、").
# The pattern lists the halfwidth digits and comma followed by their
# fullwidth counterparts, so both widths map to the same kanji numeral.
stri_trans_char(str, "0123456789,０１２３４５６７８９，", "〇一二三四五六七八九、〇一二三四五六七八九、")
#  [1] "alphabet"                        "ALPHABET"                       
#  [3] "alphabet"                "ALPHABET"               
#  [5] ".、;:+-*/^_~&| "                 ".、;:+-*/^_~&| "   
#  [7] "ひらがな"                        "カタカナ"                       
#  [9] "カタカナ"                            "漢字"                           
# [11] "〇一二三四五六七八九"            "〇一二三四五六七八九"           
# [13] "エタノール水溶液一、〇〇〇mL"  "五〇%のエタノール水溶液一、〇〇〇mg"

stri_datetime_format():日付フォーマット

R
## Date
# Build a date-time from year, month and day only; the printed value shows
# 12:00:00 in the session time zone (JST here).
date <- stri_datetime_create(1964, 10, 10)
date
# [1] "1964-10-10 12:00:00 JST"
# Japanese imperial calendar, long date format.
stri_datetime_format(date, "date_long", locale = "@calendar=japanese")
# [1] "昭和39年10月10日"
# Japanese imperial calendar, full date format (adds the weekday).
stri_datetime_format(date, "date_full", locale = "@calendar=japanese")
# [1] "昭和39年10月10日土曜日"
# Same calendar with the language/region spelled out explicitly.
stri_datetime_format(date, "date_full", locale = "ja_JP@calendar=japanese")
# [1] "昭和39年10月10日土曜日"
# Gregorian calendar. "gregorian" is the ICU identifier for this calendar;
# "us" is not a calendar type.
# NOTE(review): the result below was recorded with "@calendar=us", which
# presumably fell back to the default calendar - confirm it is unchanged.
stri_datetime_format(date, "date_full", locale = "@calendar=gregorian")
# [1] "1964年10月10日土曜日"
# Hebrew calendar.
stri_datetime_format(date, "date_full", locale = "@calendar=hebrew")
# [1] "AM5725年2月4日土曜日"

参考

6
10
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
6
10

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?