わたくしの Rust 文字列処理コレクションです。
// https://godbolt.org/z/aT6EhP753
/// Returns the name of the argument's type `T` as an owned `String`.
///
/// The value itself is ignored; only its static type is inspected, using
/// `std::any::type_name`.
fn type_of<T>(_: T) -> String {
    let name: &'static str = std::any::type_name::<T>();
    String::from(name)
}
/// Runs a collection of small, self-checking string-handling examples.
///
/// Every example asserts its own expected result, so this doubles as a smoke
/// test: once all assertions hold, `fine` is printed to stdout.
///
/// # Panics
///
/// Panics at the first assertion that does not hold.
pub fn main() {
    // ---- char ----
    let ch: char = 'a';
    assert_eq!(ch, 'a');

    // ---- Strings: &str and String ----
    let s: &str = "abcde";
    assert_eq!(s, "abcde"); // &str can be compared with &str
    assert!(s == "abcde");
    let s = "";
    assert!(s.is_empty());
    // Long &str literal: a trailing backslash drops the newline and the
    // leading whitespace of the following line.
    let st = "abc\
              def\
              ghi";
    assert!(st == "abcdefghi");
    let word: String = "abcde".to_string();
    assert_eq!(word, String::from("abcde"));
    assert_eq!(word, "abcde"); // String can be compared with &str

    // ---- Length ----
    let st = "abcdef";
    assert_eq!(st.len(), 6);
    // len() counts bytes, so it matches the char count only for ASCII text.
    assert_eq!(st.len(), st.chars().count());
    let kana = "あいう";
    assert_eq!(kana.len(), 9);
    assert_eq!(kana.chars().count(), 3);

    // ---- Concatenating a few strings ----
    let abc = "abc".to_string();
    let def = String::from("def");
    assert_eq!(format!("{}{}", abc, def), "abcdef");
    let abcdef: String = format!("{}{}", abc, def);
    assert_eq!(abcdef, "abcdef");
    assert_eq!(abcdef, abc + "def"); // String + &str (consumes `abc`)
    assert_eq!("abc".to_string() + "def" + "ghi", "abcdefghi");

    // ---- Concatenating a Vec of &str ----
    let strs = vec!["Red", "Green", "Blue"];
    let joined = strs.iter().fold("".to_string(), |acc, cur| acc + cur);
    assert_eq!(joined, "RedGreenBlue");
    // The standard library offers this directly, with or without a separator.
    assert_eq!(strs.concat(), "RedGreenBlue");
    assert_eq!(strs.join(", "), "Red, Green, Blue");

    // ---- Concatenating a Vec of String ----
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    assert_eq!(words.concat(), "RedGreenBlue");
    let joined = words
        .into_iter()
        .fold("".to_string(), |acc, cur| acc + &cur);
    assert_eq!(joined, "RedGreenBlue");
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    let joined = words.into_iter().reduce(|acc, cur| acc + &cur).unwrap();
    assert_eq!(joined, "RedGreenBlue");
    // Another way: flatten every String into its chars and collect them.
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    let joined = words.iter().flat_map(|s| s.chars()).collect::<String>();
    assert_eq!(joined, "RedGreenBlue");

    // ---- Concatenating a Vec of Option<String> ----
    let words = vec![
        None,
        Some("Red".to_string()),
        None,
        Some("Green".to_string()),
        Some("Blue".to_string()),
    ];
    // flatten() skips the None entries and unwraps the Some entries.
    let joined = words
        .into_iter()
        .flatten()
        .fold("".to_string(), |acc, cur| acc + &cur);
    assert_eq!(joined, "RedGreenBlue");

    // ---- Visiting a string one char at a time ----
    let mut chs = vec![];
    for ch in abcdef.chars() {
        chs.push(ch);
    }
    assert_eq!(chs, ['a', 'b', 'c', 'd', 'e', 'f']);

    // ---- Byte offset together with each char ----
    let mut tpls = vec![];
    for (idx, ch) in abcdef.char_indices() {
        tpls.push((idx, ch));
    }
    assert_eq!(
        tpls,
        [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'e'), (5, 'f')]
    );
    assert_eq!(tpls, abcdef.char_indices().collect::<Vec<_>>());

    // ---- String -> Vec<char> ----
    let cs = abcdef.chars().collect::<Vec<_>>();
    assert_eq!(cs, ['a', 'b', 'c', 'd', 'e', 'f']);
    // ---- Vec<char> -> String ----
    let line = cs.iter().collect::<String>();
    assert_eq!(line, abcdef);

    // ---- First char ----
    let ch: char = abcdef.chars().next().unwrap();
    assert_eq!(ch, 'a');
    // ---- Second char ----
    let ch: char = abcdef.chars().nth(1).unwrap();
    assert_eq!(ch, 'b');
    assert_eq!(abcdef.chars().nth(1), Some('b'));
    // ---- Last char ----
    // Take it from the back of the iterator. Indexing with `len() - 1` would
    // mix a byte length with a char position and break on non-ASCII text.
    let ch: char = abcdef.chars().next_back().unwrap();
    assert_eq!(ch, 'f');
    assert_eq!(kana.chars().next_back(), Some('う'));

    // ---- Slices (the range bounds are byte offsets) ----
    // From offset 1 (inclusive) up to offset 2 (exclusive).
    let st: &str = &abcdef[1..2];
    assert_eq!(st, "b");
    // From offset 1 (inclusive) up to offset 4 (exclusive).
    let st: &str = &abcdef[1..4];
    assert_eq!(st, "bcd");
    // The same range counted in chars, collected into a String.
    let word = abcdef.chars().skip(1).take(3).collect::<String>();
    assert_eq!(word, "bcd");

    // ---- Dropping the first and last char ----
    let st = "[message]";
    assert_eq!(&st[1..(st.len() - 1)], "message");
    // Non-panicking variant that also checks what is being removed.
    assert_eq!(
        st.strip_prefix('[').and_then(|rest| rest.strip_suffix(']')),
        Some("message")
    );

    // ---- The first three bytes and the rest ----
    let st = "abcdef";
    let (fst, snd) = st.split_at(3);
    assert_eq!((fst, snd), ("abc", "def"));
    let (fst, snd) = (&st[..3], &st[3..]);
    assert_eq!((fst, snd), ("abc", "def"));

    // ---- From 'a' to 'z' ----
    let atoz = ('a'..='z').collect::<String>();
    assert_eq!(atoz, "abcdefghijklmnopqrstuvwxyz");
    assert_eq!(atoz.len(), 26);
    // ---- Reversing a String ----
    let ztoa = atoz.chars().rev().collect::<String>();
    assert_eq!(ztoa, "zyxwvutsrqponmlkjihgfedcba");

    // ---- Appending at the end ----
    let mut word: String = "abc".to_string();
    word.push('d'); // append a char
    assert_eq!(word, "abcd");
    word.push_str("ef"); // append a &str
    assert_eq!(word, "abcdef");

    // ---- Character classification ----
    assert!('a'.is_ascii_alphabetic());
    assert!('a'.is_ascii_alphanumeric());
    assert!('a'.is_ascii_lowercase());
    assert!('1'.is_ascii_digit());
    assert!('$'.is_ascii_punctuation());
    assert!(' '.is_ascii_whitespace());

    // ---- Upper-casing ----
    // Returns a new upper-cased char.
    assert_eq!('A', 'a'.to_ascii_uppercase());
    // Returns a new upper-cased String; the original is left untouched.
    let word = "abcdef".to_string();
    assert_eq!("ABCDEF", word.to_ascii_uppercase());
    assert_eq!("abcdef", word);
    // Upper-cases a char in place.
    let mut ascii = 'a';
    ascii.make_ascii_uppercase();
    assert_eq!('A', ascii);
    // Upper-cases a String in place.
    let mut word = "abcdef".to_string();
    word.make_ascii_uppercase();
    assert_eq!("ABCDEF", word);
    // Case-insensitive comparison without making a copy.
    assert!("AbcDEf".eq_ignore_ascii_case("abCdEF"));

    // ---- Numeric string -> number ----
    let word = "123";
    assert_eq!(Ok(123), word.parse::<i32>());
    assert_eq!(123, word.parse::<i32>().unwrap());
    let word = "xxx";
    assert!(word.parse::<i32>().is_err());
    // ---- Number -> numeric string ----
    let len = 123_usize;
    assert_eq!(len.to_string(), "123");

    // ---- trim ----
    let st = "\t\n hello world \t\n";
    assert_eq!("hello world", st.trim());
    assert_eq!("\t\n hello world", st.trim_end());

    // ---- Removing digits ----
    let st = "abc1d23ef";
    assert_eq!(
        "abcdef",
        st.chars()
            .filter(|c| !c.is_ascii_digit())
            .collect::<String>()
    );
    // ---- Removing one particular char ----
    let st = "03-555-1234";
    assert_eq!(
        "035551234",
        st.chars().filter(|&c| c != '-').collect::<String>()
    );

    // ---- Replacing inside a string: replace() returns a String ----
    let line = "paleale";
    assert_eq!("plele", line.replace('a', ""));
    assert_eq!("pAleAle", line.replace('a', "A"));
    assert_eq!("pALEALE", line.replace("ale", "ALE"));
    let st = line.replace('e', "E");
    assert_eq!(type_of(&st), "&alloc::string::String");

    // ---- Repetition ----
    let minus = "-".to_string();
    assert_eq!(minus.repeat(8), "--------");

    // ---- Splitting ----
    let line = "red green blue ";
    let words = line.split_whitespace().collect::<Vec<&str>>();
    assert_eq!(words, ["red", "green", "blue"]);
    let line = "red,green,blue";
    let words = line.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["red", "green", "blue"]);
    // With an upper bound on the number of pieces.
    let line = "1.2.3.4.5.6.7.8";
    let words = line.splitn(3, '.').collect::<Vec<&str>>();
    assert_eq!(words, ["1", "2", "3.4.5.6.7.8"]);
    // Split into words first, then parse each word.
    let line = "123,456,789";
    let words = line.split(',').collect::<Vec<&str>>();
    let nums = words
        .iter()
        .map(|&w| w.parse::<i32>().unwrap())
        .collect::<Vec<i32>>();
    assert_eq!(nums, [123, 456, 789]);
    // Split and parse in a single chain.
    let line = "123,456,789";
    let nums = line
        .split(',')
        .map(|w| w.parse::<i32>().unwrap())
        .collect::<Vec<i32>>();
    assert_eq!(nums, [123, 456, 789]);
    // Split multi-line text into one entry per line (handles \n and \r\n).
    let line = "first\r\nsecond\n\nfourth";
    let lines = line.lines().collect::<Vec<&str>>();
    assert_eq!(lines, ["first", "second", "", "fourth"]);

    // ---- starts_with / ends_with / contains, using a raw string ----
    let fname = r".\file.xlsx";
    assert!(fname.starts_with(".\\file"));
    assert!(fname.ends_with(".xlsx"));
    assert!(fname.contains("xls"));
    assert!(['a', 'i', 'u', 'e', 'o'].contains(&'e'));

    // ---- Pattern matching (these are not regular expressions) ----
    let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
    assert_eq!(v, ["abc", "abc", "abc"]);
    let nv: Vec<&str> = "1ab23cde456gh".matches(char::is_numeric).collect();
    assert_eq!(nv, ["1", "2", "3", "4", "5", "6"]);

    // ---- Searching (find returns a byte offset) ----
    let s: &str = "hello, world";
    assert_eq!(None, s.find('z'));
    assert_eq!(Some(0), s.find('h'));
    assert_eq!(Some(7), s.find("world"));
    assert_eq!(Some(6), s.find(char::is_whitespace));

    // ---- Keeping only the tokens that parse as numbers ----
    let s: &str = "1 2 a 3 b 4 aaa 5 l 10";
    let cs: Vec<i32> = s
        .split_whitespace()
        .filter_map(|k| k.parse().ok())
        .collect::<Vec<i32>>();
    assert_eq!(cs, [1, 2, 3, 4, 5, 10]);

    // ---- format! ----
    let city = "tokyo";
    let year = 2020;
    assert_eq!(format!("{1} {0}", year, city), "tokyo 2020");
    assert_eq!(
        format!("{city} {year}", year = year, city = city),
        "tokyo 2020"
    );
    // ---- format! with format specs ----
    let h: i64 = 0xcafe;
    assert_eq!(format!("{:#x}", h), "0xcafe");
    assert_eq!(format!("{:#X}", h), "0xCAFE");
    // Width 8: numbers are right-aligned, so four spaces come first.
    assert_eq!(format!("{:8x}", h), "    cafe");
    assert_eq!(format!("{:016X}", h), "000000000000CAFE");
    let s = "word";
    // Width 8 around a 4-char word leaves four spaces of padding in total.
    assert_eq!(format!("{:^8}", s), "  word  ");
    assert_eq!(format!("{:<8}", s), "word    ");
    assert_eq!(format!("{:>8}", s), "    word");
    let n = 12345678;
    // The explicit sign works like Python's f'{n:+}'.
    assert_eq!(format!("{:+}", n), "+12345678");
    // A thousands separator like Python's f'{n:,}' ('12,345,678') does not
    // appear to be available in std::fmt.
    let f = std::f32::consts::PI;
    assert_eq!(format!("{:.3}", f), "3.142");
    // See https://doc.rust-lang.org/std/fmt/

    // ---- Splitting CamelCase into a Vec<&str> by looking at char pairs ----
    let line = "RedGreenBlue";
    let border = |left: char, right: char| -> bool {
        left.is_ascii_lowercase() && right.is_ascii_uppercase()
    };
    let mut words: Vec<&str> = vec![];
    let mut from = 0;
    let mut left: char = line.chars().next().unwrap();
    for (idx, right) in line.char_indices().skip(1) {
        if border(left, right) {
            // `idx` is the byte offset where the next word begins.
            words.push(&line[from..idx]);
            from = idx;
        }
        left = right;
    }
    words.push(&line[from..]);
    assert_eq!(words, ["Red", "Green", "Blue"]);

    // ---- Splitting on case changes, variant 1: windows(2) over the chars ----
    let line = "abcDEFghi";
    let border = |left: char, right: char| -> bool {
        left.is_ascii_lowercase() != right.is_ascii_lowercase()
    };
    let mut cs: Vec<char> = vec![line.chars().next().unwrap()];
    for w in line.chars().collect::<Vec<char>>().windows(2) {
        let (left, right) = (w[0], w[1]);
        if border(left, right) {
            cs.push(',');
        }
        cs.push(right);
    }
    assert_eq!(cs, ['a', 'b', 'c', ',', 'D', 'E', 'F', ',', 'g', 'h', 'i']);
    let joined = cs.iter().collect::<String>();
    let words = joined.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // ---- Variant 2: carry the previous char along a plain iterator ----
    let mut cs: Vec<char> = vec![];
    let mut iter = line.chars();
    let mut left = iter.next().unwrap();
    cs.push(left);
    for right in iter {
        if border(left, right) {
            cs.push(',');
        }
        cs.push(right);
        left = right;
    }
    assert_eq!(cs, ['a', 'b', 'c', ',', 'D', 'E', 'F', ',', 'g', 'h', 'i']);
    // Rebuild the string from this variant's own `cs` so that the split
    // below checks this variant rather than the previous one.
    let joined = cs.iter().collect::<String>();
    let words = joined.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // ---- Variant 3: build each word as an owned String ----
    let mut words: Vec<String> = vec![];
    let mut iter = line.chars();
    let mut left: char = iter.next().unwrap();
    let mut word: String = left.to_string();
    for right in iter {
        if border(left, right) {
            // Move the finished word out and leave an empty String behind.
            words.push(std::mem::take(&mut word));
        }
        word.push(right);
        left = right;
    }
    words.push(word);
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // ---- Variant 4: borrow sub-slices of the original line ----
    let mut words: Vec<&str> = vec![];
    let mut from = 0;
    let mut left: char = line.chars().next().unwrap();
    for (idx, right) in line.char_indices().skip(1) {
        if border(left, right) {
            words.push(&line[from..idx]);
            from = idx;
        }
        left = right;
    }
    words.push(&line[from..]);
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // ---- Run lengths of 'w' and 'b' ----
    // Put a space at every w/b boundary, then split on whitespace.
    let line = "wwwbbwwbbbwbwww".replace("wb", "w b").replace("bw", "b w");
    let bws = line.split_ascii_whitespace().collect::<Vec<&str>>();
    assert_eq!(bws, ["www", "bb", "ww", "bbb", "w", "b", "www"]);
    let run_lengths = bws
        .iter()
        .map(|bw| (bw.chars().next().unwrap(), bw.len()))
        .collect::<Vec<(char, usize)>>();
    assert_eq!(
        run_lengths,
        [
            ('w', 3),
            ('b', 2),
            ('w', 2),
            ('b', 3),
            ('w', 1),
            ('b', 1),
            ('w', 3)
        ]
    );
    // (Slice grouping methods were still experimental when this was written.)

    // ---- Sorting strings ----
    let mut words = vec!["2nd", "1st", "3rd"];
    words.sort_unstable();
    assert_eq!(words, ["1st", "2nd", "3rd"]);
    // Reverse the sorted result.
    words.reverse();
    assert_eq!(words, ["3rd", "2nd", "1st"]);
    // Plain sort: upper-case ASCII letters order before lower-case ones.
    let mut words = vec!["yellow", "Magenta", "cyan", "Black"];
    words.sort_unstable();
    assert_eq!(words, ["Black", "Magenta", "cyan", "yellow"]);
    // Case-insensitive sort. String is totally ordered, so sorting by a
    // lower-cased key needs no partial_cmp/unwrap.
    words.sort_unstable_by_key(|w| w.to_ascii_lowercase());
    assert_eq!(words, ["Black", "cyan", "Magenta", "yellow"]);

    // ---- Dot product of two lists of numeric strings ----
    let a = ["1000", "100", "10", "1"];
    let b = ["1", "2", "3", "4"];
    // try_fold stops and yields None as soon as one string fails to parse.
    let n = a.iter().zip(b.iter()).try_fold(0_i32, |acc, (fst, snd)| {
        let f = fst.parse::<i32>().ok()?;
        let s = snd.parse::<i32>().ok()?;
        Some(acc + f * s)
    });
    assert_eq!(n, Some(1234));

    // ---- Code points ----
    let u = b'!';
    assert_eq!('\u{21}', char::from(u));
    let ex: char = 0x21_u8.into();
    assert_eq!(ex, '!');
    assert_eq!(char::from(0x0021_u8), '!');

    // ---- Part of the ASCII punctuation, built with a loop ----
    let mut puncs = "".to_string();
    for u in 0x0021_u8..=0x002f_u8 {
        puncs.push(u.into());
    }
    assert_eq!(puncs, r##"!"#$%&'()*+,-./"##);
    // ---- Part of the ASCII punctuation, built with an iterator ----
    let punctures = (0x0021_u8..=0x002f_u8).map(char::from).collect::<Vec<_>>();
    assert!(punctures.iter().all(char::is_ascii_punctuation));
    let joined = punctures.iter().collect::<String>();
    assert_eq!(joined, r##"!"#$%&'()*+,-./"##);
    // ---- All of the ASCII punctuation ----
    let range1 = 0x0021_u8..=0x002f_u8;
    let range2 = 0x003a_u8..=0x0040_u8;
    let range3 = 0x005b_u8..=0x0060_u8;
    let range4 = 0x007b_u8..=0x007e_u8;
    let punctures = vec![range1, range2, range3, range4]
        .into_iter()
        .flat_map(|r| r.map(char::from))
        .collect::<Vec<char>>();
    assert!(punctures.iter().all(char::is_ascii_punctuation));
    let joined = punctures.iter().collect::<String>();
    assert_eq!(joined, r##"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"##); // raw string

    println!("fine");
}
実行結果
添削をお願いします。
「これも入れたほうがいい」というものがあれば、それも教えてください。