7
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?

More than 1 year has passed since the last update.

Rustの文字列処理

Last updated at Posted at 2022-07-12

わたくしのRustの文字列処理コレクションです。

// https://godbolt.org/z/aT6EhP753

/// Returns the compiler-provided name of the argument's type as an owned `String`.
///
/// The argument is used only to infer `T`; its value is never inspected.
fn type_of<T>(_: T) -> String {
    let name: &'static str = std::any::type_name::<T>();
    String::from(name)
}

/// Runs a collection of self-checking examples of string handling in Rust.
///
/// Each example verifies itself with `assert!` / `assert_eq!`; when every
/// example passes, `fine` is printed to standard output.
///
/// # Panics
///
/// Panics if any assertion fails.
pub fn main() {
    // Characters
    let ch: char = 'a';
    assert_eq!(ch, 'a');
    // Strings: &str and String
    let s: &str = "abcde";
    assert_eq!(s, "abcde"); // &str can be compared with &str
    assert!(s == "abcde");
    let s = "";
    assert!(s.is_empty());
    // Long &str: a trailing backslash skips the newline and the following indentation
    let st = "abc\
                    def\
                    ghi";
    assert!(st == "abcdefghi");

    let word: String = "abcde".to_string();
    assert_eq!(word, String::from("abcde"));
    assert_eq!(word, "abcde"); // String can be compared with &str

    // Length
    let st = "abcdef";
    assert_eq!(st.len(), 6);
    // len() counts bytes, so it matches the char count only for ASCII text
    assert_eq!(st.len(), st.chars().count());
    assert_eq!("文字列".len(), 9);
    assert_eq!("文字列".chars().count(), 3);

    // Concatenating a few strings
    let abc = "abc".to_string();
    let def = String::from("def");
    assert_eq!(format!("{}{}", abc, def), "abcdef");
    let abcdef: String = format!("{}{}", abc, def);
    assert_eq!(abcdef, "abcdef");
    assert_eq!(abcdef, abc + "def"); // String + &str
    assert_eq!("abc".to_string() + "def" + "ghi", "abcdefghi");

    // Joining a Vec of &str
    let strs = vec!["Red", "Green", "Blue"];
    let joined = strs.iter().fold("".to_string(), |acc, cur| acc + cur);
    assert_eq!(joined, "RedGreenBlue");
    // concat() / join() give the same result without a hand-written fold
    assert_eq!(strs.concat(), "RedGreenBlue");
    assert_eq!(strs.join(","), "Red,Green,Blue");
    // Joining a Vec of String
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    let joined = words
        .into_iter()
        .fold("".to_string(), |acc, cur| acc + &cur);
    assert_eq!(joined, "RedGreenBlue");
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    let joined = words.into_iter().reduce(|acc, cur| acc + &cur).unwrap();
    assert_eq!(joined, "RedGreenBlue");
    // Another way
    let words = vec!["Red".to_string(), "Green".to_string(), "Blue".to_string()];
    let joined = words.iter().flat_map(|s| s.chars()).collect::<String>();
    assert_eq!(joined, "RedGreenBlue");
    // Joining a Vec of Option<String>, skipping the None entries
    let words = vec![
        None,
        Some("Red".to_string()),
        None,
        Some("Green".to_string()),
        Some("Blue".to_string()),
    ];
    let joined = words
        .into_iter()
        .flatten()
        .fold("".to_string(), |acc, cur| acc + &cur);
    assert_eq!(joined, "RedGreenBlue");

    // Visiting a string one char at a time
    let mut chs = vec![];
    for ch in abcdef.chars() {
        chs.push(ch);
    }
    assert_eq!(chs, ['a', 'b', 'c', 'd', 'e', 'f']);
    // Position and char
    let mut tpls = vec![];
    for (idx, ch) in abcdef.char_indices() {
        tpls.push((idx, ch));
    }
    assert_eq!(
        tpls,
        [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd'), (4, 'e'), (5, 'f')]
    );
    assert_eq!(tpls, abcdef.char_indices().collect::<Vec<_>>());

    // String to Vec of chars
    let cs = abcdef.chars().collect::<Vec<_>>();
    assert_eq!(cs, ['a', 'b', 'c', 'd', 'e', 'f']);
    // Vec of chars to String
    let line = cs.iter().collect::<String>();
    assert_eq!(line, abcdef);

    // First char
    let ch: char = abcdef.chars().next().unwrap();
    assert_eq!(ch, 'a');
    // Second char (index 1)
    let ch: char = abcdef.chars().nth(1).unwrap();
    assert_eq!(ch, 'b');
    assert_eq!(abcdef.chars().nth(1), Some('b'));
    // Last char: take it from the back instead of using the byte length as a
    // char index, which would be wrong for non-ASCII text
    let ch: char = abcdef.chars().next_back().unwrap();
    assert_eq!(ch, 'f');
    // Slice from index 1 (inclusive) to 2 (exclusive); str indices are byte offsets
    let st: &str = &abcdef[1..2];
    assert_eq!(st, "b");
    // Slice from index 1 (inclusive) to 4 (exclusive)
    let st: &str = &abcdef[1..4];
    assert_eq!(st, "bcd");
    // The same range as an owned String, counted in chars
    let word = abcdef.chars().skip(1).take(3).collect::<String>();
    assert_eq!(word, "bcd");

    // Strip the first and the last char
    let st = "[message]";
    assert_eq!(&st[1..(st.len() - 1)], "message");
    // The first three chars and the rest
    let st = "abcdef";
    let (fst, snd) = st.split_at(3);
    assert_eq!((fst, snd), ("abc", "def"));
    let (fst, snd) = (&st[..3], &st[3..]);
    assert_eq!((fst, snd), ("abc", "def"));

    // From 'a' to 'z'
    let atoz = ('a'..='z').collect::<String>();
    assert_eq!(atoz, "abcdefghijklmnopqrstuvwxyz");
    assert_eq!(atoz.len(), 26);
    // Reversed String
    let ztoa = atoz.chars().rev().collect::<String>();
    assert_eq!(ztoa, "zyxwvutsrqponmlkjihgfedcba");

    // Appending
    let mut word: String = "abc".to_string();
    word.push('d'); // append a char
    assert_eq!(word, "abcd");
    word.push_str("ef"); // append a &str
    assert_eq!(word, "abcdef");

    // Classification
    assert!('a'.is_ascii_alphabetic());
    assert!('a'.is_ascii_alphanumeric());
    assert!('a'.is_ascii_lowercase());
    assert!('1'.is_ascii_digit());
    assert!('$'.is_ascii_punctuation());
    assert!(' '.is_ascii_whitespace());
    // Return an upper-cased copy of a char
    assert_eq!('A', 'a'.to_ascii_uppercase());
    // Return an upper-cased copy of a string; the original is untouched
    let word = "abcdef".to_string();
    assert_eq!("ABCDEF", word.to_ascii_uppercase());
    assert_eq!("abcdef", word);
    // Upper-case a char in place
    let mut ascii = 'a';
    ascii.make_ascii_uppercase();
    assert_eq!('A', ascii);
    // Upper-case a String in place
    let mut word = "abcdef".to_string();
    word.make_ascii_uppercase();
    assert_eq!("ABCDEF", word);

    // Case-insensitive comparison without making a copy
    assert!("AbcDEf".eq_ignore_ascii_case("abCdEF"));

    // Numeric text to number
    let word = "123";
    assert_eq!(Ok(123), word.parse::<i32>());
    assert_eq!(123, word.parse::<i32>().unwrap());
    let word = "xxx";
    assert!(word.parse::<i32>().is_err());
    // Number to numeric text
    let len = 123_usize;
    assert_eq!(len.to_string(), "123");

    // trim
    let st = "\t\n  hello world \t\n";
    assert_eq!("hello world", st.trim());
    assert_eq!("\t\n  hello world", st.trim_end());
    // Remove digits
    let st = "abc1d23ef";
    assert_eq!(
        "abcdef",
        st.chars()
            .filter(|c| !c.is_ascii_digit())
            .collect::<String>()
    );
    // Remove a specific char
    let st = "03-555-1234";
    assert_eq!(
        "035551234",
        st.chars().filter(|&c| c != '-').collect::<String>()
    );
    // Replacing within a string - replace() returns a new String
    let line = "paleale";
    assert_eq!("plele", line.replace('a', ""));
    assert_eq!("pAleAle", line.replace('a', "A"));
    assert_eq!("pALEALE", line.replace("ale", "ALE"));
    let st = line.replace('e', "E");
    assert_eq!(type_of(&st), "&alloc::string::String");

    // Repetition
    let minus = "-".to_string();
    assert_eq!(minus.repeat(8), "--------");

    // Splitting
    let line = "red green  blue  ";
    let words = line.split_whitespace().collect::<Vec<&str>>();
    assert_eq!(words, ["red", "green", "blue"]);
    let line = "red,green,blue";
    let words = line.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["red", "green", "blue"]);
    // Limit the number of pieces
    let line = "1.2.3.4.5.6.7.8";
    let words = line.splitn(3, '.').collect::<Vec<&str>>();
    assert_eq!(words, ["1", "2", "3.4.5.6.7.8"]);

    // Split into words first, then parse each word
    let line = "123,456,789";
    let words = line.split(',').collect::<Vec<&str>>();
    let nums = words
        .iter()
        .map(|&w| w.parse::<i32>().unwrap())
        .collect::<Vec<i32>>();
    assert_eq!(nums, [123, 456, 789]);

    // Split and parse in a single chain
    let line = "123,456,789";
    let nums = line
        .split(',')
        .map(|w| w.parse::<i32>().unwrap())
        .collect::<Vec<i32>>();
    assert_eq!(nums, [123, 456, 789]);

    // Split multi-line text into a Vec of lines
    let line = "first\r\nsecond\n\nfourth";
    let lines = line.lines().collect::<Vec<&str>>();
    assert_eq!(lines, ["first", "second", "", "fourth"]);

    // starts_with / ends_with / contains, using a raw string
    let fname = r".\file.xlsx";
    assert!(fname.starts_with(".\\file"));
    assert!(fname.ends_with(".xlsx"));
    assert!(fname.contains("xls"));
    assert!(['a', 'i', 'u', 'e', 'o'].contains(&'e'));
    // Pattern matching (these are not regular expressions)
    let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
    assert_eq!(v, ["abc", "abc", "abc"]);
    let nv: Vec<&str> = "1ab23cde456gh".matches(char::is_numeric).collect();
    assert_eq!(nv, ["1", "2", "3", "4", "5", "6"]);

    // Searching
    let s: &str = "hello, world";
    assert_eq!(None, s.find('z'));
    assert_eq!(Some(0), s.find('h'));
    assert_eq!(Some(7), s.find("world"));
    assert_eq!(Some(6), s.find(char::is_whitespace));
    // Extract only the numbers; words that fail to parse are dropped
    let s: &str = "1 2 a 3 b 4 aaa 5 l 10";
    let cs: Vec<i32> = s
        .split_whitespace()
        .filter_map(|k| k.parse().ok())
        .collect::<Vec<i32>>();
    assert_eq!(cs, [1, 2, 3, 4, 5, 10]);

    // format!
    let city = "tokyo";
    let year = 2020;
    assert_eq!(format!("{1} {0}", year, city), "tokyo 2020");
    assert_eq!(
        format!("{city} {year}", year = year, city = city),
        "tokyo 2020"
    );
    // format! with format specifiers
    let h: i64 = 0xcafe;
    assert_eq!(format!("{:#x}", h), "0xcafe");
    assert_eq!(format!("{:#X}", h), "0xCAFE");
    assert_eq!(format!("{:8x}", h), "    cafe");
    assert_eq!(format!("{:016X}", h), "000000000000CAFE");

    let s = "word";
    assert_eq!(format!("{:^8}", s), "  word  ");
    assert_eq!(format!("{:<8}", s), "word    ");
    assert_eq!(format!("{:>8}", s), "    word");

    let n = 12345678;
    // An explicit sign works just like Python's f'{n:+}'
    assert_eq!(format!("{:+}", n), "+12345678");
    // Python's f'{n:,}' thousands separator has no counterpart in std::fmt
    // Use the std constant rather than a hand-typed 3.14159
    let f = std::f32::consts::PI;
    assert_eq!(format!("{:.3}", f), "3.142");
    // https://doc.rust-lang.org/std/fmt/

    // Split CamelCase into words (Vec<&str>) by looking at adjacent char pairs
    let line = "RedGreenBlue";
    let border = |left: char, right: char| -> bool {
        left.is_ascii_lowercase() && right.is_ascii_uppercase()
    };
    let mut words: Vec<&str> = vec![];
    let mut from = 0;
    let mut left: char = line.chars().next().unwrap();
    for (idx, right) in line.char_indices().skip(1) {
        if border(left, right) {
            words.push(&line[from..idx]);
            from = idx;
        }
        left = right;
    }
    words.push(&line[from..]);
    assert_eq!(words, ["Red", "Green", "Blue"]);

    // Split where the case changes, by inserting ',' between adjacent pairs (windows)
    let line = "abcDEFghi";
    let border = |left: char, right: char| -> bool {
        left.is_ascii_lowercase() != right.is_ascii_lowercase()
    };
    let mut cs: Vec<char> = vec![line.chars().next().unwrap()];
    for w in line.chars().collect::<Vec<char>>().windows(2) {
        let (left, right) = (w[0], w[1]);
        if border(left, right) {
            cs.push(',');
        }
        cs.push(right);
    }
    assert_eq!(cs, ['a', 'b', 'c', ',', 'D', 'E', 'F', ',', 'g', 'h', 'i']);
    let joined = cs.iter().collect::<String>();
    let words = joined.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // Same split, remembering the previous char instead of using windows
    let mut cs: Vec<char> = vec![];
    let mut iter = line.chars();
    let mut left = iter.next().unwrap();
    cs.push(left);
    for right in iter {
        if border(left, right) {
            cs.push(',');
        }
        cs.push(right);
        left = right;
    }
    assert_eq!(cs, ['a', 'b', 'c', ',', 'D', 'E', 'F', ',', 'g', 'h', 'i']);
    // Re-join the chars built just above so this example checks its own result
    let joined = cs.iter().collect::<String>();
    let words = joined.split(',').collect::<Vec<&str>>();
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // Same split, building a Vec<String> directly
    let mut words: Vec<String> = vec![];
    let mut iter = line.chars();
    let mut left: char = iter.next().unwrap();
    let mut word: String = left.to_string();
    for right in iter {
        if border(left, right) {
            words.push(word);
            word = "".to_string();
        }
        word.push(right);
        left = right;
    }
    words.push(word);
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // Same split, producing a Vec<&str> of slices into the original
    let mut words: Vec<&str> = vec![];
    let mut from = 0;
    let mut left: char = line.chars().next().unwrap();
    for (idx, right) in line.char_indices().skip(1) {
        if border(left, right) {
            words.push(&line[from..idx]);
            from = idx;
        }
        left = right;
    }
    words.push(&line[from..]);
    assert_eq!(words, ["abc", "DEF", "ghi"]);

    // Run lengths of 'w' and 'b': put a space at every w/b boundary, then split
    let line = "wwwbbwwbbbwbwww".replace("wb", "w b").replace("bw", "b w");
    let bws = line.split_ascii_whitespace().collect::<Vec<&str>>();
    assert_eq!(bws, ["www", "bb", "ww", "bbb", "w", "b", "www"]);
    let run_lengthes = bws
        .iter()
        .map(|bw| (bw.chars().next().unwrap(), bw.len()))
        .collect::<Vec<(char, usize)>>();
    assert_eq!(
        run_lengthes,
        [
            ('w', 3),
            ('b', 2),
            ('w', 2),
            ('b', 3),
            ('w', 1),
            ('b', 1),
            ('w', 3)
        ]
    );

    /*
    grouping methods are experimental for now
    */
    // Sorting strings
    let mut words = vec!["2nd", "1st", "3rd"];
    words.sort_unstable();
    assert_eq!(words, ["1st", "2nd", "3rd"]);
    // Reverse the sorted result
    words.reverse();
    assert_eq!(words, ["3rd", "2nd", "1st"]);
    // Plain sort: upper-case letters order before lower-case ones
    let mut words = vec!["yellow", "Magenta", "cyan", "Black"];
    words.sort_unstable();
    assert_eq!(words, ["Black", "Magenta", "cyan", "yellow"]);
    // Case-insensitive sort; String is totally ordered, so no partial_cmp/unwrap is needed
    words.sort_unstable_by_key(|w| w.to_ascii_lowercase());
    assert_eq!(words, ["Black", "cyan", "Magenta", "yellow"]);

    // Dot product of numbers given as text; any parse failure yields None
    let a = vec!["1000", "100", "10", "1"];
    let b = vec!["1", "2", "3", "4"];
    let n = a.iter().zip(b.iter()).try_fold(0_i32, |acc, (fst, snd)| {
        let f = fst.parse::<i32>().ok()?;
        let s = snd.parse::<i32>().ok()?;
        Some(acc + f * s)
    });
    assert_eq!(n, Some(1234));

    // Unicode code points
    let u = b'!';
    assert_eq!('\u{21}', char::from(u));
    let ex: char = 0x21_u8.into();
    assert_eq!(ex, '!');
    assert_eq!(char::from(0x0021_u8), '!');
    // Part of the ASCII punctuation, built with a loop
    let mut puncs = "".to_string();
    for u in 0x0021_u8..=0x002f_u8 {
        puncs.push(u.into());
    }
    assert_eq!(puncs, r##"!"#$%&'()*+,-./"##);
    // Part of the ASCII punctuation, built with an iterator
    let punctures = (0x0021_u8..=0x002f_u8).map(char::from).collect::<Vec<_>>();
    assert!(punctures.iter().all(char::is_ascii_punctuation));
    let joined = punctures.iter().collect::<String>();
    assert_eq!(joined, r##"!"#$%&'()*+,-./"##);
    // All of the ASCII punctuation
    let range1 = 0x0021_u8..=0x002f_u8;
    let range2 = 0x003a_u8..=0x0040_u8;
    let range3 = 0x005b_u8..=0x0060_u8;
    let range4 = 0x007b_u8..=0x007e_u8;
    let punctures = vec![range1, range2, range3, range4]
        .into_iter()
        .flat_map(|r| r.map(char::from))
        .collect::<Vec<char>>();
    assert!(punctures.iter().all(char::is_ascii_punctuation));
    let joined = punctures.iter().collect::<String>();
    assert_eq!(joined, r##"!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"##); // raw string

    println!("fine");
}

実行結果

添削をお願いします。
これも入れたほうがいい、も教えてください。

7
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
7
0

Delete article

Deleted articles cannot be recovered.

The draft of this article will also be deleted.

Are you sure you want to delete this article?