GrepUtilTest.cs
using Microsoft.VisualStudio.TestTools.UnitTesting;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
[TestClass]
public class GrepUtilTest
{
[TestMethod]
public void TestGrep()
{
string input = @"Java 1 is nice.
Java 2 is fine.
Java 3 is good.
Java 4 is good.
Java 5 is fine.
Java 6 is good.
Java 7 is good.
Java 8 is marvelous.";
using (var sr = new StringReader(input))
{
// GrepUtil.SearcherFactory = words => new Searcher(words);
List<string> result = GrepUtil.Grep(sr, "fine", "marvelous").ToList();
Assert.AreEqual(result.Count, 3);
CollectionAssert.AreEqual(result, new[]{
"2: Java 2 is fine.",
"5: Java 5 is fine.",
"8: Java 8 is marvelous."
});
}
}
}
public static class GrepUtil
{
public static Func<string[], Searcher> SearcherFactory
= words => new Searcher(words);
public static IEnumerable<String> Grep(TextReader reader, params string[] words)
{
var searcher = SearcherFactory(words);
string line;
for (var i = 1; (line = reader.ReadLine()) != null; i++)
if (searcher.Search(line))
yield return i + ": " + line;
}
}
public class Searcher
{
protected readonly string[] Words;
public Searcher(string[] words)
{
Words = words;
}
public virtual bool Search(string line)
{
return Words.Any(x => line.Contains(x));
}
}
public class RegexSearcher : Searcher
{
private readonly Regex pattern;
public RegexSearcher(string[] words)
: base(words)
{
Array.Sort(words);
var p = string.Join("|", words.Select(Regex.Escape));
pattern = new Regex(p, RegexOptions.Compiled);
}
public override bool Search(string line)
{
return pattern.IsMatch(line);
}
}
public class KmpSearcher : Searcher
{
private readonly Kmp[] matchers;
public KmpSearcher(string[] words)
: base(words)
{
matchers = words.Select(x => new Kmp(x)).ToArray();
}
public override bool Search(string line)
{
foreach (var m in matchers)
m.Reset();
return
line.Any(c => matchers.Any(m => m.Match(c)));
}
private class Kmp
{
private string pattern;
private int[] table;
private int matchingLength = 0;
public Kmp(string p)
{
pattern = p;
table = new int[p.Length];
if (p.Length < 1) return;
table[0] = -1;
if (p.Length < 2) return;
table[1] = 0;
int i = 2;
int length = 0; // 先頭から一致しているサブ文字列の長さ
while (i < p.Length)
{
if (p[i - 1] == p[length])
{
// サブ文字列が一致中
length++;
table[i] = length;
i++;
}
else if (length > 0)
{
// サブ文字列が再帰的に含まれているかどうか
// テーブルを使って確認
length = table[length];
}
else
{
table[i] = 0;
i++;
}
}
}
public bool Match(char c)
{
if (pattern.Length <= matchingLength)
{
return true;
}
while (pattern[matchingLength] != c)
{
// バックトラック
matchingLength = table[matchingLength];
if (matchingLength < 0)
{
matchingLength = 0;
return false;
}
}
matchingLength++;
return (pattern.Length <= matchingLength);
}
public void Reset()
{
matchingLength = 0;
}
}
}