1. sengoku

    Posted

    sengoku
Changes in title
+[C#][.NET] CSVファイルの読み込み(ダブルクォート、カンマ、改行対応)
Changes in tags
Changes in body
Source | HTML | Preview
@@ -0,0 +1,248 @@
+Nugetを見ればCSVHelperとか便利なものがありますが、ちょと自前でやらなくてはいけなかったので作りました。
+
+処理内容は「Perl正規表現遊戯」というサイトからいただきました。
+感謝 :smile:
+
+# 概要
+
+Streamオブジェクトを受け取り、CSVを1レコードずつ読み込んで返します。
+
+## 特長
+
+* 値中にあるカンマに対応しています。 "a,a" ===> a,a
+* 値中にあるエスケープされたダブルクォートに対応しています。 "a,"",a" ===> a,",a
+* 値中にある改行に対応しています。
+
+
+# 使い方
+
+```csharp:
+// どこかにStreamオブジェクトがあったとしまして
+using (var reader = new CSVReader(stream)) {
+ while (!reader.EndOfStream) {
+ var record reader.ReadLine();
+ // ここで色々楽しいことをする
+ }
+}
+```
+
+# ソース
+
+```csharp:CSVReader.cs
+ /// <summary>
+ /// generic CSV reader
+ /// </summary>
+ public class CSVReader : IDisposable
+ {
+ private static readonly string END_OF_FILE = "\x1a";
+ private static readonly string DELIMITER = "\"";
+ private readonly StreamReader _reader;
+
+ public CSVReader(Stream inputStream, Encoding encoding)
+ {
+ _reader = new StreamReader(inputStream, encoding);
+ }
+
+ public IEnumerable<string> ReadLine()
+ {
+ var line = ReadSemanticLine();
+
+ if (line == null)
+ {
+ return null;
+ }
+
+ line = TrimRight(line);
+ line += ",";
+
+ var matches = Regex.Matches(line, "(\"[^ \"]*(?:\"\"[^\"] *)*\"|[^,]*),");
+
+ return matches.Cast<Match>().Select(x => Dequote(x));
+ }
+
+ public bool EndOfStream
+ {
+ get
+ {
+ return _reader.EndOfStream;
+ }
+ }
+
+ private String TrimRight(string src)
+ {
+ return Regex.Replace(src, "(?:\x0D\x0A|[\x0D\x0A])?$", "", RegexOptions.Singleline);
+ }
+
+ private string Dequote(Match match)
+ {
+ var s = match.Groups[1].Value;
+ var quoted = Regex.Match(s, "^\"(.*)\"$", RegexOptions.Singleline);
+
+ if (quoted.Success)
+ {
+ return quoted.Groups[1].Value.Replace("\"\"", "\"");
+ }
+ else
+ {
+ return s;
+ }
+ }
+
+ private string ReadSemanticLine()
+ {
+ if (_reader.EndOfStream)
+ {
+ return null;
+ }
+
+ var line = _reader.ReadLine();
+
+ if (line == null | line == END_OF_FILE || line.Length == 0)
+ {
+ return null;
+ }
+
+ while (!HasEnoughQuote(line) && !_reader.EndOfStream)
+ {
+ // Complete missing line break.
+ line += "\n" + _reader.ReadLine();
+ }
+
+ return line;
+ }
+
+ private bool HasEnoughQuote(string line)
+ {
+ return (Regex.Matches(line, DELIMITER).Count % 2) == 0;
+ }
+
+
+ #region IDisposable Support
+ private bool disposedValue = false; // To detect redundant calls
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (!disposedValue)
+ {
+ if (disposing)
+ {
+ if (_reader != null)
+ {
+ _reader.Dispose();
+ }
+ }
+
+ // TODO: free unmanaged resources (unmanaged objects) and override a finalizer below.
+ // TODO: set large fields to null.
+
+ disposedValue = true;
+ }
+ }
+
+ // TODO: override a finalizer only if Dispose(bool disposing) above has code to free unmanaged resources.
+ // ~CSVReader() {
+ // // Do not change this code. Put cleanup code in Dispose(bool disposing) above.
+ // Dispose(false);
+ // }
+
+ // This code added to correctly implement the disposable pattern.
+ public void Dispose()
+ {
+ // Do not change this code. Put cleanup code in Dispose(bool disposing) above.
+ Dispose(true);
+ // TODO: uncomment the following line if the finalizer is overridden above.
+ // GC.SuppressFinalize(this);
+ }
+ #endregion
+
+
+ }
+```
+
+# テスト
+
+```csharp:CSVReaderTests.cs
+ [TestClass]
+ public class CSVReaderTests
+ {
+
+ [TestMethod]
+ public void TestReadLineReturnsSimpleResult()
+ {
+ var csvsample = "a,\"b\",c\r\nd,e,\r\n";
+
+ using (var instance = CreateInstance(csvsample))
+ {
+ var result = instance.ReadLine();
+
+ Check.That(result).HasSize(3);
+ Check.That(result).ContainsExactly("a", "b", "c");
+
+ result = instance.ReadLine();
+
+ Check.That(result).HasSize(3);
+ Check.That(result).ContainsExactly("d", "e", "");
+ }
+ }
+
+ [TestMethod]
+ public void TestReadLineTreatMultilieCsv()
+ {
+ var csvsample = "a,b,\"c\n\nc\"\r\n";
+
+ using (var instance = CreateInstance(csvsample))
+ {
+ var result = instance.ReadLine();
+
+ Check.That(result).HasSize(3);
+ Check.That(result).ContainsExactly("a", "b", "c\n\nc");
+ }
+
+ }
+
+ [TestMethod]
+ public void TestReadLineUnescapeQuote()
+ {
+ var csvsample = "\"a,a\",\"b,\"\"c\"\r\n";
+
+ using (var instance = CreateInstance(csvsample))
+ {
+ var result = instance.ReadLine();
+
+ Check.That(result).HasSize(2);
+ Check.That(result).ContainsExactly("a,a", "b,\"c");
+ }
+ }
+
+
+ [TestMethod]
+ public void TestEndOfStream()
+ {
+ var csvsample = "a,b,c\r\na,b,c\r\n";
+
+ using (var instance = CreateInstance(csvsample))
+ {
+ Check.That(instance.EndOfStream).IsFalse();
+
+ instance.ReadLine();
+
+ Check.That(instance.EndOfStream).IsFalse();
+
+ instance.ReadLine();
+
+ Check.That(instance.EndOfStream).IsTrue();
+ }
+
+ }
+
+ private CSVReader CreateInstance(string src)
+ {
+ return new CSVReader(CreateStream(src), Encoding.UTF8);
+ }
+
+ private Stream CreateStream(string src)
+ {
+ return new MemoryStream(Encoding.UTF8.GetBytes(src));
+ }
+ }
+```