LoginSignup
9
17

More than 5 years have passed since last update.

[C#][.NET] CSVファイルの読み込み(ダブルクォート、カンマ、改行対応)

Last updated at Posted at 2017-02-22

NuGetを見ればCsvHelperとか便利なものがありますが、ちょっと自前でやらなくてはいけなかったので作りました。

処理内容は「Perl正規表現雑技」というサイトからいただきました。
感謝 :smile:

2018/2/1 追記
いくらか手直しして、こちらにソースを置きました。
https://github.com/sengokyu/csvreader-csvwriter-for-dot-net

概要

Streamオブジェクトを受け取り、CSVを1レコードずつ読み込んで返します。

特長

  • 値中にあるカンマに対応しています。 "a,a" ===> a,a
  • 値中にあるエスケープされたダブルクォートに対応しています。 "a,"",a" ===> a,",a
  • 値中にある改行に対応しています。

使い方

// Assume a Stream object named `stream` is available from somewhere.
// The encoding is passed explicitly because CSVReader's constructor requires it.
using (var reader = new CSVReader(stream, Encoding.UTF8)) {
    while (!reader.EndOfStream) {
        var record = reader.ReadLine();
        // ReadLine() returns null for an empty line (e.g. a trailing blank line), so skip it.
        if (record == null) {
            continue;
        }
        // Do whatever you like with the record here.
    }
}

ソース

CSVReader.cs
    /// <summary>
    /// Generic CSV reader that reads one record at a time from a stream.
    /// Commas, doubled quotes ("") and line breaks inside double-quoted values are supported.
    /// </summary>
    public class CSVReader : IDisposable
    {
        // A line consisting solely of this character (0x1A) is treated as "no more data".
        private static readonly string END_OF_FILE = "\x1a";
        private const char QUOTE = '"';

        // One field (either double-quoted or bare) followed by its terminating comma.
        private static readonly Regex FIELD_PATTERN =
            new Regex("(\"[^\"]*(?:\"\"[^\"]*)*\"|[^,]*),");

        // Trailing line break at the end of a record.
        private static readonly Regex TRAILING_NEWLINE_PATTERN =
            new Regex("(?:\x0D\x0A|[\x0D\x0A])?$", RegexOptions.Singleline);

        // A value that is wrapped in double quotes; group 1 is the content between them.
        private static readonly Regex QUOTED_PATTERN =
            new Regex("^\"(.*)\"$", RegexOptions.Singleline);

        private readonly StreamReader _reader;

        /// <summary>
        /// Creates a reader over <paramref name="inputStream"/> using the default
        /// encoding of <see cref="StreamReader"/>.
        /// </summary>
        /// <param name="inputStream">Stream containing CSV text.</param>
        public CSVReader(Stream inputStream)
        {
            _reader = new StreamReader(inputStream);
        }

        /// <summary>
        /// Creates a reader over <paramref name="inputStream"/> using the given encoding.
        /// </summary>
        /// <param name="inputStream">Stream containing CSV text.</param>
        /// <param name="encoding">Character encoding of the stream.</param>
        public CSVReader(Stream inputStream, Encoding encoding)
        {
            _reader = new StreamReader(inputStream, encoding);
        }

        /// <summary>
        /// Reads the next record and splits it into its field values.
        /// </summary>
        /// <returns>
        /// The unquoted field values of the record, or <c>null</c> when there is no more
        /// data, or when the next line is empty or consists only of the 0x1A character.
        /// </returns>
        public IEnumerable<string> ReadLine()
        {
            var line = ReadSemanticLine();

            if (line == null)
            {
                return null;
            }

            // Append a terminating comma so that every field, including the last one,
            // matches the "value followed by comma" pattern.
            line = TrimRight(line) + ",";

            // Materialize the result: a deferred query would re-run the regex-based
            // dequoting every time the caller enumerates the record.
            return FIELD_PATTERN.Matches(line)
                .Cast<Match>()
                .Select(x => Dequote(x))
                .ToList();
        }

        /// <summary>
        /// Gets whether the underlying reader has reached the end of the stream.
        /// </summary>
        public bool EndOfStream
        {
            get
            {
                return _reader.EndOfStream;
            }
        }

        // Removes the line break at the end of the record text.
        private String TrimRight(string src)
        {
            return TRAILING_NEWLINE_PATTERN.Replace(src, "");
        }

        // Returns the captured field value; for a quoted value the surrounding quotes
        // are removed and doubled quotes are collapsed into a single quote.
        private string Dequote(Match match)
        {
            var s = match.Groups[1].Value;
            var quoted = QUOTED_PATTERN.Match(s);

            if (quoted.Success)
            {
                return quoted.Groups[1].Value.Replace("\"\"", "\"");
            }

            return s;
        }

        // Reads one logical record: physical lines are joined with "\n" for as long as
        // the number of double quotes read so far is odd (i.e. a quoted value is still open).
        private string ReadSemanticLine()
        {
            if (_reader.EndOfStream)
            {
                return null;
            }

            var line = _reader.ReadLine();

            if (line == null || line == END_OF_FILE || line.Length == 0)
            {
                return null;
            }

            var quoteCount = CountQuotes(line);

            if (quoteCount % 2 == 0)
            {
                return line;
            }

            // Keep a running quote count and a StringBuilder so that long multi-line
            // values are not re-scanned and re-copied for every appended line.
            var buffer = new StringBuilder(line);

            while (quoteCount % 2 != 0 && !_reader.EndOfStream)
            {
                var next = _reader.ReadLine();

                // Restore the line break that StreamReader.ReadLine() stripped.
                buffer.Append('\n').Append(next);
                quoteCount += CountQuotes(next);
            }

            return buffer.ToString();
        }

        // Counts the double-quote characters in the text; null counts as zero.
        private static int CountQuotes(string text)
        {
            if (text == null)
            {
                return 0;
            }

            var count = 0;

            foreach (var c in text)
            {
                if (c == QUOTE)
                {
                    count++;
                }
            }

            return count;
        }


        #region IDisposable Support
        private bool disposedValue = false; // To detect redundant calls

        /// <summary>
        /// Releases the underlying <see cref="StreamReader"/>.
        /// </summary>
        /// <param name="disposing">
        /// <c>true</c> when called from <see cref="Dispose()"/>; managed resources are
        /// released only in that case.
        /// </param>
        protected virtual void Dispose(bool disposing)
        {
            if (disposedValue)
            {
                return;
            }

            if (disposing && _reader != null)
            {
                _reader.Dispose();
            }

            disposedValue = true;
        }

        /// <summary>
        /// Disposes the reader and the stream reader it wraps.
        /// </summary>
        public void Dispose()
        {
            Dispose(true);
            // The class is not sealed: a derived type may add a finalizer, so suppress it here.
            GC.SuppressFinalize(this);
        }
        #endregion


    }

テスト

CSVReaderTests.cs
    /// <summary>
    /// Unit tests for <see cref="CSVReader"/>.
    /// </summary>
    [TestClass]
    public class CSVReaderTests
    {

        /// <summary>
        /// Two plain records, one with a quoted value and one with an empty last field.
        /// </summary>
        [TestMethod]
        public void TestReadLineReturnsSimpleResult()
        {
            using (var reader = CreateInstance("a,\"b\",c\r\nd,e,\r\n"))
            {
                var firstRecord = reader.ReadLine();

                Check.That(firstRecord).HasSize(3);
                Check.That(firstRecord).ContainsExactly("a", "b", "c");

                var secondRecord = reader.ReadLine();

                Check.That(secondRecord).HasSize(3);
                Check.That(secondRecord).ContainsExactly("d", "e", "");
            }
        }

        /// <summary>
        /// A quoted value that spans several lines is returned as a single field.
        /// </summary>
        [TestMethod]
        public void TestReadLineTreatMultilieCsv()
        {
            using (var reader = CreateInstance("a,b,\"c\n\nc\"\r\n"))
            {
                var fields = reader.ReadLine();

                Check.That(fields).HasSize(3);
                Check.That(fields).ContainsExactly("a", "b", "c\n\nc");
            }

        }

        /// <summary>
        /// Commas inside quotes are kept and doubled quotes become a single quote.
        /// </summary>
        [TestMethod]
        public void TestReadLineUnescapeQuote()
        {
            using (var reader = CreateInstance("\"a,a\",\"b,\"\"c\"\r\n"))
            {
                var fields = reader.ReadLine();

                Check.That(fields).HasSize(2);
                Check.That(fields).ContainsExactly("a,a", "b,\"c");
            }
        }


        /// <summary>
        /// EndOfStream turns true only after the last record has been read.
        /// </summary>
        [TestMethod]
        public void TestEndOfStream()
        {
            using (var reader = CreateInstance("a,b,c\r\na,b,c\r\n"))
            {
                Check.That(reader.EndOfStream).IsFalse();

                reader.ReadLine();
                Check.That(reader.EndOfStream).IsFalse();

                reader.ReadLine();
                Check.That(reader.EndOfStream).IsTrue();
            }

        }

        // Builds a reader over an in-memory UTF-8 stream holding the given CSV text.
        private CSVReader CreateInstance(string src)
        {
            var bytes = Encoding.UTF8.GetBytes(src);
            Stream stream = new MemoryStream(bytes);

            return new CSVReader(stream, Encoding.UTF8);
        }
    }

参考サイト

9
17
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
9
17