Qiita Teams that are logged in
You are not logged in to any team

Log in to Qiita Team
Community
Organization / Event / Advent Calendar / Qiitadon (β)
Service
Qiita Jobs / Qiita Zine / Qiita Blog
16
Help us understand the problem. What is the problem?

More than 3 years have passed since last update.

posted at

updated at

[C#][.NET] CSVファイルの読み込み(ダブルクォート、カンマ、改行対応)

NuGetを見ればCsvHelperとか便利なものがありますが、ちょっと自前でやらなくてはいけなかったので作りました。

処理内容は「Perl正規表現雑技」というサイトからいただきました。
感謝 :smile:

2018/2/1 追記
いくらか手直しして、こちらにソースを置きました。
https://github.com/sengokyu/csvreader-csvwriter-for-dot-net

概要

Streamオブジェクトを受け取り、CSVを1レコードずつ読み込んで返します。

特長

  • 値中にあるカンマに対応しています。 "a,a" ===> a,a
  • 値中にあるエスケープされたダブルクォートに対応しています。 "a,"",a" ===> a,",a
  • 値中にある改行に対応しています。

使い方

// Assume a Stream object named `stream` is available from somewhere.
// CSVReader declares only a (Stream, Encoding) constructor, so the encoding must be passed.
using (var reader = new CSVReader(stream, Encoding.UTF8)) {
    while (!reader.EndOfStream) {
        var record = reader.ReadLine();
        if (record == null) {
            // ReadLine returns null for an empty line or an EOF (0x1A) marker line; skip it.
            continue;
        }
        // Do all sorts of fun things with the record here.
    }
}

ソース

CSVReader.cs
    /// <summary>
    /// Generic CSV reader that returns one record at a time from a stream.
    /// Handles quoted fields that contain commas, doubled ("") quotes and line breaks.
    /// </summary>
    public class CSVReader : IDisposable
    {
        // A line consisting solely of the Ctrl-Z character is treated as an end-of-file marker.
        private static readonly string END_OF_FILE = "\x1a";
        private const char QUOTE = '"';

        // One field followed by a comma: either a quoted field (with "" as an escaped quote)
        // or a run of non-comma characters. ReadLine appends a trailing comma so the last
        // field matches as well.
        private static readonly Regex FIELD_PATTERN =
            new Regex("(\"[^\"]*(?:\"\"[^\"]*)*\"|[^,]*),", RegexOptions.Compiled);

        // Optional trailing line break at the end of the record.
        private static readonly Regex TRAILING_NEWLINE_PATTERN =
            new Regex("(?:\x0D\x0A|[\x0D\x0A])?$", RegexOptions.Singleline | RegexOptions.Compiled);

        // A value fully enclosed in double quotes; Singleline lets the value span line breaks.
        private static readonly Regex QUOTED_PATTERN =
            new Regex("^\"(.*)\"$", RegexOptions.Singleline | RegexOptions.Compiled);

        private readonly StreamReader _reader;
        private bool _disposed = false; // To detect redundant Dispose calls

        /// <summary>
        /// Creates a reader over <paramref name="inputStream"/> that decodes the text as UTF-8.
        /// </summary>
        /// <param name="inputStream">Stream containing the CSV text.</param>
        public CSVReader(Stream inputStream)
            : this(inputStream, Encoding.UTF8)
        {
        }

        /// <summary>
        /// Creates a reader over <paramref name="inputStream"/> using the given encoding.
        /// </summary>
        /// <param name="inputStream">Stream containing the CSV text.</param>
        /// <param name="encoding">Character encoding used to decode the stream.</param>
        public CSVReader(Stream inputStream, Encoding encoding)
        {
            _reader = new StreamReader(inputStream, encoding);
        }

        /// <summary>
        /// Reads the next CSV record, which may span several physical lines when a quoted
        /// field contains line breaks.
        /// </summary>
        /// <returns>
        /// The unquoted field values of the record, or <c>null</c> when the stream is at its
        /// end or the next line is empty or an end-of-file marker.
        /// </returns>
        public IEnumerable<string> ReadLine()
        {
            var line = ReadSemanticLine();

            if (line == null)
            {
                return null;
            }

            // Terminate the last field with a comma so every field matches FIELD_PATTERN.
            line = TrimRight(line) + ",";

            // Materialize the result so callers that enumerate it more than once
            // do not repeat the de-quoting work.
            return FIELD_PATTERN.Matches(line)
                .Cast<Match>()
                .Select(x => Dequote(x))
                .ToList();
        }

        /// <summary>
        /// Gets a value indicating whether the underlying reader has reached the end of the stream.
        /// </summary>
        public bool EndOfStream
        {
            get
            {
                return _reader.EndOfStream;
            }
        }

        /// <summary>
        /// Removes a trailing CR, LF or CRLF from <paramref name="src"/>.
        /// </summary>
        private String TrimRight(string src)
        {
            return TRAILING_NEWLINE_PATTERN.Replace(src, "");
        }

        /// <summary>
        /// Returns the captured field value, stripping enclosing quotes and collapsing
        /// doubled quotes when the field is quoted.
        /// </summary>
        private string Dequote(Match match)
        {
            var s = match.Groups[1].Value;
            var quoted = QUOTED_PATTERN.Match(s);

            if (!quoted.Success)
            {
                return s;
            }

            return quoted.Groups[1].Value.Replace("\"\"", "\"");
        }

        /// <summary>
        /// Reads physical lines until the number of double quotes is even, i.e. until every
        /// quoted field opened so far has been closed. Line breaks inside a record are
        /// normalized to "\n".
        /// </summary>
        /// <returns>The logical line, or <c>null</c> at end of stream, on an empty line or on an EOF marker line.</returns>
        private string ReadSemanticLine()
        {
            if (_reader.EndOfStream)
            {
                return null;
            }

            var line = _reader.ReadLine();

            // Short-circuit operators throughout, so Length is never evaluated on a null line.
            if (line == null || line == END_OF_FILE || line.Length == 0)
            {
                return null;
            }

            var quoteCount = CountQuotes(line);

            if (quoteCount % 2 == 0)
            {
                return line;
            }

            // An odd number of quotes means a quoted field continues on the next physical line.
            // Quotes are counted per appended segment instead of rescanning the whole record.
            var buffer = new StringBuilder(line);

            while (quoteCount % 2 != 0 && !_reader.EndOfStream)
            {
                var next = _reader.ReadLine();

                // Restore the line break that StreamReader.ReadLine stripped.
                buffer.Append('\n').Append(next);
                quoteCount += CountQuotes(next);
            }

            return buffer.ToString();
        }

        /// <summary>
        /// Counts the double-quote characters in <paramref name="text"/>; a null text counts as zero.
        /// </summary>
        private static int CountQuotes(string text)
        {
            if (text == null)
            {
                return 0;
            }

            var count = 0;

            foreach (var c in text)
            {
                if (c == QUOTE)
                {
                    count++;
                }
            }

            return count;
        }

        #region IDisposable Support
        /// <summary>
        /// Disposes the underlying reader when <paramref name="disposing"/> is true.
        /// Subsequent calls are no-ops.
        /// </summary>
        protected virtual void Dispose(bool disposing)
        {
            if (_disposed)
            {
                return;
            }

            if (disposing && _reader != null)
            {
                _reader.Dispose();
            }

            _disposed = true;
        }

        /// <summary>
        /// Releases the underlying reader.
        /// </summary>
        public void Dispose()
        {
            // Put cleanup code in Dispose(bool disposing) above.
            Dispose(true);
            // The class is not sealed: a derived type may add a finalizer, so suppress it here.
            GC.SuppressFinalize(this);
        }
        #endregion
    }

テスト

CSVReaderTests.cs
    /// <summary>
    /// Tests for CSVReader: plain records, multi-line quoted fields,
    /// escaped quotes and end-of-stream reporting.
    /// </summary>
    [TestClass]
    public class CSVReaderTests
    {
        /// <summary>
        /// Two plain records are returned in order, including a trailing empty field.
        /// </summary>
        [TestMethod]
        public void TestReadLineReturnsSimpleResult()
        {
            using (var reader = CreateInstance("a,\"b\",c\r\nd,e,\r\n"))
            {
                var firstRecord = reader.ReadLine();
                Check.That(firstRecord).HasSize(3);
                Check.That(firstRecord).ContainsExactly("a", "b", "c");

                var secondRecord = reader.ReadLine();
                Check.That(secondRecord).HasSize(3);
                Check.That(secondRecord).ContainsExactly("d", "e", "");
            }
        }

        /// <summary>
        /// A quoted field containing line breaks is returned as a single value.
        /// </summary>
        [TestMethod]
        public void TestReadLineTreatMultilieCsv()
        {
            using (var reader = CreateInstance("a,b,\"c\n\nc\"\r\n"))
            {
                var fields = reader.ReadLine();

                Check.That(fields).HasSize(3);
                Check.That(fields).ContainsExactly("a", "b", "c\n\nc");
            }
        }

        /// <summary>
        /// Commas inside quotes are kept and doubled quotes are unescaped.
        /// </summary>
        [TestMethod]
        public void TestReadLineUnescapeQuote()
        {
            using (var reader = CreateInstance("\"a,a\",\"b,\"\"c\"\r\n"))
            {
                var fields = reader.ReadLine();

                Check.That(fields).HasSize(2);
                Check.That(fields).ContainsExactly("a,a", "b,\"c");
            }
        }

        /// <summary>
        /// EndOfStream stays false until the last record has been consumed.
        /// </summary>
        [TestMethod]
        public void TestEndOfStream()
        {
            using (var reader = CreateInstance("a,b,c\r\na,b,c\r\n"))
            {
                // Nothing read yet.
                Check.That(reader.EndOfStream).IsFalse();

                // One record left after the first read.
                reader.ReadLine();
                Check.That(reader.EndOfStream).IsFalse();

                // Second read consumes the rest of the stream.
                reader.ReadLine();
                Check.That(reader.EndOfStream).IsTrue();
            }
        }

        /// <summary>
        /// Builds a CSVReader over an in-memory UTF-8 stream holding <paramref name="src"/>.
        /// </summary>
        private CSVReader CreateInstance(string src)
        {
            var utf8Bytes = Encoding.UTF8.GetBytes(src);
            var memory = new MemoryStream(utf8Bytes);

            return new CSVReader(memory, Encoding.UTF8);
        }
    }

参考サイト

Why not register and get more from Qiita?
  1. We will deliver articles that match you
    By following users and tags, you can catch up information on technical fields that you are interested in as a whole
  2. You can efficiently read useful information later
    By "stocking" the articles you like, you can search right away
16
Help us understand the problem. What is the problem?