Skip to content

Commit

Permalink
Fix buffer read for windows style newlines (#67)
Browse files Browse the repository at this point in the history
Implement Joel's test for complex large files.  Correct the counting of positions when a chunk ends with a `\r` and the next chunk begins with a `\n`.  Update to version 3.2.0.
  • Loading branch information
tspence authored Aug 6, 2024
1 parent 5af4695 commit 4355ed8
Show file tree
Hide file tree
Showing 11 changed files with 1,787 additions and 8 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/dotnet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,22 @@ jobs:
- name: Build (Framework 2.0 Tests)
run: msbuild ./tests/net20/tests.net20.csproj
- name: Test (net20)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net20/bin/Debug/tests.net20.dll
working-directory: ./tests/net20/bin/Debug/
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net20.dll
- name: Build (Framework 4.0)
run: msbuild ./src/net40/src.net40.csproj
- name: Build (Framework 4.0 Tests)
run: msbuild ./tests/net40/tests.net40.csproj
- name: Test (net40)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net40/bin/Debug/tests.net40.dll
working-directory: ./tests/net40/bin/Debug
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net40.dll
- name: Build (Framework 4.5)
run: msbuild ./src/net45/src.net45.csproj
- name: Build (Framework 4.5 Tests)
run: msbuild ./tests/net45/tests.net45.csproj
- name: Test (net45)
run: ./NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests/net45/bin/Debug/tests.net45.dll
working-directory: ./tests/net45/bin/Debug/
run: ../../../../NUnit.ConsoleRunner.3.4.0/tools/nunit3-console.exe ./tests.net45.dll
- name: Build (DotNet Core 5.0 and NetStandard 2.0)
run: dotnet build ./csharp-csv-reader.sln
- name: Test (net50)
Expand Down
6 changes: 3 additions & 3 deletions CSVFile.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package >
<metadata>
<id>CSVFile</id>
<version>3.1.4</version>
<version>3.2.0</version>
<title>CSVFile</title>
<authors>Ted Spence</authors>
<owners>Ted Spence</owners>
Expand All @@ -15,8 +15,8 @@
<releaseNotes>
August 5, 2024

* Add serialization options for arrays and objects
* Fix bad deploy of 3.1.3
* Fix issue with Windows-style newlines crossing chunks found by @joelverhagen
* Fix issue with endless loops reported by @wvvegt
</releaseNotes>
<readme>docs/README.md</readme>
<copyright>Copyright 2006 - 2024</copyright>
Expand Down
8 changes: 8 additions & 0 deletions src/CSV.cs
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,10 @@ public static IEnumerable<string[]> ParseStream(StreamReader inStream, CSVSettin
{
yield return row;
}
else if (inStream.EndOfStream)
{
break;
}
}
}

Expand Down Expand Up @@ -101,6 +105,10 @@ public static async IAsyncEnumerable<string[]> ParseStreamAsync(StreamReader inS
{
yield return row;
}
else if (inStream.EndOfStream)
{
break;
}
}
}
#endif
Expand Down
6 changes: 4 additions & 2 deletions src/CSVStateMachine.cs
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public class CSVStateMachine
/// <returns></returns>
public bool NeedsMoreText()
{
return String.IsNullOrEmpty(_line) || _position >= _line.Length;
return String.IsNullOrEmpty(_line) || _position + _settings.LineSeparator.Length >= _line.Length;
}

/// <summary>
Expand Down Expand Up @@ -202,11 +202,13 @@ public string[] ParseChunk(string chunk, bool reachedEnd)
var notEnoughChars = _position + _settings.LineSeparator.Length > _line.Length;
if (notEnoughChars && !reachedEnd)
{
// Backtrack one character so we can pick up the line separator completely next time
_position--;
return null;
}

// If we have reached the end, but this isn't a complete line separator, it's just text
if (notEnoughChars && reachedEnd)
if (notEnoughChars)
{
_work.Append(c);
}
Expand Down
1,695 changes: 1,695 additions & 0 deletions tests/PackageAssets.csv

Large diffs are not rendered by default.

39 changes: 39 additions & 0 deletions tests/ReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
* Home page: https://github.com/tspence/csharp-csv-reader
*/
using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Text;
using NUnit.Framework;
using CSVFile;
#if HAS_ASYNC
Expand Down Expand Up @@ -336,6 +340,41 @@ public void TestMultipleNewlines()
}
}

/// <summary>
/// Regression test named for issue 62: builds a 53,543-line CSV text by cycling
/// through the lines of PackageAssets.csv, reads it back through a
/// <see cref="CSVReader"/>, and checks that the number of rows returned equals
/// the number of lines written.
/// </summary>
[Test]
public void TestIssue62()
{
    const int expectedRowCount = 53_543;

    // Cycle through the fixture's lines until exactly expectedRowCount lines are collected
    var sourceLines = File.ReadAllLines("PackageAssets.csv");
    var repeatedLines = new string[expectedRowCount];
    for (var i = 0; i < repeatedLines.Length; i++)
    {
        repeatedLines[i] = sourceLines[i % sourceLines.Length];
    }

    // Lines are joined with the platform's line terminator before being encoded as UTF-8
    var csvText = string.Join(Environment.NewLine, repeatedLines);
    var csvBytes = Encoding.UTF8.GetBytes(csvText);

    var settings = new CSVSettings
    {
        HeaderRowIncluded = false,
    };

    var actualRowCount = 0;
    using (var byteStream = new MemoryStream(csvBytes))
    using (var textReader = new StreamReader(byteStream))
    using (var reader = new CSVReader(textReader, settings))
    {
        // Only the number of rows matters here; the row contents are not inspected
        foreach (var unusedRow in reader)
        {
            actualRowCount++;
        }
    }

    Assert.AreEqual(expectedRowCount, actualRowCount);
}

#if HAS_ASYNC_IENUM
[Test]
public async Task TestAsyncReader()
Expand Down
6 changes: 6 additions & 0 deletions tests/net20/tests.net20.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,12 @@
<Name>src.net20</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
6 changes: 6 additions & 0 deletions tests/net40/tests.net40.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
<Name>src.net40</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
6 changes: 6 additions & 0 deletions tests/net45/tests.net45.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,12 @@
<Name>src.net45</Name>
</ProjectReference>
</ItemGroup>
<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />
</ItemGroup>
Expand Down
7 changes: 7 additions & 0 deletions tests/net50/tests.net50.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,11 @@
<ProjectReference Include="..\..\src\net50\src.net50.csproj" />
</ItemGroup>

<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>

</Project>
7 changes: 7 additions & 0 deletions tests/net60/tests.net60.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,12 @@
<ProjectReference Include="..\..\src\netstandard20\src.netstandard20.csproj" />
</ItemGroup>

<ItemGroup>
<Content Include="..\PackageAssets.csv">
<Link>PackageAssets.csv</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
</ItemGroup>

</Project>

0 comments on commit 4355ed8

Please sign in to comment.