Reputation: 4027
I used this answer https://stackoverflow.com/a/49672348/832783 to parse a csv file.
Here is the code:
open Microsoft.VisualBasic.FileIO
open System.Text
/// Lazily parses a comma-delimited text file.
///
/// Returns a sequence that yields one `string[]` of fields per record.
/// Fields enclosed in double quotes may contain the delimiter.
///
/// fileName: path of the file to read.
/// encoding: text encoding used to decode the file.
///
/// Nothing is opened when `parseCsv` is called; the `TextFieldParser` is
/// created when enumeration starts, and a fresh parser is created for every
/// enumeration of the returned sequence.
let parseCsv (fileName: string) (encoding: Encoding) =
    //[
    seq {
        // `use` must stay inside the sequence expression: that ties the
        // parser's lifetime to the enumeration instead of to this function
        // call, so the parser is still open while rows are being pulled.
        use csvParser = new TextFieldParser(fileName, encoding)
        csvParser.SetDelimiters([|","|])
        csvParser.TextFieldType <- FieldType.Delimited
        csvParser.HasFieldsEnclosedInQuotes <- true
        while not (csvParser.EndOfData) do
            yield csvParser.ReadFields()
    }
    //]
// Enumerates the whole sequence by counting its rows, passes the count to Dump, and discards Dump's return value.
// The trailing comment shows an alternative consumer that prints every row instead of counting them.
parseCsv @"<the path to your csv file>" Encoding.UTF8 |> Seq.length |> Dump |> ignore //iter(fun it -> printfn "%A" it)
It uses the Microsoft.VisualBasic library, so you need to add it to your project, and you also need to specify the path to the CSV file. Dump comes from LINQPad, but you can replace it with Console.WriteLine.
How does the F# compiler ensure that the resource, in this case the TextFieldParser object, is disposed after the sequence is processed?
I have to say, this construction is awesome. Like the OP of the post that the answer was for, my first reaction was to implement a class and maybe one of the interfaces that allow traversing the sequence.
I also experimented with moving the following block:
use csvParser = new TextFieldParser(fileName, encoding)
csvParser.SetDelimiters([|","|])
csvParser.TextFieldType <- FieldType.Delimited
csvParser.HasFieldsEnclosedInQuotes <- true
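above the sequence expression, but then it stops working for the sequence version. It still works for the list and array versions, which makes sense: a sequence is evaluated lazily, so the parser has already been disposed by the time the sequence is enumerated, whereas a list or array is fully built before the function returns.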
I prefer the option with the seq though because I assume it is not as memory hungry as the list or array options.
TIA
Upvotes: 0
Views: 71
Reputation: 4027
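I used ILSpy to decompile the code to C#, and this answers my question. The compiler creates the class below, which extends GeneratedSequenceBase and implements a state machine to keep track of where it is in the processing of the sequence. It disposes the object when the end of the sequence is reached; in addition, its Close() method (which GeneratedSequenceBase invokes when the enumerator is disposed) disposes the object if enumeration is abandoned before the end.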
Here it is:
// query_jpvgrt, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
// Query_jpvgrt
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Text;
using LINQPad;
using Microsoft.FSharp.Core;
using Microsoft.FSharp.Core.CompilerServices;
using Microsoft.VisualBasic.FileIO;
// ILSpy's C# rendering of the compiled F# module. It is decompiler output, not hand-written
// source: names such as parseCsv@30 and constructs such as base..ctor() are renderings of IL
// and are not legal C#.
[CompilationMapping(SourceConstructFlags.Module)]
public static class Query_jpvgrt
{
// Compiler-generated state machine for the seq { ... } body of parseCsv.
// It serves as both the sequence (see GetFreshEnumerator) and its enumerator.
//
// Values of 'pc' as used in this class:
//   0 - initial state; no parser has been created yet
//   1 - parser has been created and configured
//   2 - suspended after yielding a row
//   3 - finished; the parser has been disposed (or was never created)
[Serializable]
[SpecialName]
[StructLayout(LayoutKind.Auto, CharSet = CharSet.Auto)]
[CompilationMapping(SourceConstructFlags.Closure)]
internal sealed class parseCsv@30 : GeneratedSequenceBase<string[]>
{
// Captured arguments of parseCsv.
public string fileName;
public Encoding encoding;
// The `use csvParser` local, hoisted to a field so it survives between calls to GenerateNext.
public TextFieldParser csvParser;
// Current state of the state machine (see the table on the class).
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
public int pc;
// The most recently yielded row; set to null once the sequence has finished.
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
public string[] current;
// Stores every field as given; both callers in this file pass csvParser = null, pc = 0, current = null.
public parseCsv@30(string fileName, Encoding encoding, TextFieldParser csvParser, int pc, string[] current)
{
this.fileName = fileName;
this.encoding = encoding;
this.csvParser = csvParser;
this.pc = pc;
this.current = current;
base..ctor();
}
// Advances the state machine by one step.
// Returns 1 after storing the next row in 'current', or 0 when there are no more rows.
// The 'next' parameter is not used by this body.
public override int GenerateNext(ref IEnumerable<string[]> next)
{
switch (pc)
{
default:
// First call (pc == 0): the file is opened here, not when parseCsv is called.
csvParser = new TextFieldParser(fileName, encoding);
pc = 1;
csvParser.SetDelimiters(",");
csvParser.TextFieldType = FieldType.Delimited;
csvParser.HasFieldsEnclosedInQuotes = true;
goto case 2;
case 2:
// Loop head of the `while`; also the resumption point after a previous yield.
if (!csvParser.EndOfData)
{
pc = 2;
current = csvParser.ReadFields();
return 1;
}
goto case 1;
case 1:
// End of data: this is where the `use` binding disposes the parser on normal completion.
pc = 3;
LanguagePrimitives.IntrinsicFunctions.Dispose(csvParser);
csvParser = null;
pc = 3;
break;
case 3:
break;
}
current = null;
return 0;
}
// Cleanup path for an enumeration that may not have run to completion.
// Loops until pc == 3. When pc is neither 0 nor 3 the parser is disposed; an exception
// thrown while doing so is captured and rethrown once state 3 has been reached.
public override void Close()
{
Exception ex = default(Exception);
while (true)
{
switch (pc)
{
case 3:
// Finished: surface any exception captured during cleanup, otherwise return.
if (ex != null)
{
throw ex;
}
return;
}
try
{
switch (pc)
{
default:
// A parser exists (pc is 1 or 2): mark as finished first, then dispose it.
pc = 3;
LanguagePrimitives.IntrinsicFunctions.Dispose(csvParser);
break;
case 0:
case 3:
// Nothing to dispose: enumeration never started, or it already finished.
break;
}
pc = 3;
current = null;
}
catch (object obj)
{
Exception e = (Exception)obj;
ex = e;
}
}
}
// Returns true for every state other than 0 and 3, i.e. exactly the states in which
// Close() disposes csvParser.
public bool get_CheckClose()
{
switch (pc)
{
default:
return true;
case 1:
return true;
case 0:
case 3:
return false;
}
}
// Returns the row stored by the last successful GenerateNext call.
[CompilerGenerated]
[DebuggerNonUserCode]
public string[] get_LastGenerated()
{
return current;
}
// Creates a new state machine in state 0 with no parser, so each enumeration
// constructs its own TextFieldParser on its first GenerateNext call.
[CompilerGenerated]
[DebuggerNonUserCode]
public override IEnumerator<string[]> GetFreshEnumerator()
{
return new parseCsv@30(fileName, encoding, null, 0, null);
}
}
// Forwards to o.Dump() and returns its result.
// NOTE(review): presumably the LINQPad Dump extension, given `using LINQPad;` - confirm.
public static a Dump<a>(a o)
{
return o.Dump();
}
// Compiled form of the F# parseCsv function: it only constructs the state machine in
// state 0 with a null parser; no TextFieldParser is created until enumeration begins.
[CompilationArgumentCounts(new int[] { 1, 1 })]
public static IEnumerable<string[]> parseCsv(string fileName, Encoding encoding)
{
return new parseCsv@30(fileName, encoding, null, 0, null);
}
}
and:
// query_jpvgrt, Version=0.0.0.0, Culture=neutral, PublicKeyToken=null
// <StartupCode$query_jpvgrt>.$Query_jpvgrt
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Text;
using Microsoft.FSharp.Collections;
// ILSpy's rendering of the compiler-generated startup class that holds the script's
// top-level code. The '$' and '@' in the names come from the compiler and are not legal C#.
internal static class $Query_jpvgrt
{
[DebuggerBrowsable(/*Could not decode attribute arguments.*/)]
[CompilerGenerated]
[DebuggerNonUserCode]
internal static int init@;
// Compiled form of the top-level pipeline: build the lazy sequence, count its rows with
// SeqModule.Length, and pass the count to Dump. The locals num and num2 are never read.
public static void main@()
{
IEnumerable<string[]> source = Query_jpvgrt.parseCsv("<the path to your csv file>", Encoding.UTF8);
int o = SeqModule.Length(source);
int num = Query_jpvgrt.Dump(o);
int num2 = num;
}
}
Upvotes: 2