Addi
Addi

Reputation: 3

Extracting data from a txt file to Excel via vba

I'm trying to extract data from a txt file and put them into a structured Excel table. The txt file looks something like this.

Date 28.07.2022 Time: 16:52
Neue Datei
Date 28.07.2022 Time: 16:52
WS-Typ 4 SOLL Durchmesser: 83.24
WS-Nr.(DMC Stelle 12-21) 2220900102 WS-Temp. 30.23
Zylinderbohrung=6 Ebene=3 Tiefe=130
Kalibrierwert -14.9
X-Mitte_aus 36 Punkten 0.006
Y-Mitte_aus 36 Punkten -0.004
Pferch-Durchmesser 83.287 Korr.20°C 83.268
------------------------
Date 28.07.2022 Time: 22:32
WS-Typ 4 SOLL Durchmesser: 83.24
WS-Nr.(DMC Stelle 12-21) 2220900181 WS-Temp. 30.03
Zylinderbohrung=6 Ebene=1 Tiefe=8
Kalibrierwert -14.9
X-Mitte_aus 36 Punkten -0.006
Y-Mitte_aus 36 Punkten 0
Pferch-Durchmesser 83.299 Korr.20°C 83.279
...

While I am able to extract the first set of data. I can't get any of the following sets of data to appear in my table. The closest I could find to my problem was this, but unless I missed it, the only thing they told him to do was to implement a loop into his code. I tried doing it and so far my code looks like this.

Sub Button()
    Dim myFile As String, text As String, textline As String
    Dim posA As Integer, posB As Integer, ...
    Dim i As Long
    myFile = "Path\myFile.TXT"
    Open myFile For Input As #1
    i = 1
    Do Until EOF(1)
        Line Input #1, textline
        text = text & textline
        posDate = InStr(text, "Date")
        If posDate = 1 Then
            i = i + 1
        End If
        posTime = InStr(text, "Time")
        posA = InStr(text, "A")
        ...
        Cells(i, 1).Value = Mid(text, posDate + 5, 10)
        Cells(i, 2).Value = Mid(text, posTime + 6, 5)
        Cells(i, 3).Value = Mid(text, posA + 27, 5)
        ...
    Loop
    Close #1
End Sub

I'm not sure how to change it as I have very little experience with vba.

Edit: Adding the line that includes the variables in the solution using regular expression: ws.Range("A1:M1") = Array("Date", "Time", "WS-Typ", "SOLL Durchmesser", "WS-Nr.(DMC Stelle 12-21)", "WS-Temp.", "Zylinderbohrung", "Ebene", "Tiefe", "Kalibrierwert", "X-Mitte_aus 36 Punkten", "Y-Mitte_aus 36 Punkten", "Pferch-Durchmesser", "Korr.20°C")

Upvotes: 0

Views: 1254

Answers (2)

Ron Rosenfeld
Ron Rosenfeld

Reputation: 60414

You can obtain your desired output using Power Query, available in Windows Excel 2010+ and Office 365 Excel

  • Data => Get&Transform => Get Data => From File => From Text/CSV

  • When the PQ UI opens, navigate to Home => Advanced Editor

  • Make note of the File Path in Line 2 of the code.

  • Replace the existing code with the M-Code below

  • Change the file path in line 2 of the pasted code to your "real" file path (don't change anything else in that line)

  • Examine any comments, and also the Applied Steps window, to better understand the algorithm and steps

  • Code assumes each "set" of data has all of the column headers

M Code

let

//Change next line to reflect actual data source
    Source = Csv.Document(File.Contents("C:\Users\ron\Desktop\New Text Document.txt"),[Delimiter="~", Columns=1, Encoding=65001, QuoteStyle=QuoteStyle.None]),

//set data types
    #"Changed Type" = Table.TransformColumnTypes(Source,{{"Column1", type text}}),

//List of expected column names
ColumnNames = {"Date", "Time", "WS-Typ 4 SOLL Durchmesser", "WS-Nr.(DMC Stelle 12-21)", "WS-Temp.", "Zylinderbohrung", "Ebene", "Tiefe", "Kalibrierwert", "X-Mitte_aus 36 Punkten", "Y-Mitte_aus 36 Punkten", "Pferch-Durchmesser", "Korr.20°C"},

//Add Index column, then grouping column based on each "set" starting with a date/time line
    Index = Table.AddIndexColumn(#"Changed Type","Index"),
    #"Grouping Column" = Table.FillDown(Table.AddColumn(Index, "Grouper", 
        each if Text.Contains([Column1], "Time:") then [Index] else null, Int64.Type),{"Grouper"}),

//Remove index column
    #"Removed Columns" = Table.RemoveColumns(#"Grouping Column",{"Index"}),

//remove first two rows and the row separating the "sets"
    #"Filtered Rows" = Table.SelectRows(#"Removed Columns", each ([Column1] <> "------------------------") and ([Grouper] <> 0)),

//Group by the grouper column
/*For each group
  Combine into a single string
  Split by the column Headers
  Remove the "garbage*: Initial colon, equal sign or space
    If there is a plethora of garbage characters, we could use a different method
  Create Records which we will expand into the new columns*/
    #"Grouped Rows" = Table.Group(#"Filtered Rows", {"Grouper"}, { 
        {"splitted", (t)=> 
            let
                vals=List.RemoveFirstN(Splitter.SplitTextByEachDelimiter(ColumnNames)(Text.Combine(t[Column1]," "))),
                #"Remove Garbage"=List.Transform(vals, each Text.TrimStart(_,{":","="," "})),
                recs = Record.FromList(#"Remove Garbage",ColumnNames)
            in 
                recs}}),
    
//Remove the grouper column
    #"Removed Columns1" = Table.RemoveColumns(#"Grouped Rows",{"Grouper"}),

//expand the column of records to our new table
    #"Expanded splitted" = Table.ExpandRecordColumn(#"Removed Columns1", "splitted", ColumnNames),
    #"Changed Type1" = Table.TransformColumnTypes(#"Expanded splitted",{
        {"Date", type date}, {"Time", type time}, {"WS-Typ 4 SOLL Durchmesser", type number}, 
        {"WS-Nr.(DMC Stelle 12-21)", Int64.Type}, {"WS-Temp.", type number}, 
        {"Zylinderbohrung", Int64.Type}, {"Ebene", Int64.Type}, {"Tiefe", Int64.Type}, 
        {"Kalibrierwert", type number}, {"X-Mitte_aus 36 Punkten", type number}, 
        {"Y-Mitte_aus 36 Punkten", type number}, {"Pferch-Durchmesser", type number}, {"Korr.20°C", type number}}, "en-150")
in
    #"Changed Type1"

Result from your newly posted data
enter image description here

Upvotes: 0

CDP1802
CDP1802

Reputation: 16357

Try using a Regular Expression.

update : real data

Option Explicit

Sub extract()

    Const TEXTFILE = "data.txt"
   
    Dim wb As Workbook, ws As Worksheet, r As Long, ar, arFields
    Dim fso As Object, ts As Object, n As Long, s As String
    Dim c As Long, v As String, i As Long
    
    Set wb = ThisWorkbook
    Set ws = wb.Sheets("Sheet1")
    
    ' results sheet header
    arFields = Array("WS-Typ 4 SOLL Durchmesser", _
    "WS-Nr.(DMC Stelle 12-21)", "WS-Temp.", "Zylinderbohrung", "Ebene", _
    "Tiefe", "Kalibrierwert", "X-Mitte_aus 36 Punkten", _
    "Y-Mitte_aus 36 Punkten", "Pferch-Durchmesser", "Korr.20°C")
    
    ws.Cells.Clear
    ws.Range("A1:B1") = Array("Date", "Time")
    ws.Range("C1").Resize(1, UBound(arFields) + 1) = arFields
   
    r = 1
    
    ' Convert field to column usin fictionary
    Dim dict As Object
    Set dict = CreateObject("Scripting.Dictionary")
    For i = 0 To UBound(arFields)
        dict.Add arFields(i), i + 3 ' col C start
        'Debug.Print arFields(i), i + 3
    Next
    
    'create regex engine
    Dim Regex As Object, m As Object
    Dim sPattern As String, sFields As String
    Set Regex = CreateObject("vbscript.regexp")
    
    ' build pattern and escape brackets ()
    sFields = Join(arFields, "|")
    sFields = Replace(sFields, "(", "\(")
    sFields = Replace(sFields, ")", "\)")
    
    sPattern = "(" & sFields & ")[ =:]*([-0-9.]+)"
    Debug.Print sPattern
    
    With Regex
      .Global = True
      .MultiLine = False
      .IgnoreCase = True
      .Pattern = sPattern
    End With
    
    ' open text file
    Set fso = CreateObject("Scripting.FileSystemObject")
    Set ts = fso.OpenTextFile(TEXTFILE, 1) ' ASCII
    
    ' read lines
    Do While ts.AtEndOfLine <> True
        n = n + 1
        s = ts.readline
        'Debug.Print s
        
        ' check for date and start new line
        If Left(s, 4) = "Date" Then
            r = r + 1
            ar = Split(s, " ")
            ws.Cells(r, 1) = ar(1) ' date
            ws.Cells(r, 2) = ar(3) ' time
            
        ' check for pattern
        ElseIf Regex.test(s) Then
            Set m = Regex.Execute(s) ' matches
            For i = 0 To m.Count - 1
                s = m(i).submatches(0) ' fieldname
                If dict.exists(s) Then
                    c = dict(s) ' column no
                    v = m(i).submatches(1) ' value
                    ws.Cells(r, c) = v
                    'Debug.Print s, r, c, v
                Else
                    MsgBox "Field not found '" & s & "'", vbCritical
                    Exit Sub
                End If
            Next
        End If
        
    Loop
    ts.Close

    MsgBox n & " lines read from " & TEXTFILE, vbInformation

End Sub

Upvotes: 1

Related Questions