Karim Daoud
Karim Daoud

Reputation: 53

Counting the position of a character after two or more spaces

I am trying to return a value equal to the position of the first letter coming after two or more spaces.

I have a tool that extract tables with variable column length into TXT docs. I need to get these tables into an Excel sheet without having to put fixed width to each column in each table (that is a lot of coding to be done). I am trying to find something more dynamic to do so based on the position of the first character after two or more spaces.

Bearing in mind that not all rows are fully populated but the first row would make a perfect candidate to get the width of the column.

To give an example, the lines of the text would look like this

John       Robert       Eric       Tom

10          11            143        43

21                       265        56

99          241                     76

All I got so far is to make it work with fixed width as per the code below

Sub exporttosheet()

Dim fPath As String
fPath = "C:\test.txt"

Const fsoForReading = 1
Const F_LEN_A As Integer = 10
Const F_LEN_B As Integer = 23
Const F_LEN_C As Integer = 7
Const F_LEN_D As Integer = 10

Dim objFSO As Object, objTextStream As Object, txt, f1, f2, f3, f4
Dim start As Integer
Dim fLen As Integer
Dim rw As Long

Set objFSO = CreateObject("scripting.filesystemobject")
Set objTextStream = objFSO.OpenTextFile(fPath, fsoForReading)
rw = 1

Do Until objTextStream.AtEndOfStream
    txt = objTextStream.Readline


    f1 = Trim(Left(txt, F_LEN_A))
    start = F_LEN_A + 1
    f2 = Trim(Mid(txt, start, F_LEN_B))
    start = start + F_LEN_B + 1
    f3 = Trim(Mid(txt, start, F_LEN_C))
    start = start + F_LEN_C + 1
    f4 = Trim(Mid(txt, start, F_LEN_D))

    With ThisWorkbook.Sheets("data").Cells(rw, 1).Resize(1, 4)
        .NumberFormat = "@" 'format cells as text
        .Value = Array(f1, f2, f3, f4)
    End With
    rw = rw + 1
Loop

objTextStream.Close
End Sub

Upvotes: 3

Views: 433

Answers (3)

z32a7ul
z32a7ul

Reputation: 3787

You may try this function below:

Public Function InterpretLine(strLine As String) As Variant
    Dim rgxCell As RegExp: Set rgxCell = New RegExp
    rgxCell.Pattern = "([^ ]+([ ]?[^ ]+)*)"
    rgxCell.Global = True
    Dim mtcResult As MatchCollection: Set mtcResult = rgxCell.Execute(strLine)
    Dim strResult() As String: ReDim strResult(0 To mtcResult.Count - 1)
    Dim i As Long: For i = 0 To mtcResult.Count - 1
        strResult(i) = mtcResult.Item(i)
    Next i
    InterpretLine = strResult
End Function

It takes a line as a string value and returns an array of strings (each element is a cell from the line). My assumption is that none of the cells contais 2 consecutive space characters and that between cells there is always at least two space characters. (Here, space character means only the one which is input through the long key on the keyboard, tabs linefeeds, etc. are not included.)

To use Regex in VBA, you need the following Reference (in the VBA Editor choose Tools > References), and check the following option:

enter image description here

Upvotes: 0

user4039065
user4039065

Reputation:

In lieu of any confirmation from you, I am going to assume that there actually is unicode characters in your actual data.

enter image description here

Option Explicit

Sub Split_My_Data()
    Dim s As Long, str As String, tmp As Variant, varFieldInfo As Variant

    ReDim tmp(0 To 0)

    With Worksheets("Sheet3")
        str = .Cells(1, 1).Value2
        s = Application.Max(InStrRev(str, Chr(32)), _
                            InStrRev(str, ChrW(8194)))
        Do While CBool(s)
            tmp(UBound(tmp)) = Array(s, 1)
            str = Left(str, s)
            Do While Right(str, 1) = Chr(32) Or Right(str, 1) = ChrW(8194): str = Left(str, Len(str) - 1): Loop
            s = Application.Max(InStrRev(str, Chr(32)), _
                                InStrRev(str, ChrW(8194)))
            ReDim Preserve tmp(LBound(tmp) To UBound(tmp) + 1)
            If Not CBool(s) Then Exit Do
        Loop

        'make the last (first) fieldinfo element
        tmp(UBound(tmp)) = Array(0, 1)

        'make room for the reversed fieldinfo
        ReDim varFieldInfo(LBound(tmp) To UBound(tmp))

        'reverse the fieldinfo array
        For s = UBound(tmp) To LBound(tmp) Step -1
            varFieldInfo(UBound(tmp) - s) = tmp(s)
        Next s

        'run TextToColumns with the new array of arrays for FieldInfo
        .Columns("A:A").TextToColumns Destination:=.Cells(1, "A"), DataType:=xlFixedWidth, FieldInfo:=varFieldInfo

        For s = 1 To .Cells(1, .Columns.Count).End(xlToLeft).Column
            With Intersect(.Columns(s), .UsedRange).Cells
                'get rid of unicode
                .Replace what:=ChrW(8194), replacement:=vbNullString, lookat:=xlPart
                'use another texttocolumns as a fast Trim
                .TextToColumns Destination:=.Cells(1, "A"), DataType:=xlFixedWidth, FieldInfo:=Array(Array(0, 1))
                'shrink/expand the column
                .EntireColumn.AutoFit
                .EntireColumn.ColumnWidth = Application.Max(.ColumnWidth, 9)
            End With
        Next s
    End With
End Sub

Results with text as trimmed text and numbers as real numbers (no unicode):

enter image description here

Upvotes: 3

user3598756
user3598756

Reputation: 29421

you could use the following function to get columns length out of a "header" line:

Function GetF_LENs(txt As Variant, nCols As Long) As Variant
    Dim t As Variant
    Dim iFLEN As Long

    t = Split(WorksheetFunction.Trim(txt), " ")
    nCols = UBound(t) + 1 '<--| the number of columns equals the number of found values
    ReDim FLENs(1 To nCols - 1) '<--| we need the width of columns till the one before the last column
    For iFLEN = 1 To nCols - 1
        FLENs(iFLEN) = InStr(txt, t(iFLEN))
    Next
    GetF_LENs = FLENs
End Function

and you could exploit it in your code as follows:

Sub exporttosheet()        
    Const fsoForReading = 1

    Dim fPath As String
    fPath = "C:\test.txt"

    Dim F_LENs As Variant, txt As Variant        
    Dim objFSO As Object, objTextStream As Object
    Dim rw As Long, nCols As Long

    Set objFSO = CreateObject("scripting.filesystemobject")
    Set objTextStream = objFSO.OpenTextFile(fPath, fsoForReading)

    txt = objTextStream.Readline '<--| read the first "header" line
    F_LENs = GetF_LENs(txt, nCols) '<--| get 'F_LENs' array out of "header" line: it stores the widths of all columns
    ReDim values(1 To nCols) '<--| resize the array that will hold each row values accordingly to the number of columns encountered
    rw = 1
    Do Until objTextStream.AtEndOfStream
        ReadValuesAndWriteCells txt, F_LENs, values, nCols, rw
        txt = objTextStream.Readline '<--| read the first "header" line
    Loop
    ReadValuesAndWriteCells txt, F_LENs, values, nCols, rw

    objTextStream.Close
End Sub

where I demaned the current line reading and writing to the following sub

Sub ReadValuesAndWriteCells(txt As Variant, F_LENs As Variant, values As Variant, nCols As Long, rw As Long)
    Dim start As Integer
    Dim fLen As Integer

    start = 1
    For fLen = 1 To nCols - 1 '<--| loop through 'F_LENs' array, i.e.: through current line columns
        values(fLen) = Trim(Mid(txt, start, F_LENs(fLen) - start)) '<-- store current line current column value in corresponding 'Values' index
        start = F_LENs(fLen)
    Next
    values(fLen) = Trim(Mid(txt, start)) '<-- store current line last column value

    With ThisWorkbook.Sheets("data").Cells(rw, 1).Resize(1, nCols)
        .NumberFormat = "@" 'format cells as text
        .Value = values '<--| write current line array values
    End With
    rw = rw + 1
End Sub

Upvotes: 1

Related Questions