Sam Smith
Sam Smith

Reputation: 21

VBA Code error to loop through folder of excel files and paste data

Below is VBA code which essentially just takes data from a folder of files and copies it into my destination file starting at row 1079, and makes somes adjustments. The files that the data is being pulled from are in the naming format "gcts_all_tran_data_YYYYMMDD.csv". Where YYYY represents a year, MM represents a month and DD represents a day. I need help modifying this so that it starts copying from the file the date string 20240603 and continues to the newer files.

When I run this, it doesn't execute anything and just displays the message box text. Nothing is actually being copied and pasted

Sub ConsolidateData()
    Dim wsDest As Worksheet
    Dim wbDest As Workbook
    Dim wbSource As Workbook
    Dim wsSource As Worksheet
    Dim LastRowDest As Long
    Dim LastRowSource As Long
    Dim FileName As String
    Dim FolderPath As String
    Dim StartRow As Long
    Dim EndRow As Long
    Dim FormulaCell As Range
    Dim i As Long
    Dim FileDate As Date
    Dim FileDateString As String

    ' Destination workbook and worksheet
    Set wbDest = Workbooks.Open("K:\folder1\destination.xlsm")
    Set wsDest = wbDest.Sheets("Data")

    ' Folder containing the CSV files
    FolderPath = "C:\Users\folder2\"

    ' Initialize variables
    FileName = Dir(FolderPath & "gcts_all_tran_data_*.csv")

    ' Start pasting data at row 1079
    LastRowDest = 1078

    ' Loop through all CSV files in the folder
    Do While FileName <> ""
        ' Extract the date from the filename (YYYYMMDD format)
        FileDateString = Mid(FileName, 18, 8)

        ' Check if the extracted string is a valid date
        If IsNumeric(FileDateString) And Len(FileDateString) = 8 Then
            FileDate = DateSerial(CLng(Mid(FileDateString, 1, 4)), CLng(Mid(FileDateString, 5, 2)), CLng(Mid(FileDateString, 7, 2)))

            ' Check if the file date is on or after 20240603
            If FileDate >= DateSerial(2024, 6, 3) Then
                ' Open the CSV file
                Set wbSource = Workbooks.Open(FolderPath & FileName)
                Set wsSource = wbSource.Sheets(1)

                ' Find the last row of the source worksheet
                LastRowSource = wsSource.Cells(wsSource.Rows.Count, "A").End(xlUp).Row

                ' Apply filter to column E and copy the filtered data
                wsSource.Range("A1:P" & LastRowSource).AutoFilter Field:=5, Criteria1:="Bob"

                On Error Resume Next ' Skip to next file if there are no visible cells
                If wsSource.Range("A2:A" & LastRowSource).SpecialCells(xlCellTypeVisible).Count > 1 Then
                    ' Find the last row in the destination worksheet before appending new data
                    LastRowDest = LastRowDest + 1 ' Move to the next row for pasting

                    ' Copy the data without headers
                    wsSource.Range("A2:P" & LastRowSource).SpecialCells(xlCellTypeVisible).Copy
                    wsDest.Cells(LastRowDest, 1).PasteSpecial Paste:=xlPasteValues
                    Application.CutCopyMode = False ' Clear the clipboard to avoid the message

                    ' Update LastRowDest to the new last row after pasting
                    LastRowDest = wsDest.Cells(wsDest.Rows.Count, "A").End(xlUp).Row
                End If
                On Error GoTo 0 ' Reset error handling

                ' Close the source workbook without saving
                wbSource.Close False
            End If
        End If

        ' Get the next file
        FileName = Dir
    Loop

    ' Extend formulas in column Q starting from row 1079
    StartRow = 1079
    EndRow = wsDest.Cells(wsDest.Rows.Count, "A").End(xlUp).Row

    For i = StartRow To EndRow
        wsDest.Cells(i, "Q").Value = wsDest.Cells(i - 1, "Q").Value + wsDest.Cells(i, "G").Value
    Next i

    ' Sort the data in the destination worksheet by column K (oldest to newest) starting from row 1079
    With wsDest.Sort
        .SortFields.Clear
        .SortFields.Add Key:=wsDest.Range("K1079:K" & EndRow), SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
        .SetRange wsDest.Range("A1079:R" & EndRow)
        .Header = xlNo
        .Apply
    End With

    ' Delete rows where column K is [NULL] and column G is equal to 0 starting from row 1079
    For i = EndRow To 1079 Step -1
        With wsDest
            If .Cells(i, "K").Value = "[NULL]" And .Cells(i, "G").Value = 0 Then
                .Rows(i).Delete
            End If
        End With
    Next i

    ' Save and close the destination workbook
    wbDest.Save

    ' Clean up
    Set wbDest = Nothing
    Set wsDest = Nothing
    Set wbSource = Nothing
    Set wsSource = Nothing

    MsgBox "Data consolidation complete!"
End Sub

Upvotes: 1

Views: 60

Answers (2)

Darren Bartrup-Cook
Darren Bartrup-Cook

Reputation: 19857

I would use PowerQuery rather than VBA.

In Excel 365 (not sure where it is or called on earlier versions):

On the Data ribbon select Get Data > From Other Sources > Blank Query to open the PQ Editor.
Select Advanced Editor on the Home ribbon.

Add this code to the Advanced Editor:

let
    Source = Folder.Files("C:\Users\folder2"),
    FilterToFiles = Table.SelectRows(Source, each Text.StartsWith([Name], "gcts_all_tran_data_")),
    ExtractDate = Table.AddColumn(FilterToFiles, "File Date", each Text.BetweenDelimiters([Name], "_", ".", {0, RelativePosition.FromEnd}, 0), type text),
    ChangeDataType = Table.TransformColumnTypes(ExtractDate,{{"File Date", type date}}),
    FilterToDates = Table.SelectRows(ChangeDataType, each [File Date] >= #date(2024, 6, 3))
in
    FilterToDates

You should end up with a screen that looks like this:
Query Results

Click the Combine Files button. I've drawn a red square around it to highlight it.
This should display the Combine Files dialog box - press OK on this.

PQ will create a couple of helper queries to combine all your files and display the final table. Do any further transformations required and then press Close & Load on the Home ribbon to create a table on a worksheet.


You could add a custom function to read values from named ranges - folder path & the date to filter from. I've called this fGetNamedRange.

let GetNamedRange=(NamedRange) => 
    let
        name = Excel.CurrentWorkbook(){[Name=NamedRange]}[Content],
        value = name{0}[Column1]
    in
        value
in GetNamedRange

Your query would then look like (using ImportFolder and StartDate as the named ranges):

let
    Source = Folder.Files(Text.From(fGetNamedRange("ImportFolder"))),
    FilterToFiles = Table.SelectRows(Source, each Text.StartsWith([Name], "gcts_all_tran_data_")),
    ExtractDate = Table.AddColumn(FilterToFiles, "File Date", each Text.BetweenDelimiters([Name], "_", ".", {0, RelativePosition.FromEnd}, 0), type text),
    ChangeDataType = Table.TransformColumnTypes(ExtractDate,{{"File Date", type date}}),
    FilterToDates = Table.SelectRows(ChangeDataType, each [File Date] >= Date.From(fGetNamedRange("StartDate")))
in
    FilterToDates  

Now you just have change the date on your worksheet and click Refresh.

Upvotes: 0

VBasic2008
VBasic2008

Reputation: 55073

Import Values From Closed Workbooks

  • The immediate reason for not getting any data copied was revealed by
    Sorceri in your comments. You should have used FileDateString = Mid(FileName, 20, 8) because gcts_all_tran_data_ has 19 characters. A Debug.Print FileDateString right below would have revealed the mistake. I'm not sure about the rest of the code.
  • It's hard (time-consuming) to test such a lengthy code so you should (I should have) split the various operations into multiple procedures. Then it becomes easier to find mistakes.

An Improvement!?

Sub ConsolidateData()
    
    ' Constants
    
    Const SRC_FOLDER_PATH As String = "C:\Users\folder2\"
    Const SRC_FILE_BASE_NAME_LEFT As String = "gcts_all_tran_data_"
    Const SRC_FILE_EXTENSION As String = ".csv"
    Const SRC_FILE_DATE_PATTERN As String = "YYYYMMDD"
    Const SRC_FILE_MIN_DATE As Date = #6/3/2024#
    Const SRC_COPY_COLUMNS As String = "A:P"
    
    Const DST_FILE_PATH As String = "K:\folder1\destination.xlsm"
    Const DST_SHEET_NAME As String = "Data"
    Const DST_FIRST_CELL_ADDRESS As String = "A1079"
    Const DST_COLUMNS As String = "A:R"
    Const DST_SUM_COLUMN As String = "G"
    Const DST_RUNNING_SUM_COLUMN As String = "Q"
    Const DST_SORT_COLUMN As String = "K"
    
    ' Get the first source file.
    
    Dim sFileDateStart As Long:
    sFileDateStart = Len(SRC_FILE_BASE_NAME_LEFT) + 1
    Dim sFileDateLen As Long: sFileDateLen = Len(SRC_FILE_DATE_PATTERN)
    
    Dim sFileName As String: sFileName = Dir(SRC_FOLDER_PATH _
        & SRC_FILE_BASE_NAME_LEFT & String(sFileDateLen, "?") _
        & SRC_FILE_EXTENSION)
    
    If Len(sFileName) = 0 Then
        MsgBox "No files found!", vbExclamation
        Exit Sub
    End If
    
    ' Declare additional variables used in the loop.
    
    ' Source
    Dim swb As Workbook, sws As Worksheet, sfcell As Range, srg As Range
    Dim sdrg As Range, svrg As Range, sarg As Range, sFileDate As Date
    Dim scCount As Long, srCount As Long, sFileDateString As String
    ' Destination
    Dim dwb As Workbook, dws As Worksheet
    Dim dfcell As Range, drrg As Range, darg As Range, drCount As Long
    ' Flags (Booleans)
    Dim IsValidDate As Boolean, IsValidRange As Boolean
    Dim IsColumnsCountDetermined As Boolean, WasDataCopied As Boolean
    
    ' Loop through all CSV files in the folder that match the pattern.

    Do While Len(sFileName) > 0
        
        ' Extract the date from the filename (YYYYMMDD format).
        sFileDateString = Mid(sFileName, sFileDateStart, sFileDateLen)

        ' Attempt to retrieve the source file date.
        On Error Resume Next
            sFileDate = DateSerial( _
                CLng(Mid(sFileDateString, 1, 4)), _
                CLng(Mid(sFileDateString, 5, 2)), _
                CLng(Mid(sFileDateString, 7, 2))) ' Y, M, D
        On Error GoTo 0
        
        ' Check if the extracted string is a valid date.
        If sFileDate > 0 Then ' valid date
            ' Check if the file date is not before 20240603
            If sFileDate >= SRC_FILE_MIN_DATE Then IsValidDate = True
            sFileDate = 0 ' reset for the next iteration
        'Else ' invalid date; do nothing
        End If
        
        ' Reference the range to be copied.
        If IsValidDate Then
            ' Reference the source range.
            Set swb = Workbooks.Open(SRC_FOLDER_PATH & sFileName)
            Set sws = swb.Sheets(1)
            Set sfcell = sws.Columns(SRC_COPY_COLUMNS).Cells(1)
            If Not IsColumnsCountDetermined Then
                scCount = sws.Columns(SRC_COPY_COLUMNS).Columns.Count
                IsColumnsCountDetermined = True
            End If
            With sfcell.CurrentRegion
                Set srg = sfcell.Resize(.Row + .Rows.Count - sfcell.Row, _
                    .Column + .Columns.Count - sfcell.Column).Resize(, scCount)
            End With
            ' Check if any data.
            srCount = srg.Rows.Count - 1 ' exclude headers
            If srCount > 0 Then ' data found
                ' Reference the source data range (no headers).
                Set sdrg = srg.Resize(srCount).Offset(1)
                ' Apply filter to columns 'E', 'G', and 'K'.
                srg.AutoFilter Field:=5, Criteria1:="Bob" ' E
                srg.AutoFilter Field:=7, Criteria1:="<>" & "0" ' G
                srg.AutoFilter Field:=11, Criteria1:="<>" & "[NULL]" ' K
                ' Attempt to reference the visible (filtered) rows.
                On Error Resume Next
                    Set svrg = sdrg.SpecialCells(xlCellTypeVisible)
                On Error GoTo 0
                ' Turn off auto filtering.
                sws.AutoFilterMode = False
                ' Check if any filtered rows and set the flag.
                If Not svrg Is Nothing Then ' filtered rows found
                    IsValidRange = True
                'Else ' no filtered rows found
                End If
            ' Else ' no data (invalid range); do nothing
            End If
            ' Not resetting 'IsValidDate' here because it is needed
            ' below to close the file.
        'Else ' invalid date (invalid range); do nothing
        End If
            
        ' Copy values by assignment.
        If IsValidRange Then
            If Not WasDataCopied Then
                ' Open the destination file and reference the main objects.
                ' It's (unusually) here in case there are no valid
                ' source ranges (dates) so the destination file never opens.
                Set dwb = Workbooks.Open(DST_FILE_PATH)
                Set dws = dwb.Sheets(DST_SHEET_NAME)
                Set dfcell = dws.Range(DST_FIRST_CELL_ADDRESS)
                Set drrg = dfcell.Resize(, scCount)
                WasDataCopied = True
            End If
            ' Copy values by assignment.
            For Each sarg In svrg.Areas
                srCount = sarg.Rows.Count
                Set darg = drrg.Resize(srCount)
                darg.Value = sarg.Value ' copy
                drCount = drCount + srCount ' count the destination rows
                Set drrg = drrg.Offset(srCount) ' next destination row
            Next sarg
            IsValidRange = False ' reset for the next iteration
            Set svrg = Nothing ' reset for the next iteration
        'Else ' invalid range; do nothing
        End If
            
        ' Close the source workbook without saving.
        If IsValidDate Then
            swb.Close SaveChanges:=False
            IsValidDate = False ' reset for the next iteration
        End If

        ' Get the next source file.
        sFileName = Dir
    
    Loop

    ' Check if nothing was copied.
    If Not WasDataCopied Then ' (the destination file was never open)
        MsgBox "Nothing copied!", vbExclamation
        Exit Sub
    End If
    
    ' Reference the destination range.
    
    Dim drg As Range: Set drg = dws.Columns(DST_COLUMNS) _
        .Resize(drrg.Row - dfcell.Row).Offset(dfcell.Row - 1)

    ' Sort the destination range by column 'K' in ascending order.

    With dws.Sort
        .SortFields.Clear
        .SortFields.Add Key:=drg.EntireRow.Columns(DST_SORT_COLUMN), _
            SortOn:=xlSortOnValues, Order:=xlAscending, DataOption:=xlSortNormal
        .SetRange drg
        .Header = xlNo
        .Apply
    End With

    ' Extend the running sum in column 'Q' by adding from column 'G'.
    ' (it doesn't make sense to do this before sorting!?)

    Dim Data() As Variant, r As Long, Total As Double

    With drg.EntireRow.Columns(DST_SUM_COLUMN)
        If drCount = 1 Then
            ReDim Data(1 To 1, 1 To 1): Data(1, 1) = .Value
        Else
            Data = .Value
        End If
        With .EntireRow.Columns(DST_RUNNING_SUM_COLUMN)
            Total = .Cells(1).Offset(-1).Value ' the cell above
            For r = 1 To drCount
                Total = Total + Data(r, 1)
                Data(r, 1) = Total
            Next r
            .Value = Data
        End With
    End With

    ' Save and close the destination workbook.
    dwb.Close SaveChanges:=True
    
    ' Inform.
    MsgBox "Data consolidation complete!", vbInformation

End Sub

Upvotes: 0

Related Questions