Reputation: 121
I'm currently having a data validation problem in Excel and may be the victim of "over-thinking" the problem.
My requirement is simple - I receive a large amount of xls files which all need to comply to an exact format.
For example, I need all of the files I receive to have have the following strings in cells A1 to A3: "FirstName", "LastName", "Email". (Case matters).
In reality, there are a lot more headers than this and trawling through every file and ensuring that all of the headings exist and are spelt correctly/in the correct case is very tedious and time consuming. I believe that it would be possible to create a module or tool in Visual Basic which could check the format and then return either correct/false based on whether the file complies with the required format.
I have looked into regular expressions (but believe that this may be overkill as I only require EXACT matches) and have no experience in using VB. I have looked online for help - some of which has been useful, some of which has be way too advanced for the tool I need.
Any help is greatly appreciated.
Thanks.
Upvotes: 1
Views: 1183
Reputation: 1646
I started to comment on the VBA answer above, but decided a separate answer would be more clear. I started mainly to answer Rich's question "How do I implement/run this?" since this question was asked by someone with "no experience in using VB," and some respondents appear to be at that level also.
Here is how I would approach the problem: Put the file names either all in one folder, or list them on a new Excel master worksheet. Put the correct, canonical headings in that same master worksheet. Then put the code to check the other worksheets in this master worksheet. And finally, after checking the files, list whether they conform or not in that master worksheet.
Open a new Excel workbook to put your solution in. In this blank workbook, put in these items:
Press Alt+F11 to open the VBA editor, and find your new workbook in Project Explorer to the left.
Right-click on the VBAProject for that workbook, or any of the lines associated with it directly below, and select Insert -> Module. This should open a blank module area in the main section of the VBA editor.
#Const AllFilesInFolder = False
#Const SuppScreenUpdate = False ' Suppress Screen Update
Sub FileCheck()
' Purpose: Verify header content on multiple specified files. Checks headers for each file:
' 1) Listed in this Workbook.Sheet(1) in the range specified by scFilesAddr -- OR --
' 2) All .xls* files in the folder/path specified by scFolderName
' Headers are matched to the values in the range specified by scHeaderAddr
Dim IsFound As Boolean, DoMatch As Boolean
Dim nFile As Long, nHeader As Long
Dim Wkb As Workbook, wks As Worksheet
Dim rngHdrMaster As Range, rngHdrTest As Range
Dim rngFilenames As Range, sFileName As String
#If SuppScreenUpdate Then
Dim lngCalc As XlCalculation
#End If
' HEADERS' RANGE
' --> --> Change this Range address to the required matching headers <-- <--
Const scHeaderAddr As String = "A1:C1"
' This range address should also match up with the headers in the worksheets to test!
Set rngHdrMaster = ActiveSheet.Range(scHeaderAddr)
' FILENAMES' RANGE
' --> --> Change this Range address to point to the File names to examine <-- <--
Const scFilesAddr As String = "A6:A105"
' Ideally, all filenames listed should also list the full path, or be in the same
' folder as this workbook.
Set rngFilenames = ActiveSheet.Range(scFilesAddr)
#If AllFilesInFolder Then ' Get all Excel files in this folder
' FOLDER PATH
' --> --> Change this Folder name to point to where all files will be examined
Const scFolderName As String = "C:\Temp\"
sFileName = Dir(strFolderName & "*.xls*")
If Len(sFileName) > 0 Then
sFileName = scFolderName & sFileName
IsFound = True
End If
#Else ' Get Excel files listed in master spreadsheet
sFileName = rngFilenames(1).Value
IsFound = (Len(Dir(sFileName)) > 0)
#End If
nFile = 1
#If SuppScreenUpdate Then ' Optional: Set to True above if it runs slowly
With Application
.DisplayAlerts = False
.EnableEvents = False
.ScreenUpdating = False
lngCalc = .Calculation
.Calculation = xlCalculationManual
End With
#End If
Do While Len(sFileName) > 0 ' If Filename is specified, but File doesn't exist, just
If IsFound Then ' skip that file. If Filename is blank, then exit proc.
Set Wkb = Workbooks.Open(sFileName)
Set wks = Wkb.Sheets(1)
Set rngHdrTest = wks.Range(scHeaderAddr)
DoMatch = True
For nHeader = 1 To rngHdrMaster.Columns.Count
If rngHdrMaster(nHeader).Value2 <> rngHdrTest(nHeader).Value2 Then
DoMatch = False
Exit For
End If
Next
Wkb.Close False
rngFilenames(nFile, 1).HorizontalAlignment = xlRight
rngFilenames(nFile, 1).Value = sFileName
rngFilenames(nFile, 2).HorizontalAlignment = xlCenter
rngFilenames(nFile, 2).Value = DoMatch 'Could also put Y/N here
nFile = nFile + 1
#If AllFilesInFolder Then
sFileName = Dir() ' Get next file infolder
#Else ' Put value in nFile row, 2nd col - rngFilenames can be 1 col wide.
' Uncomment code below to ensure it does not read past specified range
' Otherwise, will keep reading values until it finds empty cell-maybe desired?
'If nFile > ActiveSheet.Range(scFilesAddr).Rows.Count Then
' Exit For
'Else
sFileName = rngFilenames(nFile).Value
IsFound = (Len(Dir(sFileName)) > 0)
' If sFileName = "" Then sFileName = "None"
'End If
#End If
End If
Loop
#If SuppScreenUpdate Then
With Application
.CutCopyMode = False
.DisplayAlerts = True
.EnableEvents = True
.ScreenUpdating = True
.Calculation = lngCalc
End With
#End If
End Sub ' FileCheck
There are some ranges in the code you will have to define:
Const scHeaderAddr As String = "A1:E1"
Const scFilesAddr As String = "A6:A105"
See also Selecting Filenames under Customization below.If you'd rather examine all the Excel files in a folder, see the Examine all files in a folder section under Customization below. You'll also need to change the constant scFolderName to the path where these files can be found.
Const scFolderName As String = "C:\Temp\"
'Make sure it ends with a backslash!
There are some compiler constants in the code, which is kind of an advanced feature, but it makes it really easy to customize. If you want to examine all the files in the folder, just change the line at the top of the code to #Const AllFilesInFolder = True
Basically, when AllFilesInFolder = True
, anything that is between #If AllFilesInFolder Then
... and ... #Else
(or ... #End If
, if #Else
is missing) will be run, and anything between the #Else
... and ... #End If
will be ignored. Or vice-versa, when it is False
, anything that is between #If AllFilesInFolder Then
... and ... #Else
will be ignored, and anything between the #Else
... and ... #End If
will be run.
If it's running slowly with the opening of files, etc. change the line near the top to #Const SuppScreenUpdate = True
. I put it in there because brettdj included it in his code, but it sometimes gets in the way of troubleshooting.
One final customization: The current functionality will start with the first cell in the range scFilesAddr, continue down the column and quit when it finds the first blank cell, whether inside or outside the range. So technically, if you forgot to expand the range, it will still find over 100 filenames, as long as there is not a blank cell between them. If you'd rather have each cell in the range be examined for a filename, whether or not there is an intervening blank cell, remove the initial single-quotes (that comment out) from the beginning of lines 132-138 (beginning with 'If nFile > ActiveSheet.Range(scFilesAddr).Rows.Count Then
and ending with 'End If
).
By the way, it does check to see if the file exists, and any non-existent files will be ignored, but it will not stop the process.
Upvotes: 0
Reputation: 3188
Do the following if you are using Windows:
The script will then run through the folder and tell you which files are not conforming to your header requirements.
(you can also modify the code below to include more headers, it should be obvious from my comments below in the "Else If" part).
Dim sFolder, fso, files, folder, objExcel, objWorkbook
sFolder = SelectFolder( "" )
If sFolder = vbNull Then
WScript.Echo "Cancelled"
Else
WScript.Echo "Selected Folder: """ & sFolder & """"
End If
' use strPath to look for excel files list
Set fso = CreateObject("Scripting.FileSystemObject")
Set folder = fso.GetFolder(sFolder)
Set files = folder.Files
Set objExcel = CreateObject("Excel.Application")
For Each file In files
Set objWorkbook = objExcel.Workbooks.Open(file)
' add more headers as you wish as ElseIf statements below
If objExcel.Cells(1, 1).Value <> "FirstName" Then
MsgBox(file & " is not correct.")
ElseIf objExcel.Cells(1, 2).Value <> "LastName" Then
MsgBox(file & " is not correct.")
ElseIf objExcel.Cells(1, 3).Value <> "Email" Then
MsgBox(file & " is not correct.")
End If
objExcel.ActiveWorkbook.Close(0)
Next
objExcel.Quit
Function SelectFolder( myStartFolder )
' This function opens a "Select Folder" dialog and will
' return the fully qualified path of the selected folder
'
' Argument:
' myStartFolder [string] the root folder where you can start browsing;
' if an empty string is used, browsing starts
' on the local computer
'
' Returns:
' A string containing the fully qualified path of the selected folder
'
' Written by Rob van der Woude
' http://www.robvanderwoude.com
' Standard housekeeping
Dim objFolder, objItem, objShell
' Custom error handling
On Error Resume Next
SelectFolder = vbNull
' Create a dialog object
Set objShell = CreateObject( "Shell.Application" )
Set objFolder = objShell.BrowseForFolder( 0, "Select Folder", 0, myStartFolder )
' Return the path of the selected folder
If IsObject( objfolder ) Then SelectFolder = objFolder.Self.Path
' Standard housekeeping
Set objFolder = Nothing
Set objshell = Nothing
On Error Goto 0
End Function
Function ReadExcel( myXlsFile, mySheet, my1stCell, myLastCell, blnHeader )
' Function : ReadExcel
' Version : 2.00
' This function reads data from an Excel sheet without using MS-Office
'
' Arguments:
' myXlsFile [string] The path and file name of the Excel file
' mySheet [string] The name of the worksheet used (e.g. "Sheet1")
' my1stCell [string] The index of the first cell to be read (e.g. "A1")
' myLastCell [string] The index of the last cell to be read (e.g. "D100")
' blnHeader [boolean] True if the first row in the sheet is a header
'
' Returns:
' The values read from the Excel sheet are returned in a two-dimensional
' array; the first dimension holds the columns, the second dimension holds
' the rows read from the Excel sheet.
'
' Written by Rob van der Woude
' http://www.robvanderwoude.com
Dim arrData( ), i, j
Dim objExcel, objRS
Dim strHeader, strRange
Const adOpenForwardOnly = 0
Const adOpenKeyset = 1
Const adOpenDynamic = 2
Const adOpenStatic = 3
' Define header parameter string for Excel object
If blnHeader Then
strHeader = "HDR=YES;"
Else
strHeader = "HDR=NO;"
End If
' Open the object for the Excel file
Set objExcel = CreateObject( "ADODB.Connection" )
' IMEX=1 includes cell content of any format; tip by Thomas Willig
objExcel.Open "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & _
myXlsFile & ";Extended Properties=""Excel 8.0;IMEX=1;" & _
strHeader & """"
' Open a recordset object for the sheet and range
Set objRS = CreateObject( "ADODB.Recordset" )
strRange = mySheet & "$" & my1stCell & ":" & myLastCell
objRS.Open "Select * from [" & strRange & "]", objExcel, adOpenStatic
' Read the data from the Excel sheet
i = 0
Do Until objRS.EOF
' Stop reading when an empty row is encountered in the Excel sheet
If IsNull( objRS.Fields(0).Value ) Or Trim( objRS.Fields(0).Value ) = "" Then Exit Do
' Add a new row to the output array
ReDim Preserve arrData( objRS.Fields.Count - 1, i )
' Copy the Excel sheet's row values to the array "row"
' IsNull test credits: Adriaan Westra
For j = 0 To objRS.Fields.Count - 1
If IsNull( objRS.Fields(j).Value ) Then
arrData( j, i ) = ""
Else
arrData( j, i ) = Trim( objRS.Fields(j).Value )
End If
Next
' Move to the next row
objRS.MoveNext
' Increment the array "row" number
i = i + 1
Loop
' Close the file and release the objects
objRS.Close
objExcel.Close
Set objRS = Nothing
Set objExcel = Nothing
' Return the results
ReadExcel = arrData
End Function
P.S. Thanks to Rob van der Woude for the bottom Function :)
Upvotes: 1
Reputation: 55672
vbs answer below that gives the same output as the Excel VBA above. This version opens up the complete report when finished.
Dim objExcel
Dim objFSO
Dim objFolder
Dim objFile
Dim objTF
Dim Wb
Dim ws
Dim strFolderName
Dim strArray
Dim StrTest
Set objExcel = CreateObject("Excel.application")
strFolderName = "c:\Temp"
strArray = Join(Array("FirstName", "LastName", "Email"), ",")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objFolder = objFSO.getFolder(strFolderName)
Set objTF = objFSO.createtextfile(strFolderName & "ErrReport.csv")
With objExcel
.DisplayAlerts = False
.EnableEvents = False
.ScreenUpdating = False
End With
On Error Resume Next
For Each objFile In objFolder.Files
'If Right$(objFile.Name, Len(objFile.Name) - InStrRev(objFile.Name, ".")) Like "xls" Then
If Right(objFile.Name, Len(objFile.Name) - InStrRev(objFile.Name, ".")) like "xls*" Then
Set Wb = objExcel.Workbooks.Open(objFile)
Set ws = Wb.Sheets(1)
StrTest = Join(objExcel.Transpose(ws.Range([ws].[a1], ws.[a3]).Value2), ",")
objTF.writeline strFileName & "," & (StrComp(StrTest, strArray, vbBinaryCompare) = 0)
Wb.Close False
End If
Next
On Error GoTo 0
objTF.Close
With objExcel
.DisplayAlerts = True
.EnableEvents = True
.ScreenUpdating = True
.Workbooks.Open (strFolderName & "\ErrReport.csv")
.Visible = True
End With
Upvotes: 0
Reputation: 55672
The code below
strFolderName
Runs a single case sensitive test on the first three cells of the first sheet, and writes all filenames and the test results to a csv file "ErrReport.csv" in the strFolderName
directory with
objTF.writeline strFileName & "," & (StrComp(StrTest, strArray, vbBinaryCompare) = 0)
Sub FileChk()
Dim Wb As Workbook
Dim ws As Worksheet
Dim objFSO As Object
Dim objTF As Object
Dim strFolderName As String
Dim strFileName As String
Dim strArray As String
Dim StrTest As String
strFolderName = "c:\temp\"
strFileName = Dir(strFolderName & "*.xls*")
strArray = Join(Array("FirstName", "LastName", "Email"), ",")
Set objFSO = CreateObject("scripting.filesystemobject")
Set objTF = objFSO.createtextfile(strFolderName & "ErrReport.csv")
With Application
.DisplayAlerts = False
.EnableEvents = False
.ScreenUpdating = False
lngCalc = .Calculation
.Calculation = xlCalculationManual
End With
Do While Len(strFileName) > 0
Set Wb = Workbooks.Open(strFolderName & strFileName)
Set ws = Wb.Sheets(1)
StrTest = Join(Application.Transpose(Range([ws].[a1], ws.[a3]).Value2), ",")
objTF.writeline strFileName & "," & (StrComp(StrTest, strArray, vbBinaryCompare) = 0)
Wb.Close False
strFileName = Dir
Loop
With Application
.CutCopyMode = False
.DisplayAlerts = True
.EnableEvents = True
.ScreenUpdating = True
.Calculation = lngCalc
End With
objTF.Close
End Sub
Upvotes: 1