skatun
skatun

Reputation: 877

Finding nested duplicates

I am trying to make a Bill of Material list from my CAD software(Creo), this is exported out as a text file and its almost working perferct. The problem is that the text file spits out all parts in model, and I would like to count up duplicates instead of list them out after each other i.e quanties larger then 1.

My code does this nicely if the duplicate is listed in the row above, however if its nested(sub assy) this does not work. In picture below 00151564.asm(level 2) should be listed as quantity=3 but the parts on level 3 should be listed as quantity=1

enter image description here

Below is the original textfile creo spitts out, groups and patterns makes extra unwanteded indents, and material is just sometimes given..

enter image description here

Here is my code:

Sub simen2(Optional myFile As String = "Z:\Prosjekt\33907\Equipment and 
materials\Structure\treetool2.txt")
Dim text As String
Dim textline As String
Dim textlineTemp As String
Dim foo As String
Dim output As String
Dim parent As String
Dim grandma As String
Dim greatgrandma As String
Dim greatgreatgrandma As String
Dim partNumber As String
Dim quantity As Integer
Dim material As String
Dim wsOut As Worksheet
Dim i, k As Long
Dim level, levelOld, levelTemp, levelTempOld, subtractLevel As Integer
Dim duplicate As Boolean
Dim levelDictionary As Object
'Init variables

Set wsOut = ThisWorkbook.Worksheets("Output")

subtractLevel = 0
quantity = 1
duplicate = True

partNumberOld = ""
commonNameOld = ""
levelOld = 0
levelTemp = 0
levelTempOld = 0
materialOld = ""
textlineOld = ""
material = "NA"
materialOld = "NA"

text = wsOut.Cells(1, 1).Value
wsOut.Cells.ClearContents
wsOut.Cells(1, 1).Value = text
wsOut.Cells(1, 2).Value = Now
wsOut.Cells(1, 3).Value = myFile


Call write2ExcelHeader(wsOut)

Set levelDictionary = CreateObject("Scripting.Dictionary")


i = 0
k = 1


FileNum = FreeFile()
Open myFile For Input As #FileNum

Line Input #FileNum, foo
Line Input #FileNum, foo


Do Until EOF(FileNum)
    k = k + 1
    
    ' read in
    Line Input #FileNum, textline
    
    ' Get level, however group and pattern fuck things up
    If InStr(10, textline, "<HTML>") > 0 Or InStr(textline, "Pattern") > 0 Or InStr(textline, "Group") > 0 Then
        levelTemp = getLevel(textline)
        
        If levelTemp < levelTempOld Then
            subtractLevel = 0
        End If
        
        If InStr(textline, "Pattern") > 0 Or InStr(textline, "Group") > 0 Then
            subtractLevel = subtractLevel + 1
            k = 1
        End If
    End If
    
    ' Grab material
    If InStr(textline, "Materials") > 0 Then
        Line Input #FileNum, textline
        material = Trim(Replace(textline, "<curr>", ""))
    End If
    
    'we need to find out if the line has number as first item, i.e trim away spaces, it prints out previous item here...
    If InStr(10, textline, "<HTML>") > 0 Then
        'textlineTemp = RemoveHTML(textline)
        'textlineTemp = Replace(textlineTemp, "Ã", "Ø")
        
        
        partNumber = getPartNumber(textline)
        commonName = getCommonName(textline)
        partType = getType(partNumber)
        material = "NA"
            
        
        ' add part to dictionary, this is unique parts
        If levelDictionary.exists(partNumber) Then
            levelDictionary(partNumber) = levelDictionary(partNumber) + 1
        Else
            levelDictionary.Add partNumber, 1
        End If
        
        ' Remove duplicates ......
        If partNumberOld = partNumber And levelTempOld = levelTemp Then
            duplicate = True
            quantity = quantity + 1
        Else
            quantity = 1
            duplicate = False
        End If
        


        ' get family history
        level = levelTemp - subtractLevel
        LevelArray(level) = partNumber
        
     
        
        'lets present result
        If Not duplicate Then
            If level > 1 Then
                parent = LevelArray(level - 1)
            Else
                parent = "NA"
            End If
            
            If level > 2 Then
                grandma = LevelArray(level - 2)
            Else
                grandma = "NA"
            End If
            
            If level > 3 Then
                greatgrandma = LevelArray(level - 3)
            Else
                greatgrandma = "NA"
            End If
            
            If level > 4 Then
                greatgreatgrandma = LevelArray(level - 4)
            Else
                greatgreatgrandma = "NA"
            End If
            
            If i > 0 Then
                 Call write2Excel(wsOut, i + 2, partNumberOld, commonNameOld, quantityOld, materialOld, levelOld, partTypeOld, parentOld, grandmaOld, greatgrandmaOld, greatgreatgrandmaOld)
            End If
            i = i + 1
        End If
        
          
    End If
    

    'we always uses previous values for print out
    partNumberOld = partNumber
    commonNameOld = commonName
    levelOld = level
    levelTempOld = levelTemp
    partTypeOld = partType
    quantityOld = quantity
    materialOld = material
    textlineOld = textline
    
    parentOld = parent
    grandmaOld = grandma
    greatgrandmaOld = greatgrandma
    greatgreatgrandmaOld = greatgreatgrandma
    
    
    
Loop

 Close #FileNum
 Debug.Print "How many parts " & i

 Call DeList(wsOut)
 Call CreateList(wsOut, "FilterOutput")


 Call totalBOM(levelDictionary)
End Sub

Upvotes: 0

Views: 193

Answers (2)

CDP1802
CDP1802

Reputation: 16392

An alternative OO approach using a class module. Output is to a sheet named "Output2".

Update 1 - Added debugging log, creo.log in same folder as workbook.

Option Explicit

Sub ProcessTextFile()
    Const TXTFILE = "treetool_Rextroth.txt" '"treetool20210503.txt"
    Const MAX_LEVEL = 10

    Dim tree() As clsItem, item As clsItem
    Dim ruler() As Integer, level As Integer, rs
    Dim FileNum As Integer, textline As String, text As String
    Dim start_name As Integer, width_name As Integer, n As Long
    Dim t0 As Single: t0 = Timer

    ReDim tree(MAX_LEVEL)
    ReDim ruler(MAX_LEVEL)
    
    FileNum = FreeFile()
    Open ThisWorkbook.Path & "\" & TXTFILE For Input As #FileNum
    ' use first header line to get common name column position
    Line Input #FileNum, textline
    start_name = InStr(1, textline, "PTC_COMMON_NAME")
    width_name = InStr(1, textline, "PRO_MP_") - start_name
    
    ' skip
    Line Input #FileNum, textline

    ' set start level and indent
    Set tree(0) = New clsItem
    tree(0).level = 0
    tree(0).id = "NA"
    level = 1
    ruler(1) = 1

    ' open log file
    Dim fso, ts
    Set fso = CreateObject("Scripting.Filesystemobject")
    Set ts = fso.createtextfile("creo.log")

    ' scan text file
    n = 2
    Do Until EOF(FileNum)
       n = n + 1
       Line Input #FileNum, textline
       rs = ParseLine(textline, n, ts)
       If rs(0) = "ASM" Or rs(0) = "PRT" Then
           ' determine level from indent
           level = GetLevel(ruler, rs(2), level, ts)
           ' create new item
           Set item = New clsItem
           With item
             .itemtype = rs(0)
             .id = rs(1)
             .name = Mid(textline, start_name, width_name)
             .qu = 1
             .level = level
             .parent = tree(level - 1).id
             .creo = n
           End With
           ' build tree
           Set tree(level) = item
           tree(level - 1).addItem item
        ' groups or patterns
        ElseIf rs(0) = "GRP" Or rs(0) = "PTN" Then
            ' increase ruler for current level by 2
            ruler(level) = ruler(level) + 2
            ts.writeline n & " " & rs(0) & " change ruler(" & level & ")=" & ruler(level)
        ' materials
        ElseIf rs(0) = "MTL" Then
            If item.itemtype = "PRT" Then
                ' get material from next line
                n = n + 1
                Line Input #FileNum, textline
                item.material = Trim(Replace(textline, "<curr>", ""))
            End If
        End If
    Loop

    ' output tree
    Application.ScreenUpdating = False
    With Sheets("Output2")
        text = .Range("A1")
        .Cells.ClearContents
        .Cells.Clear
        .Range("A1") = text
        .Range("B1") = Now
        .Range("C1") = TXTFILE

        With .Range("A2:H2")
            .Value2 = Array("Part No", "Common Name", _
                  "Qu.", "Material", "Level", "Type", "Parent", "Creo Lines")
            .Interior.Color = RGB(255, 200, 0)
            .Font.Bold = True
        End With

        ' save objects to sheet
        tree(0).SaveToWorksheet .Range("A3")

        ' prettify
        .Range("C:C,E:E").HorizontalAlignment = xlCenter
        .Columns("H:H").HorizontalAlignment = xlRight
        .Columns("A:H").AutoFit
        .ListObjects.add(xlSrcRange, .UsedRange.Offset(1), , xlYes).name = "Table2"
        .ListObjects("Table2").TableStyle = "TableStyleLight1"
        .Activate
        .Range("A1").Select
    End With

    Application.ScreenUpdating = True
    MsgBox Format(n, "#,###") & " lines parsed in " & _
           Format(Timer - t0, "0.00") & " seconds"
End Sub

' determine level from indent using ruler
Function GetLevel(ByRef ruler, indent, level, ts) As Integer

    Dim n As Integer
    n = level ' current level
    ' is this an increase on previous
    If indent > ruler(n) Then
        ts.writeline "GetLevel before ruler(" & n & ")=" & ruler(n)
        n = n + 1
    Else
       ' find previous level
       n = 0
       Do
           n = n + 1
       Loop While indent > ruler(n)
    End If
    ruler(n) = indent ' update
    ts.writeline "Level now " & n & " ruler(" & n & ")=" & indent
    GetLevel = n

End Function

' determine linetype, partno, indent
Function ParseLine(s As String, n, ts) As Variant

    Dim indent As Integer, partno As String
    Dim tmp As String, linetype As String

    indent = Len(s) - Len(LTrim(s)) ' no of spaces
    If InStr(1, s, "<HTML>") Then
        tmp = WorksheetFunction.Trim(s)
        partno = Split(tmp, " ")(0)
        linetype = Right(partno, 3)
        ts.writeline vbCrLf & n & " INDENT=" & indent & " '" & s
    ElseIf InStr(1, s, "Materials") Then
        linetype = "MTL"
    ElseIf InStr(1, s, "Group") Then
        linetype = "GRP"
    ElseIf InStr(1, s, "Pattern") Then
        linetype = "PTN"
    End If
    ParseLine = Array(linetype, partno, indent)
    
End Function

class module clsItem 
========================
Option Explicit

' this class represent a part or assembly
Public id As String ' partno
Public name As String ' common name
Public itemtype As String ' PRT or ASM
Public parent As String
Public level As Integer
Public qu  As Integer
Public material As String
Public creo As String ' source line nos in Creo file
Public items As New Collection

' add items
Public Function addItem(obj As clsItem)
    ' check if exists, if so increment quantity
    Dim item As clsItem, bExists As Boolean
    For Each item In items
        If item.id = obj.id Then
            item.qu = item.qu + obj.qu
            item.creo = item.creo & " " & obj.creo
            bExists = True
            Exit For
        End If
    Next
    ' does not exist so add new
    If Not bExists Then items.add obj, obj.id
End Function

' save object and all children
Public Sub SaveToWorksheet(rng As Range)
    Const SP = 5 ' no of spaces to indent at each level
    Dim item As clsItem
    If level > 0 Then
        rng = Space(level * SP) & id
        rng.Offset(0, 1) = name
        rng.Offset(0, 2) = qu
        rng.Offset(0, 3) = material
        rng.Offset(0, 4) = level
        rng.Offset(0, 5) = itemtype
        rng.Offset(0, 6) = parent
        rng.Offset(0, 7) = creo
        Set rng = rng.Offset(1)
    End If
    ' recurse
    For Each item In Me.items
        item.SaveToWorksheet rng
    Next
End Sub

Upvotes: 1

CDP1802
CDP1802

Reputation: 16392

This scans the Output sheet and creates a new sheet called "No Duplicates". However, with the bugs in your data I suspect this won't work for more complex cases e.g. where an assembly appears in other assemblies at different levels.

Update 1 - dictionary at each level

Sub RemoveDuplicates()

    Const START_ROW = 3 ' skip headers
    Const COL_PARTNO = 1 ' A
    Const COL_QU = 3 '  C
    Const COL_LEVEL = 5 ' E
    Const COL_TYPE = 6 '  F

    Dim wb As Workbook, ws As Worksheet, wsNew As Worksheet
    Dim iLastRow As Long, i As Long, r As Long
    Dim PartNo As String, isASM As Boolean
    Dim dictLevel As Integer, level As Integer
    ' dictionary at each level
    Dim dictASM(10) As Object, arQu As Variant

    Set wb = ThisWorkbook
    Set ws = wb.Sheets("Output")

    iLastRow = ws.Cells(Rows.Count, "A").End(xlUp).Row
    ReDim arQu(iLastRow)
    
    For i = 0 To 10
        Set dictASM(i) = CreateObject("Scripting.Dictionary")
    Next

    r = START_ROW
    dictLevel = 0
    Do
        PartNo = Trim(ws.Cells(r, COL_PARTNO))
        isASM = Trim(ws.Cells(r, COL_TYPE) = "ASM")
        arQu(r) = ws.Cells(r, COL_QU)
        level = ws.Cells(r, COL_LEVEL)
        If isASM Then
            ' is this a new assembly clear lower level dictionaries
            If level < dictLevel Then
                For i = dictLevel To UBound(dictASM)
                    Set dictASM(i) = CreateObject("Scripting.Dictionary")
                Next
                dictLevel = level
            Else

                If dictASM(dictLevel).exists(PartNo) Then
        
                    ' duplicate assembly
                    ' increment first occurence
                    i = dictASM(dictLevel)(PartNo)
                    arQu(i) = arQu(i) + arQu(r)
                    arQu(r) = 0
                    level = ws.Cells(r, COL_LEVEL)
                    ' delete lower level items
                    Do While ws.Cells(r + 1, COL_LEVEL) > level
                        r = r + 1
                        arQu(r) = 0 ' delete later
                    Loop
                    
                Else
                    ' first occurence
                    dictASM(dictLevel).add PartNo, r
                End If

            End If
        End If
        r = r + 1
    Loop While r < iLastRow

    ' create new sheet without duplicates
    Set wsNew = Sheets.add
    wsNew.Name = "No Duplicates"
    ws.Range("A2:G2").Copy wsNew.Range("A2") ' header
    i = START_ROW
    For r = START_ROW To iLastRow
        If arQu(r) > 0 Then
            ws.Cells(r, 1).Resize(1, 8).Copy wsNew.Cells(i, 1)
            wsNew.Cells(i, COL_QU) = arQu(r)
            i = i + 1
        End If
    Next
    wsNew.Columns("A:G").AutoFit

    MsgBox Format(iLastRow - 1, "#,###") & " rows scanned", vbInformation

End Sub

Upvotes: 0

Related Questions