G
GS
I've been working on this with Ron Rosefeld and Jim Cone to find an
optimum solution. I'm pleased to provide the following function for
review/testing/feedback.
The test data was 2 cols by 500,000 rows of random generated numbers
formatted as "0000000000000" so we'd have leading zeros.
The test machine is a 1.6Ghz dual core Dell Precision series laptop
running XP SP3 and Excel2007. Times are approximate, as per method
shown in function, and are as follows:
Allow duplicate values: 9secs
Allow unique values: 10secs
This is a considerable performance improvement over using Dictionary,
plus no ref to the Microsoft Scripting Runtime is needed.
I'd be pleased to here results from running this on other machines.
Here's the code I used to set up the data...
Sub Setup_Data_StripDupes()
With Range("A1:B500000")
.Formula = "=text(randbetween(1,10^6),""0000000000000"")"
.Value = .Value
End With
End Sub
Function StripDupes(Optional AllowDupes As Boolean = True) As Boolean
' Compares colA to colB and removes colA matches found in colB.
' Args In: AllowDupes: True by default. Keeps duplicate values
' found in colA that are not found in colB. If False,
' duplicate values in colA not found in colB are removed.
'
' Returns: True if matches found and no error occurs;
' False if matches not found --OR-- error occurs.
'
' Sources: Ron Rosenfeld, Jim Cone, Garry Sansom
Dim i&, j&, lMatchesFound& 'as long
Dim vRngA, vRngB, vRngOut() 'as variant
vRngA = Range("A1:A" & Cells(Rows.Count, "A").End(xlUp).Row)
vRngB = Range("B1:B" & Cells(Rows.Count, "B").End(xlUp).Row)
Debug.Print Now()
Dim dRngB As New Collection
On Error Resume Next
For j = LBound(vRngB) To UBound(vRngB)
dRngB.Add Key:=CStr(vRngB(j, 1)), Item:=CStr(vRngB(j, 1))
Next 'j
Err.Clear: On Error GoTo ErrExit
If AllowDupes Then '//fastest
On Error GoTo MatchFound
For i = LBound(vRngA) To UBound(vRngA)
If dRngB.Item(CStr(vRngA(i, 1))) <> "" Then _
vRngA(i, 1) = "": lMatchesFound = lMatchesFound + 1
skipit:
Next 'i
Else '//slowest
On Error GoTo MatchFound
For i = LBound(vRngA) To UBound(vRngA)
dRngB.Add Key:=CStr(vRngA(i, 1)), Item:=CStr(vRngA(i, 1))
Next 'i
End If 'AllowDupes
Err.Clear: On Error GoTo ErrExit
j = 0: ReDim vRngOut(UBound(vRngA) - lMatchesFound, 0)
For i = LBound(vRngA) To UBound(vRngA)
If Not vRngA(i, 1) = "" Then _
vRngOut(j, 0) = vRngA(i, 1): j = j + 1
Next 'i
Range("A1:A" & lRows1).ClearContents
Range("A1").Resize(UBound(vRngOut), 1) = vRngOut
Debug.Print Now()
ErrExit:
StripDupes = (Err = 0)
Exit Function
MatchFound:
If AllowDupes Then Resume skipit
vRngA(i, 1) = "": lMatchesFound = lMatchesFound + 1: Resume Next
End Function 'StripDupes()
optimum solution. I'm pleased to provide the following function for
review/testing/feedback.
The test data was 2 cols by 500,000 rows of random generated numbers
formatted as "0000000000000" so we'd have leading zeros.
The test machine is a 1.6Ghz dual core Dell Precision series laptop
running XP SP3 and Excel2007. Times are approximate, as per method
shown in function, and are as follows:
Allow duplicate values: 9secs
Allow unique values: 10secs
This is a considerable performance improvement over using Dictionary,
plus no ref to the Microsoft Scripting Runtime is needed.
I'd be pleased to here results from running this on other machines.
Here's the code I used to set up the data...
Sub Setup_Data_StripDupes()
With Range("A1:B500000")
.Formula = "=text(randbetween(1,10^6),""0000000000000"")"
.Value = .Value
End With
End Sub
Function StripDupes(Optional AllowDupes As Boolean = True) As Boolean
' Compares colA to colB and removes colA matches found in colB.
' Args In: AllowDupes: True by default. Keeps duplicate values
' found in colA that are not found in colB. If False,
' duplicate values in colA not found in colB are removed.
'
' Returns: True if matches found and no error occurs;
' False if matches not found --OR-- error occurs.
'
' Sources: Ron Rosenfeld, Jim Cone, Garry Sansom
Dim i&, j&, lMatchesFound& 'as long
Dim vRngA, vRngB, vRngOut() 'as variant
vRngA = Range("A1:A" & Cells(Rows.Count, "A").End(xlUp).Row)
vRngB = Range("B1:B" & Cells(Rows.Count, "B").End(xlUp).Row)
Debug.Print Now()
Dim dRngB As New Collection
On Error Resume Next
For j = LBound(vRngB) To UBound(vRngB)
dRngB.Add Key:=CStr(vRngB(j, 1)), Item:=CStr(vRngB(j, 1))
Next 'j
Err.Clear: On Error GoTo ErrExit
If AllowDupes Then '//fastest
On Error GoTo MatchFound
For i = LBound(vRngA) To UBound(vRngA)
If dRngB.Item(CStr(vRngA(i, 1))) <> "" Then _
vRngA(i, 1) = "": lMatchesFound = lMatchesFound + 1
skipit:
Next 'i
Else '//slowest
On Error GoTo MatchFound
For i = LBound(vRngA) To UBound(vRngA)
dRngB.Add Key:=CStr(vRngA(i, 1)), Item:=CStr(vRngA(i, 1))
Next 'i
End If 'AllowDupes
Err.Clear: On Error GoTo ErrExit
j = 0: ReDim vRngOut(UBound(vRngA) - lMatchesFound, 0)
For i = LBound(vRngA) To UBound(vRngA)
If Not vRngA(i, 1) = "" Then _
vRngOut(j, 0) = vRngA(i, 1): j = j + 1
Next 'i
Range("A1:A" & lRows1).ClearContents
Range("A1").Resize(UBound(vRngOut), 1) = vRngOut
Debug.Print Now()
ErrExit:
StripDupes = (Err = 0)
Exit Function
MatchFound:
If AllowDupes Then Resume skipit
vRngA(i, 1) = "": lMatchesFound = lMatchesFound + 1: Resume Next
End Function 'StripDupes()