Deleting EXACT duplicate rows

L

luu980

Does anyone know of a macro to remove identical rows in excel?

That is, the macro is to first *_compare_rows_against_one_another_*
then delete the identical ones, leaving only one copy.

I have found many macros that delete rows, but it only compares values
in a single column. This is NOT what I need.

What I need is this, for example:

aaaa bbbb cccc dddd
aaaa bbbb zzzzz dddd
aaaa rrrrrr cccc dddd
mmm bbbb cccc dddd
aaaa bbbb cccc dddd
aaaa bbbb zzzzz dddd
aaaa bbbb zzzzz dddd

Leaving me:
aaaa bbbb cccc dddd
aaaa bbbb zzzzz dddd
aaaa rrrrrr cccc dddd
mmm bbbb cccc dddd
 
K

KC

You can join up all test conditions then delete
or concatenate them into say column E then compare

like if A(i)=A(i-1) and B(i)=B(i-1) and C(i)=C(i-1) and D(i)=D(i-1) then
row(i).delete

or E=A & B & C & D
compare E and delete as desire
 
L

luu980

hmmm... I'm not familiar with VB programming, actually excel programming
also for that matter. :( The VB example provided above is okay for a
table with a few lines of data, but is it easy to setup for large data
sets? I have over 4000 rows of data that needs the duplicates filtered
out and I don't know where to begin in terms of VB programming.

No one knows of a macro that does this? Setting up filters to do this
task seems significantly more difficult than just executing a macro
each time.

Surely there must be one out there already for this?!
 
R

RB Smissaert

Try something like this:

Sub FilterDuplicateRows()

Dim i As Long
Dim c As Long
Dim n As Long
Dim LR As Long
Dim LC As Long
Dim rngLast As Range
Dim arr
Dim arr2
Dim strTemp As String

Set rngLast = _
Application.InputBox(Prompt:="Choose the last cell of the range to
filter", _
Title:="clear duplicate rows", _
Type:=8)

Application.ScreenUpdating = False

Cells(1).EntireColumn.Insert

LR = rngLast.Row
LC = rngLast.Column

ReDim arr2(1 To LR, 1 To LC - 1)

arr = Range(Cells(1), Cells(LR, LC))

For c = 2 To LC
arr2(1, c - 1) = arr(1, c)
Next

For i = 1 To LR
strTemp = ""
For c = 2 To LC
strTemp = strTemp & arr(i, c)
Next
arr(i, 1) = strTemp
Next

procSort2D arr, "A", 1

n = 1

For i = 2 To LR
If arr(i, 1) <> arr(i - 1, 1) Then
n = n + 1
For c = 2 To LC
arr2(n, c - 1) = arr(i, c)
Next
End If
Next

Range(Cells(1), Cells(LR, LC)).Clear

Range(Cells(1), Cells(n, LC - 1)) = arr2

Application.ScreenUpdating = True

End Sub

Function procSort2D(ByRef avArray, _
ByRef sOrder As String, _
ByRef iKey As Long, _
Optional ByRef iLow1 As Long = -1, _
Optional ByRef iHigh1 As Long = -1) As Boolean

Dim iLow2 As Long
Dim iHigh2 As Long
Dim i As Long
Dim vItem1 As Variant
Dim vItem2 As Variant

On Error GoTo ERROROUT

If iLow1 = -1 Then
iLow1 = LBound(avArray, 1)
End If

If iHigh1 = -1 Then
iHigh1 = UBound(avArray, 1)
End If

'Set new extremes to old extremes
iLow2 = iLow1
iHigh2 = iHigh1

'Get value of array item in middle of new extremes
vItem1 = avArray((iLow1 + iHigh1) \ 2, iKey)

'Loop for all the items in the array between the extremes
While iLow2 < iHigh2

If sOrder = "A" Then
'Find the first item that is greater than the mid-point item
While avArray(iLow2, iKey) < vItem1 And iLow2 < iHigh1
iLow2 = iLow2 + 1
Wend

'Find the last item that is less than the mid-point item
While avArray(iHigh2, iKey) > vItem1 And iHigh2 > iLow1
iHigh2 = iHigh2 - 1
Wend
Else
'Find the first item that is less than the mid-point item
While avArray(iLow2, iKey) > vItem1 And iLow2 < iHigh1
iLow2 = iLow2 + 1
Wend

'Find the last item that is greater than the mid-point item
While avArray(iHigh2, iKey) < vItem1 And iHigh2 > iLow1
iHigh2 = iHigh2 - 1
Wend
End If

'If the two items are in the wrong order, swap the rows
If iLow2 < iHigh2 Then
For i = LBound(avArray) To UBound(avArray, 2)
vItem2 = avArray(iLow2, i)
avArray(iLow2, i) = avArray(iHigh2, i)
avArray(iHigh2, i) = vItem2
Next
End If

'If the pointers are not together, advance to the next item
If iLow2 <= iHigh2 Then
iLow2 = iLow2 + 1
iHigh2 = iHigh2 - 1
End If
Wend

'Recurse to sort the lower half of the extremes
If iHigh2 > iLow1 Then procSort2D avArray, sOrder, iKey, iLow1, iHigh2

'Recurse to sort the upper half of the extremes
If iLow2 < iHigh1 Then procSort2D avArray, sOrder, iKey, iLow2, iHigh1

procSort2D = True

Exit Function
ERROROUT:

procSort2D = False

End Function


RBS
 
R

RB Smissaert

It needed a few adjustments:

Sub FilterDuplicateRows()

Dim i As Long
Dim c As Long
Dim n As Long
Dim LR As Long
Dim LC As Long
Dim rngLast As Range
Dim arr
Dim arr2
Dim strTemp As String

On Error GoTo ERROROUT 'for cancelled input
Set rngLast = _
Application.InputBox(Prompt:="Choose the last cell of the range to
filter", _
Title:="clear duplicate rows", _
Default:=Cells(1).End(xlDown).End(xlToRight).Address,
_
Type:=8)
On Error GoTo 0

Application.ScreenUpdating = False

Cells(1).EntireColumn.Insert

LR = rngLast.Row
LC = rngLast.Column

ReDim arr2(1 To LR, 1 To LC - 1)

arr = Range(Cells(1), Cells(LR, LC))

For i = 1 To LR
strTemp = ""
For c = 2 To LC
strTemp = strTemp & arr(i, c)
Next
arr(i, 1) = strTemp
Next

procSort2D arr, "A", 1

For c = 2 To LC
arr2(1, c - 1) = arr(1, c)
Next

n = 1

For i = 2 To LR
If arr(i, 1) <> arr(i - 1, 1) Then
n = n + 1
For c = 2 To LC
arr2(n, c - 1) = arr(i, c)
Next
End If
Next

Range(Cells(1), Cells(LR, LC)).Clear

Range(Cells(1), Cells(n, LC - 1)) = arr2

Application.ScreenUpdating = True

ERROROUT:

End Sub


RBS
 
R

RB Smissaert

One more refinement, to keep the row order as it is:

Sub FilterDuplicateRows()

Dim i As Long
Dim c As Long
Dim n As Long
Dim LR As Long
Dim LC As Long
Dim rngLast As Range
Dim arr
Dim arr2
Dim strTemp As String
Dim btExtraColumn As Byte

On Error GoTo ERROROUT 'for cancelled input
Set rngLast = _
Application.InputBox(Prompt:="Choose the last cell of the range to
filter", _
Title:="clear duplicate rows", _
Default:=Cells(1).End(xlDown).End(xlToRight).Address,
_
Type:=8)
On Error GoTo 0

If MsgBox("Keep current row order?", _
vbYesNo + vbDefaultButton1 + vbQuestion, _
"clear duplicate rows") = vbYes Then
btExtraColumn = 1
End If

Application.ScreenUpdating = False

Cells(1).EntireColumn.Insert

LR = rngLast.Row
LC = rngLast.Column + btExtraColumn

ReDim arr2(1 To LR, 1 To LC - 1)

arr = Range(Cells(1), Cells(LR, LC))

Cells(1).EntireColumn.Delete

For i = 1 To LR
strTemp = ""
For c = 2 To LC - btExtraColumn
strTemp = strTemp & arr(i, c)
Next
arr(i, 1) = strTemp
'to keep track of the original order
If btExtraColumn = 1 Then
arr(i, LC) = i
End If
Next

procSort2D arr, "A", 1

For c = 2 To LC
arr2(1, c - 1) = arr(1, c)
Next

n = 1

For i = 2 To LR
If arr(i, 1) <> arr(i - 1, 1) Then
n = n + 1
For c = 2 To LC
arr2(n, c - 1) = arr(i, c)
Next
End If
Next

'for if there are no duplicates
If n = LR Then
Application.ScreenUpdating = True
Exit Sub
End If

'put the original order back
If btExtraColumn = 1 Then
procSort2D arr2, "A", LC - 1, 1, n
End If

Range(Cells(1), Cells(LR, LC)).Clear

Range(Cells(1), Cells(n, LC - 1)) = arr2

If btExtraColumn = 1 Then
Cells(LC - 1).EntireColumn.Clear
End If

Application.ScreenUpdating = True

ERROROUT:

End Sub


RBS
 
K

kounoike

This one takes a very primitive way, so it'll take a long time to be
done.
so, if you have many rows to deal, it may looks like Excel to be
freezed.
but statusbar will show processing state.
i assume data is populated in column A, B, C, D and starting at row 1.

Sub delduplicate()
Const cl1 = "a" '<==column to compare -change if need
Const cl2 = "b" '<==column to compare -change if need
Const cl3 = "c" '<==column to compare -change if need
Const cl4 = "d" '<==column to compare -change if need
Const frow = 1 '<== start row noumber -change if need

sr = frow
nr = sr + 1
Set lastcell = Cells(Cells(frow, cl1). _
CurrentRegion.Rows.count + frow, cl1)
Do While (sr < lastcell.Row - 1)
Application.ScreenUpdating = False
Do While (nr < lastcell.Row)
Application.StatusBar = "last row is " & _
lastcell.Row - 1 & " processing row is " & sr
If Cells(sr, cl1) = Cells(nr, cl1) _
And Cells(sr, cl2) = Cells(nr, cl2) _
And Cells(sr, cl3) = Cells(nr, cl3) _
And Cells(sr, cl4) = Cells(nr, cl4) Then
Rows(nr).Delete
Else
nr = nr + 1
End If
Loop
sr = sr + 1
nr = sr + 1
Loop
End Sub

keizi
..
 
R

RB Smissaert

Some further refinements in that it will keep the same order between
duplicates, making the final result as close
as possible to the original.
Also this one won't clear formats.


Sub FilterDuplicateRows()

Dim i As Long
Dim c As Long
Dim n As Long
Dim LR As Long
Dim LC As Long
Dim rngLast As Range
Dim arr
Dim arr2
Dim strTemp As String
Dim btExtraColumn As Byte
Dim strSplitter As String
Dim lMaxLenIndex As Long

'this may need altering if it is in the sheet data
strSplitter = "||"

On Error GoTo ERROROUT 'for cancelled input
Set rngLast = _
Application.InputBox(Prompt:="Choose the last cell of the range to
filter", _
Title:="clear duplicate rows", _
Default:=Cells(1).End(xlDown).End(xlToRight).Address,
_
Type:=8)
On Error GoTo 0

If MsgBox("Keep current row order?", _
vbYesNo + vbDefaultButton1 + vbQuestion, _
"clear duplicate rows") = vbYes Then
btExtraColumn = 1
End If

Application.ScreenUpdating = False

Cells(1).EntireColumn.Insert

LR = rngLast.Row
LC = rngLast.Column + btExtraColumn

lMaxLenIndex = Len(CStr(LR))

ReDim arr2(1 To LR, 1 To LC - 1)

'get the range to clear from duplicates
arr = Range(Cells(1), Cells(LR, LC))

Cells(1).EntireColumn.Delete

'get the concatenated row values
For i = 1 To LR
strTemp = ""
For c = 2 To LC - btExtraColumn
strTemp = strTemp & arr(i, c)
Next
'add the i to keep the order between duplicates
arr(i, 1) = strTemp & strSplitter & String(lMaxLenIndex - Len(CStr(i)),
"1") & i
'to keep track of the original order
If btExtraColumn = 1 Then
arr(i, LC) = i
End If
Next

procSort2D arr, "A", 1

'take the added padded i off
For i = 1 To LR
arr(i, 1) = Left$(arr(i, 1), InStr(1, arr(i, 1), strSplitter,
vbBinaryCompare) - 1)
Next

'copy first row
For c = 2 To LC
arr2(1, c - 1) = arr(1, c)
Next

n = 1

'copy the non-duplicates
For i = 2 To LR
If arr(i, 1) <> arr(i - 1, 1) Then
n = n + 1
For c = 2 To LC
arr2(n, c - 1) = arr(i, c)
Next
End If
Next

'for if there are no duplicates
If n = LR Then
Application.ScreenUpdating = True
Exit Sub
End If

'put the original order back
If btExtraColumn = 1 Then
procSort2D arr2, "A", LC - 1, 1, n
End If

'clear the old range
Range(Cells(1), Cells(LR, LC)).ClearContents

'put the new data in
Range(Cells(1), Cells(n, LC - 1)).Value = arr2

'clear the column used to re-order
If btExtraColumn = 1 Then
Cells(LC - 1).EntireColumn.ClearContents
End If

Application.ScreenUpdating = True

ERROROUT:

End Sub


RBS
 
L

luu980

I tried executing RB Smissaert's suggested macro and I got a compil
error:

Set rngLast = _
Application.InputBox(Prompt:="Choose the last cell of the range to


Does anyone knowhow I can fix it?

Thank you in advance
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only take a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Top