View Single Post
  #3   Report Post  
Posted to microsoft.public.excel.programming
Matilda Matilda is offline
external usenet poster
 
Posts: 57
Default Removing duplicate rows

Hi Leo - thanks for this.
Problem is, I am looking for speed as this is part of a larger routine all
very slow.
Even editing your code to bare essentials is slow - as is this of mine:
Sub RemoveDuplicates()
Dim name1, name2 As String
Dim dates1, dates2 As Variant
Dim i, j, datecnt As Integer
Dim dateRng As Range
Set dateRng = Range("E1:J300")
Range("A1:K300").Select
Selection.Sort Key1:=Range("A1"), Order1:=xlAscending, Header:=xlYes, _
OrderCustom:=1, MatchCase:=False, Orientation:=xlTopToBottom
For rowndx = 300 To 2 Step -1
name1 = Selection.Cells(rowndx, 1).Value & Selection.Cells(rowndx, 2).Value
name2 = Selection.Cells(rowndx - 1, 1).Value & Selection.Cells(rowndx - 1,
2).Value
If (name1 = name2) Then
nametrue = True
For j = 5 To 10
dates1 = dateRng.Rows(rowndx).Columns(j).Value
dates2 = dateRng.Rows(rowndx - 1).Columns(j).Value
If dates1 = dates2 Or (dates1 = "" And dates2 = "") Then
datecnt = datecnt + 1
End If
Next j
If datecnt = 6 Then
Rows(rowndx).EntireRow.Delete
datecnt = 0
End If
End If
datecnt = 0
Next rowndx
End Sub

works, but SLOOOOOOwwww !! 13 seconds for a 68 row list.

I have hardcoded size of list (300 rows) and this further slows execution,
but don't know the syntax to define list dynamically.
Problem seems to be Excel won't compare one range to another and return a
true or false. Gives a "type mismatch" error and shrugs it off!

Still trying ...

Matilda



"Leo Heuser" wrote:

"Matilda" skrev i en meddelelse
...
Hi All,
I have a variation on the removing duplicate rows problem:

Given a sorted list of x rows with n columns each,
The records are redundant if columns 1, 2 and 5-9 match.
A string is created of the concatenated cells 1&2 (names)and compared, and
this works fine.
But I cannot get code to compare the ranges 5-9 with one another to work,
I
get a "type mismatch" error, even though the data types are exactly the
same(dates).

Thus the solution;
Sub FixDuplicateRows()
Dim RowNdx As Long
Dim ColNum As Integer
ColNum = Selection(1).Column
For RowNdx = Selection(Selection.Cells.Count).Row To _
Selection(1).Row + 1 Step -1
If Cells(RowNdx, ColNum).Value = Cells(RowNdx - 1, ColNum).Value Then
Cells(RowNdx, ColNum).Value = "----"
End If
Next RowNdx
End Sub
(posted on Chip Pearson's page http://www.cpearson.com/excel/duplicat.htm)
fails when I substitute the code:
If (dateRng.Rows(RowNdx) = dateRng.Rows(RowNdx - 1)) Then
dateTrue = True
End If
giving the "type mismatch error".

Have tried comparing cell by cell, but am afraid that this will slow down
runtime to an extent that it is probably just as efficient to leave the
duplicates in!

Any advice gratefully received,

Matilda


Hi Matilda

This routine will do the job for you:

Sub DuplicateRecordsAND()
'Leo Heuser, Aug 17 2001
'Jan 21 2002, Sep 14 2006
'This routine deletes or formats duplicates in a list.
'Entire rows are deleted/formatted. A single cell may be
'formatted. See below.
'A list of the duplicates may be inserted in a new sheet,
'after the active sheet. Row numbers may be added to the list.
'More than one column may be used to find
'duplicates in the list. E.g. column A may
'contain several entries with the name "Peter"
'and column B several entries with "Smith",
'column F several entries with "Oxford St."
'Setting ColumnsToMatch to Array("A", "B", "F")
'will format/delete all *duplicates* where a match
'exist between "A" AND "B" AND "F"
' A B F
'1 Name Surname Address
'2 Peter Smith Oxford St.
'3 Peter Smith Regent St.
'4 Peter Jones Oxford St.
'5 Peter Smith Oxford St.
'Only the fifth row is considered a duplicate
'under these circumstances.

Dim AddRowNumberToList As Boolean
Dim CheckRange As Range
Dim CheckRows As Range
Dim CollectionKey As String
Dim ColumnsToMatch As Variant
Dim Counter As Long
Dim DeleteDuplicates As Boolean
Dim Dummy As Long
Dim DuplicateRange As Range
Dim DuplicatesExist As Boolean
Dim Element As Variant
Dim FieldsCollection As New Collection
Dim FormatColumn As String
Dim FormatDuplicates As Boolean
Dim lLBound As Long
Dim lRow As Long
Dim lUBound As Long
Dim OffsetValue() As Long
Dim RowNumberCollection As New Collection
Dim StartCell As Range
Dim SubArray As Variant
Dim WriteListOfDuplicates As Boolean

'Edit the next 7 lines to reflect the actual setup
Set CheckRows = Rows("1:10000")
ColumnsToMatch = Array("a", "b", "f")

' If FormatColumn ="" , entire rows get colored font
' If FormatColumn = a column name (e.g. "h", the interior
' of cells in that column is colored
' For either to work FormatDuplicates must be set to true
FormatColumn = "h"

DeleteDuplicates = False
FormatDuplicates = True
WriteListOfDuplicates = False
AddRowNumberToList = False

On Error GoTo Finito

lLBound = LBound(ColumnsToMatch)
lUBound = UBound(ColumnsToMatch)

Set CheckRange = Intersect(Range(ColumnsToMatch(lLBound) & _
":" & ColumnsToMatch(lLBound)), CheckRows)

ReDim OffsetValue(lUBound - lLBound + 1)

For Counter = lLBound To lUBound
OffsetValue(Counter) = Range(ColumnsToMatch(Counter) & ":" & _
ColumnsToMatch(Counter)).Column - CheckRange.Column
Next Counter

On Error Resume Next

SubArray = CheckRange.Value
For lRow = 1 To UBound(SubArray, 1)
If SubArray(lRow, 1) < "" Then
CollectionKey = ""
For Counter = lLBound To lUBound
CollectionKey = CollectionKey & _
CheckRange(lRow, 1).Offset(0, _
OffsetValue(Counter)).Value
Next Counter
FieldsCollection.Add Dummy, CStr(CollectionKey)
If Err.Number = 457 Then
Err.Clear
DuplicatesExist = True
RowNumberCollection.Add CheckRange(lRow, 1).Row
If DuplicateRange Is Nothing Then
Set DuplicateRange = _
CheckRange.Cells(lRow, 1)
Else
Set DuplicateRange = Union(DuplicateRange, _
CheckRange.Cells(lRow, 1))
End If
End If
End If
Next lRow

On Error GoTo Finito

If DuplicatesExist = False Then
MsgBox "No duplicates exist.", vbInformation
Else
With DuplicateRange.EntireRow
If WriteListOfDuplicates Then
Worksheets.Add After:=DuplicateRange.Parent
.Copy Destination:=Range("A1")
If AddRowNumberToList Then
Columns("A").Insert
Set StartCell = Range("A1")
For Each Element In RowNumberCollection
StartCell.Value = "Row " & Element
Set StartCell = StartCell.Offset(1, 0)
Next Element
End If
End If

If FormatDuplicates Then
If FormatColumn < "" Then
For Each Element In RowNumberCollection
.Parent.Range(FormatColumn & Element). _
Interior.ColorIndex = 3
Next Element
Else
.Font.ColorIndex = 3
End If
End If

If DeleteDuplicates Then .Delete
End With
End If

Finito:
If Err.Number < 0 Then
MsgBox "Unexpected error." & vbNewLine & Err.Description
End If

On Error GoTo 0

End Sub


--
Best regards
Leo Heuser

Followup to newsgroup only please.