ExcelBanter

ExcelBanter (https://www.excelbanter.com/)
-   Excel Discussion (Misc queries) (https://www.excelbanter.com/excel-discussion-misc-queries/)
-   -   Compare Two Simple Workbooks & Remove Duplicate Rows (https://www.excelbanter.com/excel-discussion-misc-queries/151424-compare-two-simple-workbooks-remove-duplicate-rows.html)

Dave

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave

Bernie Deitrick

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave




Bernie Deitrick

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Dave,

Another assumption that I made was that the lists themselves don't have repeats - so that the
formulas that I use to determine duplicate values will return either 0 or 1, not 2, or 3....

HTH,
Bernie
MS Excel MVP


"Bernie Deitrick" <deitbe @ consumer dot org wrote in message
...
Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave






Dave

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave





Bernie Deitrick

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave







Dave

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Bernie,

The Macro is not quite working properly.

I have uploaded my two workbooks he

http://www.justupit.com/b9f5884e50fb...0e164c44c57149

I have added your macro in workbook 1.

I have highlighted the rows in workbook 1 & 2 that are one offs i.e. the
ones that should remain in each workbook.

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave







Bernie Deitrick

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Dave,

See the fixed (and semi-commented) version below.

HTH,
Bernie
MS Excel MVP


Sub GetRidOfDupes2()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

'Set object references for this workbook
Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)

'Open up the other workbook and set object references
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

'Insert columns for matching formulas
mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert

'Get the sizes of the two databases
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

'Insert formulas in first workbook to determine duplicated data
mySh1.Range(mySh1.Range("C1"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 & "C2=RC[-2]&RC[-1]))"

'Insert formulas in second workbook to determine duplicated data
mySh2.Range(mySh2.Range("C1"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 & "C2=RC[-2]&RC[-1]))"

'Convert formulas to values to prevent recalculation errors
'(from deleting the first set of duplicates)
mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

'Sort the data based on duplicates
'This makes row deletion faster since it is one block
mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh1.Columns("C:C")
'Format the cells so that the value can be found
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With

'If there are any duplicates, delete their rows
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

'Get rid of the formula column inserted earlier
mySh1.Range("C:C").EntireColumn.Delete

'Do the same thing to the second workbook
mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub

"Dave" wrote in message
...
Bernie,

The Macro is not quite working properly.

I have uploaded my two workbooks he

http://www.justupit.com/b9f5884e50fb...0e164c44c57149

I have added your macro in workbook 1.

I have highlighted the rows in workbook 1 & 2 that are one offs i.e. the
ones that should remain in each workbook.

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they
won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave









Dave

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Thanks again Bernie.

One last thing - if the user clicks cancel on the browse screen when looking
for myB2 an error comes up. Error 1004 from memory.

Is there something we can add to prevent this?

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

See the fixed (and semi-commented) version below.

HTH,
Bernie
MS Excel MVP


Sub GetRidOfDupes2()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

'Set object references for this workbook
Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)

'Open up the other workbook and set object references
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

'Insert columns for matching formulas
mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert

'Get the sizes of the two databases
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

'Insert formulas in first workbook to determine duplicated data
mySh1.Range(mySh1.Range("C1"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 & "C2=RC[-2]&RC[-1]))"

'Insert formulas in second workbook to determine duplicated data
mySh2.Range(mySh2.Range("C1"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 & "C2=RC[-2]&RC[-1]))"

'Convert formulas to values to prevent recalculation errors
'(from deleting the first set of duplicates)
mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

'Sort the data based on duplicates
'This makes row deletion faster since it is one block
mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh1.Columns("C:C")
'Format the cells so that the value can be found
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With

'If there are any duplicates, delete their rows
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

'Get rid of the formula column inserted earlier
mySh1.Range("C:C").EntireColumn.Delete

'Do the same thing to the second workbook
mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub

"Dave" wrote in message
...
Bernie,

The Macro is not quite working properly.

I have uploaded my two workbooks he

http://www.justupit.com/b9f5884e50fb...0e164c44c57149

I have added your macro in workbook 1.

I have highlighted the rows in workbook 1 & 2 that are one offs i.e. the
ones that should remain in each workbook.

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no other
entries below the tables. Any other columns of data must also be contiguous, or else they
won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave










Bernie Deitrick

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Dave,

Add a string variable:

Dim myFName As String

Then change this:

Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)

to this

myFName = Application.GetOpenFilename
If myFName = "False" Then Exit Sub
Set myB2 = Application.Workbooks.Open(myFName)

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Thanks again Bernie.

One last thing - if the user clicks cancel on the browse screen when looking
for myB2 an error comes up. Error 1004 from memory.

Is there something we can add to prevent this?

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

See the fixed (and semi-commented) version below.

HTH,
Bernie
MS Excel MVP


Sub GetRidOfDupes2()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

'Set object references for this workbook
Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)

'Open up the other workbook and set object references
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

'Insert columns for matching formulas
mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert

'Get the sizes of the two databases
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

'Insert formulas in first workbook to determine duplicated data
mySh1.Range(mySh1.Range("C1"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 & "C2=RC[-2]&RC[-1]))"

'Insert formulas in second workbook to determine duplicated data
mySh2.Range(mySh2.Range("C1"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 & "C2=RC[-2]&RC[-1]))"

'Convert formulas to values to prevent recalculation errors
'(from deleting the first set of duplicates)
mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

'Sort the data based on duplicates
'This makes row deletion faster since it is one block
mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh1.Columns("C:C")
'Format the cells so that the value can be found
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With

'If there are any duplicates, delete their rows
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

'Get rid of the formula column inserted earlier
mySh1.Range("C:C").EntireColumn.Delete

'Do the same thing to the second workbook
mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub

"Dave" wrote in message
...
Bernie,

The Macro is not quite working properly.

I have uploaded my two workbooks he

http://www.justupit.com/b9f5884e50fb...0e164c44c57149

I have added your macro in workbook 1.

I have highlighted the rows in workbook 1 & 2 that are one offs i.e. the
ones that should remain in each workbook.

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no
other
entries below the tables. Any other columns of data must also be contiguous, or else they
won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave












Dave

Compare Two Simple Workbooks & Remove Duplicate Rows
 
Brilliant Bernie Thanks!!! Youve been a massive help.

"Bernie Deitrick" wrote:

Dave,

Add a string variable:

Dim myFName As String

Then change this:

Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)

to this

myFName = Application.GetOpenFilename
If myFName = "False" Then Exit Sub
Set myB2 = Application.Workbooks.Open(myFName)

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Thanks again Bernie.

One last thing - if the user clicks cancel on the browse screen when looking
for myB2 an error comes up. Error 1004 from memory.

Is there something we can add to prevent this?

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

See the fixed (and semi-commented) version below.

HTH,
Bernie
MS Excel MVP


Sub GetRidOfDupes2()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

'Set object references for this workbook
Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)

'Open up the other workbook and set object references
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

'Insert columns for matching formulas
mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert

'Get the sizes of the two databases
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

'Insert formulas in first workbook to determine duplicated data
mySh1.Range(mySh1.Range("C1"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 & "C2=RC[-2]&RC[-1]))"

'Insert formulas in second workbook to determine duplicated data
mySh2.Range(mySh2.Range("C1"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 & "C2=RC[-2]&RC[-1]))"

'Convert formulas to values to prevent recalculation errors
'(from deleting the first set of duplicates)
mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

'Sort the data based on duplicates
'This makes row deletion faster since it is one block
mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh1.Columns("C:C")
'Format the cells so that the value can be found
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With

'If there are any duplicates, delete their rows
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

'Get rid of the formula column inserted earlier
mySh1.Range("C:C").EntireColumn.Delete

'Do the same thing to the second workbook
mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlNo
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub

"Dave" wrote in message
...
Bernie,

The Macro is not quite working properly.

I have uploaded my two workbooks he

http://www.justupit.com/b9f5884e50fb...0e164c44c57149

I have added your macro in workbook 1.

I have highlighted the rows in workbook 1 & 2 that are one offs i.e. the
ones that should remain in each workbook.

Thanks!
Dave

"Bernie Deitrick" wrote:

Dave,

A) Change

Header:=xlYes

to

Header:=xlNo

and remove these two lines:

mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"

B) Sorry, no time now. Maybe later....

HTH,
Bernie
MS Excel MVP


"Dave" wrote in message
...
Hi Bernie,

Could you do me a couple of favours with this macro please?

A) Please make it so that both column A and B have no headers/title cells.
B) Add a description of what each section of the macro does. The reason I am
asking for this is so that I can learn more about VBA, so that if need be I
can edit sections myself, and so that if other users wish to use this macro
there will be less questions needed next time :) .

Other than that thanks for the quick reply !!

Dave

"Bernie Deitrick" wrote:

Dave,

Put the macro below into a module in one of the workbooks, then run it, selecting the other
workbook
when prompted.

As written, it assumes that the data tables are on the first sheet of each workbook, with
surnames
in column A, DOB's in column B, headers in row 1, and with the data contiguous, with no
other
entries below the tables. Any other columns of data must also be contiguous, or else they
won't
sort properly.

HTH,
Bernie
MS Excel MVP

Sub GetRidOfDupes()
Dim myB1 As Workbook
Dim myB2 As Workbook
Dim mySh1 As Worksheet
Dim mySh2 As Worksheet
Dim myRow1 As Long
Dim myRow2 As Long
Dim myC As Range

Set myB1 = ThisWorkbook
Set mySh1 = myB1.Worksheets(1)
Set myB2 = Application.Workbooks.Open(Application.GetOpenFile name)
Set mySh2 = myB2.Worksheets(1)

mySh1.Range("C:C").EntireColumn.Insert
mySh2.Range("C:C").EntireColumn.Insert
mySh1.Range("C1").Value = "Match"
mySh2.Range("C1").Value = "Match"
myRow1 = mySh1.Cells(Rows.Count, 2).End(xlUp).Row
myRow2 = mySh2.Cells(Rows.Count, 2).End(xlUp).Row

mySh1.Range(mySh1.Range("C2"), mySh1.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB2.Name, ".xls", "") & "]" & mySh2.Name & _
"'!R1C1:R" & myRow2 - 1 & "C1&'[" & Replace(myB2.Name, ".xls", "") & "]" _
& mySh2.Name & "'!R1C2:R" & myRow2 - 1 & "C2=RC[-2]&RC[-1]))"
mySh2.Range(mySh2.Range("C2"), mySh2.Cells(Rows.Count, 2).End(xlUp). _
Offset(0, 1)).FormulaR1C1 = _
"=SUMPRODUCT(--('[" & Replace(myB1.Name, ".xls", "") & "]" & mySh1.Name & _
"'!R1C1:R" & myRow1 - 1 & "C1&'[" & Replace(myB1.Name, ".xls", "") & "]" _
& mySh1.Name & "'!R1C2:R" & myRow1 - 1 & "C2=RC[-2]&RC[-1]))"

mySh1.Range("C:C").Value = mySh1.Range("C:C").Value
mySh2.Range("C:C").Value = mySh2.Range("C:C").Value

mySh1.Range("C2").CurrentRegion.Sort Key1:=mySh1.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh1.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh1.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh1.Range(myC, mySh1.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If
mySh1.Range("C:C").EntireColumn.Delete

mySh2.Range("C2").CurrentRegion.Sort Key1:=mySh2.Range("C2"), _
Order1:=xlAscending, Header:=xlYes
With mySh2.Columns("C:C")
.NumberFormat = "0"
Set myC = .Find(What:="1", After:=mySh2.Range("C1"), LookIn:=xlValues, LookAt:= _
xlWhole, SearchOrder:=xlByRows, SearchDirection:=xlNext, MatchCase:=False _
, SearchFormat:=False)
End With
If Not myC Is Nothing Then
mySh2.Range(myC, mySh2.Cells(Rows.Count, 3).End(xlUp)).EntireRow.Delete
End If

mySh2.Range("C:C").EntireColumn.Delete

End Sub




"Dave" wrote in message
...
Hi Everyone!

I'm trying to find a macro that will do the following:

* Compare 2 workbooks containing 2 columns each (Surname & D.O.B)

* Remove any rows that exist in both workbooks e.g. Smith & 12.05.2007 in
both (Note: these are not necessarily in same Row in both workbooks i.e. Not
Both in Row 8)

* Leave rows that only exist in one or the other workbook.

Your help would be very much appreciated.

Thanks!
Dave














All times are GMT +1. The time now is 03:22 PM.

Powered by vBulletin® Copyright ©2000 - 2025, Jelsoft Enterprises Ltd.
ExcelBanter.com