View Single Post
  #1   Report Post  
Posted to microsoft.public.excel.programming
[email protected] bodhi2.71828@gmail.com is offline
external usenet poster
 
Posts: 22
Default text compare, percentage of similar words

Good question. I probably should buy a book or take a class on natural
language processing. But until I can do that, here is what I have so
far. I added a penalty for strings that have many of the same words
but are significantly different in length.

Private Function RateLabel(str1 As String, str2 As String) As Integer
Dim i As Integer, j As Integer, intMatch As Integer, intRating As
Integer, str1Array, str2Array, strTemp
If str1 < "" And str2 < "" Then
If StrComp(str1, str2, vbTextCompare) = 0 Then
RateLabel = 100
Else
str1 = Application.WorksheetFunction.Trim(str1)
str2 = Application.WorksheetFunction.Trim(str2)
str1Array = Split(str1, " ")
str2Array = Split(str2, " ")
'make sure str1Array is always the smaller of the two
If UBound(str1Array) UBound(str2Array) Then
strTemp = str1Array
str1Array = str2Array
str2Array = strTemp
End If
'count words and determine % that match
For i = 0 To UBound(str1Array)
For j = 0 To UBound(str2Array)
If StrComp(str1Array(i), str2Array(j),
vbTextCompare) = 0 Then
intMatch = intMatch + 1
Exit For
End If
Next j
Next i
intRating = (intMatch / (UBound(str1Array) + 1)) * (100 - 8
* (UBound(str1Array) - UBound(str2Array)))
If intRating 0 Then RateLabel = intRating Else RateLabel
= 0
End If
End If
MsgBox "str1 = " & str1 & ", str2 = " & str2 & ", rating = " &
RateLabel
End Function