View Single Post
  #6   Report Post  
Posted to microsoft.public.excel.misc
Ron Rosenfeld Ron Rosenfeld is offline
external usenet poster
 
Posts: 5,651
Default word frequency counting

On Sat, 08 Mar 2008 22:17:50 -0500, Ron Rosenfeld
wrote:

Private Sub BubbleSort(TempArray As Variant, d As Long, _
bSortDirection As Boolean)
'bSortDirection = True means sort ascending
'bSortDirection = False means sort descending
Dim Temp1 As Variant, Temp2
Dim i As Long
Dim NoExchanges As Boolean
Dim Exchange As Boolean

' Loop until no more "exchanges" are made.
Do
NoExchanges = True

' Loop through each element in the array.
For i = 1 To UBound(TempArray, 2) - 1

' If the element is greater/less than the element
' following it, exchange the two elements.

Exchange = IIf(TempArray(d, i) < TempArray(d, i + 1), True, False)
If bSortDirection = True Then Exchange = Not Exchange
If Exchange Then
NoExchanges = False
Temp1 = TempArray(0, i)
Temp2 = TempArray(1, i)
TempArray(0, i) = TempArray(0, i + 1)
TempArray(1, i) = TempArray(1, i + 1)
TempArray(0, i + 1) = Temp1
TempArray(1, i + 1) = Temp2
End If
Next i
Loop While Not (NoExchanges)
End Sub


It's getting late. Small change in the sort part:

=====================
Option Explicit
Option Compare Text
Function UniqueCount(rg As Range) As Variant
'Requires reference to Microsoft VBScript Regular Expressions 5.5
'Returns a two dimensional array of unique words and count
Dim cWordList As Collection
Dim Str As String
Dim sRes() As Variant
Dim i As Long, j As Long
Dim c As Range
Dim re As RegExp
Dim mc As MatchCollection, m As Match

'Put all words into a single string
For Each c In rg
Str = Str & c.Value & " "
Next c

'get list of unique words
Set re = New RegExp
re.Global = True
re.Pattern = "\b[\w']+\b"
Set cWordList = New Collection
On Error Resume Next
'Add method with index=word will give error on duplicates
Set mc = re.Execute(Str)
For Each m In mc
cWordList.Add m.Value, m.Value
Next m
On Error GoTo 0
ReDim sRes(0 To 1, 1 To cWordList.Count)
For i = 1 To cWordList.Count
sRes(0, i) = cWordList(i)
Next i

'get word count for each word
re.Global = True
re.IgnoreCase = True
For i = 1 To UBound(sRes, 2)
re.Pattern = "\b" & sRes(0, i) & "\b"
Set mc = re.Execute(Str)
sRes(1, i) = mc.Count
Next i
Set re = Nothing

'you can comment out one or both of the sort lines
' depending on your requirements

'Sort words alphabetically A-Z
BubbleSort sRes, 0, True

'then sort by Count highest to lowest
BubbleSort sRes, 1, False

UniqueCount = sRes
End Function
'--------------------------------------------------------------
Private Sub BubbleSort(TempArray As Variant, d As Long, _
bSortDirection As Boolean)
'bSortDirection = True means sort ascending
'bSortDirection = False means sort descending
Dim Temp1 As Variant, Temp2
Dim i As Long
Dim NoExchanges As Boolean
Dim Exchange As Boolean

' Loop until no more "exchanges" are made.
Do
NoExchanges = True

' Loop through each element in the array.
For i = 1 To UBound(TempArray, 2) - 1

' If the element is greater/less than the element
' following it, exchange the two elements.

Exchange = TempArray(d, i) < TempArray(d, i + 1)
If bSortDirection = True Then Exchange = Not Exchange
If Exchange Then
NoExchanges = False
Temp1 = TempArray(0, i)
Temp2 = TempArray(1, i)
TempArray(0, i) = TempArray(0, i + 1)
TempArray(1, i) = TempArray(1, i + 1)
TempArray(0, i + 1) = Temp1
TempArray(1, i + 1) = Temp2
End If
Next i
Loop While Not (NoExchanges)
End Sub
=======================================
--ron