View Single Post
  #11   Report Post  
Posted to microsoft.public.excel.misc
LAN MIND LAN MIND is offline
external usenet poster
 
Posts: 7
Default word frequency counting

Ok I have:

1Opened the VBEditor.

2Selected Insert/Module and pasted this**** (at the bottom of this
post) in the window that opened.

3Selected Tools/References and set a reference to Microsoft VBScript
Regular Expressions 5.5

4Then I went back to my excel sheet and manually highlighted a 2
column x 10 row "box" by clicking on the upper left cell and dragging
and highlighting said "box" (20 cells)

5In the upper left cell I entered:
=transpose(uniquecount(rng))

6Held down <ctrl<shift while you hit <enter

and nothing happens (as I said I am completely a newb on all of this).

So here are some questions:

After step 3 was I supposed to do something besides going straight
back to my excel sheet? Save the whole VBeditor and somehow import it?
Close the VBeditor?

Was I correct to " manually highlighted a 2 column x 10 row "box" "?

Thanks again Ron I appreciate the help. Oh BTW my excel sheet has
10,000 rows and 10 columns, very text heavy.

__________________________________________________ __

****
Option Explicit
Option Compare Text
Function UniqueCount(rg As Range) As Variant
'Requires reference to Microsoft VBScript Regular Expressions 5.5
'Returns a two dimensional array of unique words and count
Dim cWordList As Collection
Dim Str As String
Dim sRes() As Variant
Dim i As Long, j As Long
Dim c As Range
Dim re As RegExp
Dim mc As MatchCollection, m As Match

'Put all words into a single string
For Each c In rg
Str = Str & c.Value & " "
Next c

'get list of unique words
Set re = New RegExp
re.Global = True
re.Pattern = "\b[\w']+\b"
Set cWordList = New Collection
On Error Resume Next
'Add method with index=word will give error on duplicates
Set mc = re.Execute(Str)
For Each m In mc
cWordList.Add m.Value, m.Value
Next m
On Error GoTo 0
ReDim sRes(0 To 1, 1 To cWordList.Count)
For i = 1 To cWordList.Count
sRes(0, i) = cWordList(i)
Next i

'get word count for each word
re.Global = True
re.IgnoreCase = True
For i = 1 To UBound(sRes, 2)
re.Pattern = "\b" & sRes(0, i) & "\b"
Set mc = re.Execute(Str)
sRes(1, i) = mc.Count
Next i
Set re = Nothing

'you can comment out one or both of the sort lines
' depending on your requirements

'Sort words alphabetically A-Z
BubbleSort sRes, 0, True

'then sort by Count highest to lowest
BubbleSort sRes, 1, False

UniqueCount = sRes
End Function
'--------------------------------------------------------------
Private Sub BubbleSort(TempArray As Variant, d As Long, _
bSortDirection As Boolean)
'bSortDirection = True means sort ascending
'bSortDirection = False means sort descending
Dim Temp1 As Variant, Temp2
Dim i As Long
Dim NoExchanges As Boolean
Dim Exchange As Boolean

' Loop until no more "exchanges" are made.
Do
NoExchanges = True

' Loop through each element in the array.
For i = 1 To UBound(TempArray, 2) - 1

' If the element is greater/less than the element
' following it, exchange the two elements.

Exchange = TempArray(d, i) < TempArray(d, i + 1)
If bSortDirection = True Then Exchange = _
TempArray(d, i) TempArray(d, i + 1)
If Exchange Then
NoExchanges = False
Temp1 = TempArray(0, i)
Temp2 = TempArray(1, i)
TempArray(0, i) = TempArray(0, i + 1)
TempArray(1, i) = TempArray(1, i + 1)
TempArray(0, i + 1) = Temp1
TempArray(1, i + 1) = Temp2
End If
Next i
Loop While Not (NoExchanges)
End Sub