View Single Post
  #4   Report Post  
Posted to microsoft.public.excel.misc
Ron Rosenfeld Ron Rosenfeld is offline
external usenet poster
 
Posts: 5,651
Default How to count the frequency of each word in Excel

On Thu, 22 Jan 2009 22:29:20 -0500, Ron Rosenfeld
wrote:

On Thu, 22 Jan 2009 18:06:05 -0800, deedee
wrote:

Hi,

I would like to get the frequency of each word in a string in a single cell

Hope it is more clear like this


Examples along with information as to how you want the data output would make
it more clear.

If you want the *frequency* of each word in the string, the following function
will return a 2 dimensional horizontal array of the unique words in the string
along with a count of each of those words.

If you don't need a unique sorted list of words (sorted alpha or by freq), but
rather a list of every word, the routine can be sped up considerably.

The fastest way to output the results is to enter the single formula:

=WordFreq(cell_ref) as a two-dimensional array of appropriate size.

Or, if you want to output it as a vertical array,
=transpose(WordFreq(cell_ref))

Or you could use the Index function to return each value:

=INDEX(WordFreq($A$1),1,ROWS($1:1))

entered in B1 and filled down will return the words.

=INDEX(WordFreq($A$1),2,ROWS($1:1))

entered in C1 and filled down will return the associated count.

--------------
To enter this UDF, <alt-F11 opens the VB Editor. Ensure your project is
highlighted in the Project Explorer window, then Insert/Module and paste the
code below into the window that opens.

This is a recompilation of other functions that I use, so could probably be
optimized.


I had to make a small change to make the function case insensitive. The first
version did a case insensitive determination of "unique words" but the count
was case-sensitive. This version is case insensitive consistently:

===============================
Option Explicit
Option Compare Text
Function WordFreq(Str As String) As Variant
Dim i As Long
Dim sWords
Dim cWordList As Collection
Dim sRes() As Variant
sWords = Split(Str)

'get list of unique words
Set cWordList = New Collection

On Error Resume Next
For i = 0 To UBound(sWords)
cWordList.Add sWords(i), CStr(sWords(i))
Next i
On Error GoTo 0

ReDim sRes(0 To 1, 1 To cWordList.Count)
For i = 1 To cWordList.Count
sRes(0, i) = cWordList(i)
Next i

'get word count for each word
For i = 1 To UBound(sRes, 2)
sRes(1, i) = RegexCount(Str, "\b" & sRes(0, i) & "\b", False)
Next i

'Reverse sorting order if you want the words alphabetically
'without respect to the counts

'Sort words alphabetically A-Z
BubbleSortX sRes, 0, True

'then sort by Count highest to lowest
BubbleSortX sRes, 1, False
WordFreq = sRes
End Function
'--------------------------------------------------------------
Private Sub BubbleSortX(TempArray As Variant, d As Long, _
bSortDirection As Boolean)
'bSortDirection = True means sort ascending
'bSortDirection = False means sort descending
Dim Temp1 As Variant, Temp2
Dim i As Long
Dim NoExchanges As Boolean
Dim Exchange As Boolean

' Loop until no more "exchanges" are made.
Do
NoExchanges = True

' Loop through each element in the array.
For i = 1 To UBound(TempArray, 2) - 1

' If the element is greater/less than the element
' following it, exchange the two elements.

Exchange = TempArray(d, i) < TempArray(d, i + 1)
If bSortDirection = True Then Exchange = _
TempArray(d, i) TempArray(d, i + 1)
If Exchange Then
NoExchanges = False
Temp1 = TempArray(0, i)
Temp2 = TempArray(1, i)
TempArray(0, i) = TempArray(0, i + 1)
TempArray(1, i) = TempArray(1, i + 1)
TempArray(0, i + 1) = Temp1
TempArray(1, i + 1) = Temp2
End If
Next i
Loop While Not (NoExchanges)
End Sub
'-------------------------------------------------------------
Private Function RegexCount(Str As String, Pattern As String, _
Optional CaseSensitive As Boolean = True) As Long

Dim objRegExp As Object
Dim colMatches As Object

' Create a regular expression object.
Set objRegExp = CreateObject("vbscript.regexp")

'Set the pattern by using the Pattern property.
objRegExp.Pattern = Pattern

' Set Case Insensitivity.
objRegExp.IgnoreCase = Not CaseSensitive

'Set global applicability.
objRegExp.Global = True

'Test whether the String can be compared.
If (objRegExp.Test(Str) = True) Then

'Get the matches.
Set colMatches = objRegExp.Execute(Str) ' Execute search.
RegexCount = colMatches.Count
Else
RegexCount = 0
End If
End Function
=================================
--ron