View Single Post
  #11   Report Post  
Posted to microsoft.public.excel.misc
J741 J741 is offline
external usenet poster
 
Posts: 11
Default How to get a numbered list of unique words in a column?

Thanks Ron. That worked.

Now, how can I refine this to ignore words that are smaller than 3 letters
in length? Words like 'and', 'the', not', etc.

"Ron Rosenfeld" wrote:

On Mon, 22 Jun 2009 16:41:46 -0400, Ron Rosenfeld
wrote:

On Thu, 18 Jun 2009 08:36:02 -0700, J741
wrote:

Hi.

I have a spreadsheet with almost 5000 rows with about 15 columns of data, 2
columns of which contain sentences, phrases, or paragraphs of comments
entered by users.


Small change. The "count" function needs to be made case insensitive. So
change the code to:

=========================
Option Explicit
Sub UniqueWordList()
Dim rSrc As Range, rDest As Range, c As Range
Dim cWordList As Collection
Dim res() As Variant
Dim w() As String
Dim i As Long

Set cWordList = New Collection
Set rSrc = Range("A1:B22")
Set rDest = Range("M1")
rDest.EntireColumn.NumberFormat = "@"
For Each c In rSrc
w = Split(c.Value)
For i = 0 To UBound(w)
w(i) = StripWord(w(i))
If Not w(i) = "" Then
On Error Resume Next
cWordList.Add Item:=w(i), Key:=w(i)
On Error GoTo 0
End If
Next i
Next c

'transfer words to results array
ReDim res(1 To cWordList.Count, 0 To 1)
For i = 1 To cWordList.Count
res(i, 0) = cWordList(i)
Next i

'get counts
For i = LBound(res) To UBound(res)
For Each c In rSrc
res(i, 1) = res(i, 1) + CountWord(c.Value, res(i, 0))
Next c
Next i

'sort alpha: d=0; sort numeric d=1
'there are various ways of sorting
BubbleSort res, 1
For i = LBound(res) To UBound(res)
rDest.Offset(i, 0).Value = res(i, 0)
rDest.Offset(i, 1).Value = res(i, 1)
Next i

End Sub
Private Function StripWord(s As String) As String
Dim re As Object
Set re = CreateObject("vbscript.regexp")
re.Global = True
'allow only letters, digits, slashes and hyphens
re.Pattern = "[^-/A-Za-z0-9]"
StripWord = re.Replace(s, "")
Set re = Nothing
End Function
Private Function CountWord(ByVal s As String, sPat) As Long
Dim re As Object, mc As Object
Set re = CreateObject("vbscript.regexp")
re.Global = True
re.ignorecase = True
re.Pattern = "\b" & sPat & "\b"

Set mc = re.Execute(s)
CountWord = mc.Count
End Function
Private Sub BubbleSort(TempArray As Variant, d As Long) 'd is 0 based dimension
Dim temp(0, 1) As Variant
Dim i As Integer
Dim NoExchanges As Integer

' Loop until no more "exchanges" are made.
Do
NoExchanges = True

' Loop through each element in the array.
For i = LBound(TempArray) To UBound(TempArray) - 1

' If the element is less than the element
' following it, exchange the two elements.
' change "<" to "" to sort ascending
If TempArray(i, d) < TempArray(i + 1, d) Then
NoExchanges = False
temp(0, 0) = TempArray(i, 0)
temp(0, 1) = TempArray(i, 1)
TempArray(i, 0) = TempArray(i + 1, 0)
TempArray(i, 1) = TempArray(i + 1, 1)
TempArray(i + 1, 0) = temp(0, 0)
TempArray(i + 1, 1) = temp(0, 1)

End If
Next i
Loop While Not (NoExchanges)
End Sub
=================================
--ron