View Single Post
  #10   Report Post  
Posted to microsoft.public.excel.programming
Claus Busch Claus Busch is offline
external usenet poster
 
Posts: 3,872
Default Efficient storage

Hi Robert,

Am Thu, 7 May 2015 11:08:48 -0700 schrieb Robert Crandal:

Public objFSO As New Filesystemobject


here is a revised version:

Sub Test()
Dim strPath As String
Dim strFN As String
Dim objReadFile As Object, myDic As Object, objFSO As Object
Dim strText As String
Dim varChr As Variant, varTmp As Variant, varOut As Variant
Dim i As Long, LRow As Long, n As Long

Application.ScreenUpdating = False

'Modify path and file name
strPath = "C:\Users\Claus\Desktop\"
strFN = "Test.txt"
'Array with the expected punctuation marks
varChr = Array(",", ".", ":", "-", "_", "!", "?", "(", ")", "'",
Chr(10))

Set myDic = CreateObject("Scripting.Dictionary")
Set objFSO = CreateObject("Scripting.FileSystemObject")

'Opens the text file and read the text into a string
Set objReadFile = objFSO.opentextfile(strPath & strFN)

Do While objReadFile.atendofstream < True
strText = strText & objReadFile.readline & " "
Loop
objReadFile.Close

'Deletes all punctuation marks
For i = LBound(varChr) To UBound(varChr)
strText = Replace(strText, varChr(i), " ")
Next

'Handles special cases like I'll, it's and so on
'Customize it depending on your text file
strText = Replace(Replace(Replace(Replace(Replace(strText, " s ", " is
"), " ll ", _
" will "), " d ", " had "), " re ", " are "), " ve ", " have ")

'Deletes superfluous spaces
strText = Application.Trim(strText)
'Change all words to lower case
strText = LCase(strText)
'Writes the string into an array
varTmp = Split(strText, " ")

'Creates unique words
For i = LBound(varTmp) To UBound(varTmp)
myDic(varTmp(i)) = varTmp(i)
Next
varOut = myDic.Items

'Writes the unique words to column A with a header for filtering
Range("A1") = "Header"
Range("A2").Resize(UBound(varOut) + 1) = Application.Transpose(varOut)
LRow = Cells(Rows.Count, 1).End(xlUp).Row

'Distrubutes the words in alphabetic order to columns
n = 2
For i = 97 To 122
Range("A1:A" & LRow).AutoFilter field:=1, Criteria1:="=" & Chr(i) &
"*"
If Application.Subtotal(3, Range("A:A")) 1 Then
Range("A2:A" & LRow).Copy Cells(1, n)
n = n + 1
End If
Next
Columns("A").Delete
Columns("A:Z").AutoFit
Application.ScreenUpdating = True
End Sub


Regards
Claus B.
--
Vista Ultimate / Windows7
Office 2007 Ultimate / 2010 Professional