View Single Post
  #11   Report Post  
Posted to microsoft.public.excel.programming
Robert Crandal[_3_] Robert Crandal[_3_] is offline
external usenet poster
 
Posts: 161
Default Extract paragraphs from text file

"GS" wrote:

Sub CleanupProfanity2()
Dim vData, vWord, n&

'Group directly into paragraphs
vData = Split(ReadTextFile(sFilename), String(25, "*"))

'Filter paragraphs with profane words
For Each vWord In Split(sProfaneWords, ",")
For n = LBound(vData) To UBound(vData)
If (InStr(vData(n), vWord) 0) Then
vData(n) = "~": Exit For
End If
Next 'n
Next 'vWord

'Rebuild paragraphs back to lines
'containing a paragraph separator.
' vData = Split(Join(vData, vbCrLf & "<"), vbCrLf)

'containing no paragraph separator.
vData = Split(Join(vData, vbCrLf), vbCrLf) '//no separator

'Filter out any blank lines
For n = UBound(vData) To LBound(vData) Step -1
If Len(vData(n)) = 0 Then vData(n) = "~"
Next 'n
vData = Filter(vData, "~", False)

WriteTextFile Join(vData, vbCrLf), sFilename
End Sub


Hi Gary! Sorry about the long reply. I got busy with other
issues over the last month, so I was not able to test this code
until now.

In a nutshell, the above code did NOT work. Here is the data
I used:

[BEGIN INPUT]

playboys regrows correality requisition droits offered
angeles surfy wile lacrimation aged seignories practicing
hereinto workmanship fuggy municipally asdf underpinnings
brocket unpremeditated pinochle crazier coaeval obviously
able supinated hostler burrows artichoke vivant crosstown

********************

baneful celebrations angle growler landscape beside tzetzes
normal bootery bespoke henhouses tribuneship bouncer
displeasure crewman tenth curarization honestness sensitize
reminisces cometh fuk obscurantists eventualities mechanics
vanity crap nonalignment dowering nephew nonconfidence

********************

chaotically sooners rocketing luckiest holeproof damnableness
soc infertilely supernumerary expertise sulphid frisson
surceases joyously kins drooled agrarianism paraphrases ribby
wittiness grabbiest junketer accumulable hemokonia matriculants
sieged yuio forgoes *staking* nonadjacent offprint mug pawpaw

[END INPUT]

And, here is the code that I used:

Option Explicit

Const sProfaneWords$ = "fuk,****"
Const sFilename$ = "C:\Documents and Settings\user\Desktop\Excel
Projects\book.txt"
Sub CleanupProfanity2()
Dim vData, vWord, n&

'Group directly into paragraphs
vData = Split(ReadTextFile(sFilename), String(25, "*"))

'Filter paragraphs with profane words
For Each vWord In Split(sProfaneWords, ",")
For n = LBound(vData) To UBound(vData)
If (InStr(vData(n), vWord) 0) Then
vData(n) = "~": Exit For
End If
Next 'n
Next 'vWord

'containing no paragraph separator.
vData = Split(Join(vData, vbCrLf), vbCrLf) '//no separator

'Filter out any blank lines
For n = UBound(vData) To LBound(vData) Step -1
If Len(vData(n)) = 0 Then vData(n) = "~"
Next 'n
vData = Filter(vData, "~", False)

WriteTextFile Join(vData, vbCrLf), "C:\Documents and
Settings\user\Desktop\Excel Projects\out.txt"
End Sub
Function ReadTextFile$(Filename$)
' Reads large amounts of data from a text file in one single step.
Dim iNum%
On Error GoTo ErrHandler
iNum = FreeFile(): Open Filename For Input As #iNum
ReadTextFile = Space$(LOF(iNum))
ReadTextFile = Input(LOF(iNum), iNum)

ErrHandler:
Close #iNum: If Err Then Err.Raise Err.Number, , Err.Description
End Function 'ReadTextFile()
Sub WriteTextFile(TextOut$, Filename$, _
Optional AppendMode As Boolean = False)
' Reusable procedure that Writes/Overwrites or Appends
' large amounts of data to a Text file in one single step.
' **Does not create a blank line at the end of the file**
Dim iNum%
On Error GoTo ErrHandler
iNum = FreeFile()
If AppendMode Then
Open Filename For Append As #iNum: Print #iNum, vbCrLf & TextOut;
Else
Open Filename For Output As #iNum: Print #iNum, TextOut;
End If

ErrHandler:
Close #iNum: If Err Then Err.Raise Err.Number, , Err.Description
End Sub 'WriteTextFile()



I am going to rephrase my problem in a separate post.