ExcelBanter

ExcelBanter (https://www.excelbanter.com/)
-   Excel Programming (https://www.excelbanter.com/excel-programming/)
-   -   Extract Text From Multiple Word Files (https://www.excelbanter.com/excel-programming/314557-extract-text-multiple-word-files.html)

andibevan

Extract Text From Multiple Word Files
 

Hi All,

I have 82 MS word files in the same directory which all contain a tabl
and I need to extract a piece of information from each file.

The information I require from each word document is next to the ro
headings "Primary Effect" and "Secondary Effect" i.e.:-

Column 1 Coumn 2
Primary Effect INFORMATION to EXTRACT 1
Secondary Effect INFORMATION to EXTRACT 2

I think I essentially need something that will cycle through each fil
in the directory, open it, find the information in the cell next t
"Primary Effect" and "Secondary Effect" and copy it into th
spreadsheet against the file name.

Any help with this would be greatfully received.

Thanks

Andy
:confused

--
andibeva
-----------------------------------------------------------------------
andibevan's Profile: http://www.excelforum.com/member.php...nfo&userid=988
View this thread: http://www.excelforum.com/showthread.php?threadid=27193


KL[_5_]

Extract Text From Multiple Word Files
 
Hi there,

The below code does seem to work, but I couldn't figure out how to make Word
documents invisible, which, I guess, should spead up the macro
significantly.

Regards, KL

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
OutValue = .Substitute(OutValue, "€", "")
OutValue = .Substitute(OutValue, " ", "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub

--------------Code End--------------

"andibevan" wrote in message
...

Hi All,

I have 82 MS word files in the same directory which all contain a table
and I need to extract a piece of information from each file.

The information I require from each word document is next to the row
headings "Primary Effect" and "Secondary Effect" i.e.:-

Column 1 Coumn 2
Primary Effect INFORMATION to EXTRACT 1
Secondary Effect INFORMATION to EXTRACT 2

I think I essentially need something that will cycle through each file
in the directory, open it, find the information in the cell next to
"Primary Effect" and "Secondary Effect" and copy it into the
spreadsheet against the file name.

Any help with this would be greatfully received.

Thanks

Andy
:confused:


--
andibevan
------------------------------------------------------------------------
andibevan's Profile:
http://www.excelforum.com/member.php...fo&userid=9882
View this thread: http://www.excelforum.com/showthread...hreadid=271933




KL[_5_]

Extract Text From Multiple Word Files
 
A slightly corected code:

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub
--------------Code End--------------


"KL" wrote in message
...
Hi there,

The below code does seem to work, but I couldn't figure out how to make
Word documents invisible, which, I guess, should spead up the macro
significantly.

Regards, KL

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
OutValue = .Substitute(OutValue, "€", "")
OutValue = .Substitute(OutValue, " ", "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub

--------------Code End--------------

"andibevan" wrote in message
...

Hi All,

I have 82 MS word files in the same directory which all contain a table
and I need to extract a piece of information from each file.

The information I require from each word document is next to the row
headings "Primary Effect" and "Secondary Effect" i.e.:-

Column 1 Coumn 2
Primary Effect INFORMATION to EXTRACT 1
Secondary Effect INFORMATION to EXTRACT 2

I think I essentially need something that will cycle through each file
in the directory, open it, find the information in the cell next to
"Primary Effect" and "Secondary Effect" and copy it into the
spreadsheet against the file name.

Any help with this would be greatfully received.

Thanks

Andy
:confused:


--
andibevan
------------------------------------------------------------------------
andibevan's Profile:
http://www.excelforum.com/member.php...fo&userid=9882
View this thread:
http://www.excelforum.com/showthread...hreadid=271933






KL[_5_]

Extract Text From Multiple Word Files
 
This macro in conjunction with the function I posted earlier does figure out
the visibility issue for Word application, but the processing speed seems to
be the same. For this macro to work you need to create a reference to
Microsoft Word Objects Library (in VBA Editor go TOOLSREFERENCES... and
check Microsoft Word 9.0 [or whatever version is applicable] Objects
Library). Also, I forgot to mention that the macro serches for files in the
same folder where the excel file is located.

--------------Code Start--------------
Sub ImportWordData()
Dim oAppWD As Object
Dim wdDoc As Word.Document
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = New Word.Application
oAppWD.Visible = False
For i = 1 To .FoundFiles.Count
Set wdDoc = oAppWD.Documents.Open(FileName:=.FoundFiles(i))
FileName = Dir(.FoundFiles(i))
With wdDoc.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
wdDoc.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub
--------------Code End--------------

"KL" wrote in message
...
A slightly corected code:

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub
--------------Code End--------------


"KL" wrote in message
...
Hi there,

The below code does seem to work, but I couldn't figure out how to make
Word documents invisible, which, I guess, should spead up the macro
significantly.

Regards, KL

--------------Code Start--------------
Private Function RemoveNoise(InValue) As String
With Application.WorksheetFunction
OutValue = UCase(InValue)
OutValue = .Substitute(OutValue, Chr(7), "")
OutValue = .Substitute(OutValue, Chr(9), "")
OutValue = .Substitute(OutValue, Chr(10), "")
OutValue = .Substitute(OutValue, Chr(13), "")
OutValue = .Substitute(OutValue, Chr(31), "")
OutValue = .Substitute(OutValue, Chr(160), "")
OutValue = .Substitute(OutValue, Chr(172), "")
OutValue = .Substitute(OutValue, Chr(182), "")
OutValue = .Substitute(OutValue, Chr(183), "")
OutValue = .Substitute(OutValue, "€", "")
OutValue = .Substitute(OutValue, " ", "")
End With
RemoveNoise = Trim(OutValue)
End Function

Sub ImportWordData()
Dim oAppWD As Object
Dim strPath As String
Dim FileName As String
strPath = ActiveWorkbook.Path

Set fs = Application.FileSearch
With fs
.LookIn = strPath
.SearchSubFolders = False
.FileName = ".doc"
If .Execute() 0 Then
Application.ScreenUpdating = False
Set oAppWD = CreateObject("Word.Application")
For i = 1 To .FoundFiles.Count
oAppWD.Documents.Open FileName:=.FoundFiles(i)
'oAppWD.Visible = False
FileName = Dir(.FoundFiles(i))
With oAppWD.ActiveDocument.Tables(1)
ActiveSheet.Cells(i, 1) = FileName
ActiveSheet.Cells(i, 2) = _
RemoveNoise(.Rows(1).Cells(2).Range.Text)
ActiveSheet.Cells(i, 3) = _
RemoveNoise(.Rows(2).Cells(2).Range.Text)
End With
oAppWD.Documents.Close
Next i
oAppWD.Application.Quit
Set oAppWD = Nothing
Application.ScreenUpdating = True
End If
End With
End Sub

--------------Code End--------------

"andibevan" wrote in message
...

Hi All,

I have 82 MS word files in the same directory which all contain a table
and I need to extract a piece of information from each file.

The information I require from each word document is next to the row
headings "Primary Effect" and "Secondary Effect" i.e.:-

Column 1 Coumn 2
Primary Effect INFORMATION to EXTRACT 1
Secondary Effect INFORMATION to EXTRACT 2

I think I essentially need something that will cycle through each file
in the directory, open it, find the information in the cell next to
"Primary Effect" and "Secondary Effect" and copy it into the
spreadsheet against the file name.

Any help with this would be greatfully received.

Thanks

Andy
:confused:


--
andibevan
------------------------------------------------------------------------
andibevan's Profile:
http://www.excelforum.com/member.php...fo&userid=9882
View this thread:
http://www.excelforum.com/showthread...hreadid=271933









All times are GMT +1. The time now is 07:35 PM.

Powered by vBulletin® Copyright ©2000 - 2025, Jelsoft Enterprises Ltd.
ExcelBanter.com