View Single Post
  #1   Report Post  
Posted to microsoft.public.excel.programming
ron ron is offline
external usenet poster
 
Posts: 118
Default Extracting web data

Hi Peter... The following code should get you going. Ron

Sub Webpage()
' Downloads the paginated search-result pages, pulls the text that follows
' each "smokestack/school" link, and writes one name per row starting at the
' cell that is active when the macro is launched. When the macro finishes,
' the active cell is the first empty row below the last name written.
'
' The number of pages is derived from the "<count> Schools" text that follows
' the "search-intro" marker on the first page, at PER_PAGE results per page.

    ' The posted URL ended in "/%5B/url%5D", which is a percent-encoded
    ' BBCode "[/url]" tag left over from the forum markup, not part of the path.
    Const BASE_URL As String = "http://content.usatoday.com/news/nation/environment/smokestack/search/FL/~/~/~/rank/~/"
    Const ROW_MARKER As String = "smokestack/school"
    Const PER_PAGE As Long = 10

    Dim J As Long
    Dim no_pages As Long
    Dim total_schools As Long
    Dim my_var As String
    Dim sc_name As String
    Dim pos_intro As Long
    Dim pos_label As Long
    Dim pos_5 As Long
    Dim pos_6 As Long
    Dim pos_7 As Long
    Dim out_cell As Range

    ' Retrieve the source code for the first page
    my_var = FetchPage(BASE_URL & "1/")

    ' Determine the number of pages to examine.
    ' Each InStr result is checked before it is reused as a start position:
    ' InStr raises run-time error 5 when start is 0.
    pos_intro = InStr(1, my_var, "search-intro", vbTextCompare)
    If pos_intro > 0 Then
        pos_label = InStr(pos_intro, my_var, "Schools", vbTextCompare)
    End If
    If pos_label > 0 Then
        ' The delimiters originally used to bracket the count were lost from
        ' the post (they arrived as empty strings), so the digits are read by
        ' scanning backward from the "Schools" label instead.
        total_schools = CountBefore(my_var, pos_label)
    End If
    If total_schools <= 0 Then
        MsgBox "Could not find the school count on the first results page; nothing was extracted.", vbExclamation
        Exit Sub
    End If

    ' Ceiling division: a partially filled last page still counts as a page.
    no_pages = (total_schools + PER_PAGE - 1) \ PER_PAGE

    Set out_cell = ActiveCell

    ' Begin iteration
    For J = 1 To no_pages
        ' Page 1 is already in my_var from the count lookup above.
        If J > 1 Then my_var = FetchPage(BASE_URL & J & "/")

        ' Extract data
        pos_5 = InStr(1, my_var, ROW_MARKER, vbTextCompare)
        Do While pos_5 > 0
            ' NOTE(review): the delimiter on this line was an empty string in
            ' the post. ">" is reconstructed from the Mid() arithmetic below
            ' (text between a one-character delimiter and the next "<"), i.e.
            ' the link text of the anchor - confirm against the page source.
            pos_6 = InStr(pos_5, my_var, ">", vbBinaryCompare)
            If pos_6 = 0 Then Exit Do
            pos_7 = InStr(pos_6, my_var, "<", vbBinaryCompare)
            If pos_7 = 0 Then Exit Do
            sc_name = Mid$(my_var, 1 + pos_6, pos_7 - (1 + pos_6))

            ' Use instr, instrrev and mid to extract other info you're interested in
            ' '
            ' '
            ' '

            ' Put the current school data into the workbook
            out_cell.Value = sc_name
            Set out_cell = out_cell.Offset(1, 0)

            pos_5 = InStr(pos_7, my_var, ROW_MARKER, vbTextCompare)
        Loop
    Next J

    ' Leave the selection where the original row-by-row Select would have.
    out_cell.Select
End Sub

Private Function FetchPage(ByVal page_url As String) As String
' Performs a synchronous GET of page_url and returns the response body.
' Returns an empty string when the server does not answer with status 200,
' so callers simply find no markers in the result.
    Dim my_obj As Object

    Set my_obj = CreateObject("MSXML2.XMLHTTP")
    my_obj.Open "GET", page_url, False
    my_obj.send
    If my_obj.Status = 200 Then FetchPage = my_obj.responseText
    Set my_obj = Nothing
End Function

Private Function CountBefore(ByVal html As String, ByVal end_pos As Long) As Long
' Returns the integer written immediately before position end_pos in html,
' ignoring whitespace between the number and end_pos and thousands-separator
' commas inside the number (e.g. "1,234 Schools" -> 1234).
' Returns 0 when no digits are found or the number will not fit in a Long.
    Dim i As Long
    Dim ch As String
    Dim digits As String

    i = end_pos - 1

    ' Skip the whitespace that separates the number from the label.
    Do While i >= 1
        ch = Mid$(html, i, 1)
        If ch <> " " And ch <> vbTab And ch <> vbCr And ch <> vbLf Then Exit Do
        i = i - 1
    Loop

    ' Collect digits right-to-left, dropping commas, until any other character.
    Do While i >= 1
        ch = Mid$(html, i, 1)
        If ch Like "[0-9]" Then
            digits = ch & digits
        ElseIf ch <> "," Then
            Exit Do
        End If
        i = i - 1
    Loop

    ' Nine digits always fit in a Long; anything longer is treated as invalid.
    If Len(digits) > 0 And Len(digits) <= 9 Then CountBefore = CLng(digits)
End Function