View Single Post
  #1   Report Post  
Posted to microsoft.public.excel.programming
ron ron is offline
external usenet poster
 
Posts: 118
Default Extracting web data

Peter...This will do most of the work. But separating the city from the address is problematical. If the city is more than one word, this macro won't handle it correctly. I'm out of time so see how this works for you...Ron

Sub Webpage()
' Retrieve the source code for the first page
J = 1
my_url = "http://content.usatoday.com/news/nation/environment/smokestack/search/FL/~/~/~/rank/~/" & J & "/%5B/url%5D"
Set my_obj = CreateObject("MSXML2.XMLHTTP")
my_obj.Open "GET", my_url, False
my_obj.send
my_var = my_obj.responsetext
Set my_obj = Nothing

' Determine the number of pages to examine
pos_1 = InStr(1, my_var, "search-intro", vbTextCompare)
pos_2 = InStr(pos_1, my_var, "", vbTextCompare)
pos_3 = InStr(1 + pos_2, my_var, "", vbTextCompare)
pos_4 = InStr(pos_3, my_var, "Schools", vbTextCompare)
no_pages = (Mid(my_var, 1 + pos_3, -1 + pos_4 - (1 + pos_3)))
no_pages = Replace(no_pages, ",", "", 1, -1, vbTextCompare)
no_pages = Val(no_pages)
If (no_pages Mod 10) = 0 Then
no_pages = no_pages / 10
Else
no_pages = 1 + Int(no_pages / 10)
End If

' Begin iteration
For J = 1 To no_pages
my_url = "http://content.usatoday.com/news/nation/environment/smokestack/search/FL/~/~/~/rank/~/" & J & "/%5B/url%5D"
Set my_obj = CreateObject("MSXML2.XMLHTTP")
my_obj.Open "GET", my_url, False
my_obj.send
my_var = my_obj.responsetext
Set my_obj = Nothing

' Extract data
yy = 1
Do Until yy = 0
pos_5 = InStr(yy, my_var, "smokestack/school", vbTextCompare)
pos_6 = InStr(pos_5, my_var, "", vbTextCompare)
pos_7 = InStr(pos_6, my_var, "<", vbTextCompare)
sc_name = Mid(my_var, 1 + pos_6, pos_7 - (1 + pos_6))

pos_10 = InStr(pos_7, my_var, "p", vbTextCompare)
pos_11 = InStr(pos_10, my_var, "</p", vbTextCompare)
addr_city_state = Mid(my_var, 2 + pos_10, pos_11 - (2 + pos_10))
' extract the two letter state designation
sc_state = Right(addr_city_state, 2)
' separate the address and the city
jj = InStrRev(addr_city_state, ",", -1, vbTextCompare)
kk = InStrRev(addr_city_state, " ", jj, vbTextCompare)
sc_address = Left(addr_city_state, kk - 1)
sc_city = Trim(Mid(addr_city_state, kk, jj - kk))

pos_15 = InStrRev(my_var, "air is worse", pos_5, vbTextCompare)
pos_16 = InStr(pos_15, my_var, "at", vbTextCompare)
pos_17 = InStr(pos_16, my_var, "school", vbTextCompare)
sc_rank = Mid(my_var, 3 + pos_16, -1 + pos_17 - (3 + pos_16))

yy = InStr(pos_7, my_var, "smokestack/school", vbTextCompare)

' Put the current school data into the workbook
ActiveCell = sc_name
ActiveCell.Offset(0, 1) = sc_address
ActiveCell.Offset(0, 2) = sc_city
ActiveCell.Offset(0, 3) = sc_state
ActiveCell.Offset(0, 4) = sc_rank

ActiveCell.Offset(1, 0).Select
Loop
Next

Range("A:E").Select
Selection.Columns.AutoFit
Range("A1").Select
End Sub