import requests
# Import the webdriver from selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
Web Scraping of Wave Height Data from Buoys
Wave Data Source: WIS Data Portal
The wave data in this project comes from the buoys on Lake Erie. The data gathering process uses web scraping because there are too many buoys to download manually.
The time period is the 2022 yearly data, and the area of interest is the part of Lake Erie within New York State’s boundary (highlighted in yellow).
from time import sleep
Step 1: Set up driver
driver = webdriver.Chrome()
This website takes a long time to load, so we pause for 10 seconds before continuing with the following steps.
= "https://wisportal.erdc.dren.mil/#"
url
driver.get(url)10) sleep(
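A fixed sleep either wastes time or fails when the connection is slow. As an alternative, here is a minimal sketch using Selenium's explicit waits; it assumes the map container keeps the #map-view ID that the selectors below rely on:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 30 seconds for the map container to appear before moving on
WebDriverWait(driver, 30).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "#map-view")))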
Step 2: Add wave height data of each buoy to an export group online
Because the buoy buttons on the website are markers generated by Leaflet, the CSS selectors associated with each marker use arbitrary nth-child indices. First, we need a list of all the buoy selectors in the preferred sequence, which is from south to north.
Because of this randomness, the Leaflet marker indices may not be the same in every session (although most of them will stay the same), so they may vary slightly from the list below. Make sure to recheck them before reusing them; the sketch after the list shows one way to collect the markers dynamically instead.
marker_list = ['#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3014)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2281)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2241)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2278)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2275)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2547)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2886)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2545)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2272)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2881)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3009)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2269)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2266)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3012)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3010)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2263)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2876)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2872)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2543)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3006)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2867)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(3007)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2541)',
               '#map-view > div.leaflet-pane.leaflet-map-pane > div.leaflet-pane.leaflet-marker-pane > img:nth-child(2384)']
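If the hardcoded indices break, one way to avoid them entirely is to collect whatever marker images Leaflet has rendered. This is only a sketch, under the assumption that every buoy of interest appears as an img element in the marker pane:

# Collect every Leaflet marker image currently rendered on the map
markers = driver.find_elements(By.CSS_SELECTOR,
                               "#map-view div.leaflet-marker-pane > img")
print(f"Found {len(markers)} markers")
# Note: this returns all markers on the map, not just the 24 New York
# buoys, so the result would still need to be filtered and ordered.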
Since there are a lot of buoys, a function can help us handle the scraping process for each individual buoy.
def add_each_marker_to_export_group(marker_selector):
    """Handle the scraping process for one marker."""
    # click marker
    buoy_input = driver.find_element(By.CSS_SELECTOR, marker_selector)
    buoy_input.click()
    sleep(1)

    # click general export button
    general_export_selector = "#generic_export"
    general_export_input = driver.find_element(By.CSS_SELECTOR, general_export_selector)
    general_export_input.click()
    sleep(1)

    # check wave height checkbox
    waveheight_selector = "#check-waveHs"
    waveheight_check = driver.find_element(By.CSS_SELECTOR, waveheight_selector)
    waveheight_check.click()
    sleep(1)

    # add to export group
    add_to_export_group_selector = "#ep-export-button"
    add_to_export_group_button = driver.find_element(By.CSS_SELECTOR, add_to_export_group_selector)
    add_to_export_group_button.click()
    sleep(1)
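The fixed one-second sleeps assume every panel opens in time. A more defensive variant, sketched here and not wired into the function above, waits until each element is actually clickable (the 10-second timeout is an assumption):

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def click_when_ready(selector, timeout=10):
    """Wait until the element matching selector is clickable, then click it."""
    element = WebDriverWait(driver, timeout).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
    element.click()

Each find_element / click() / sleep(1) triple in the function could then be replaced with a single click_when_ready(selector) call.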
Apply the above function to each marker in the list
for marker_selector in marker_list:
    add_each_marker_to_export_group(marker_selector)
    sleep(1)
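If one stale selector raises an error, the whole loop stops. A hedged alternative that records failures and keeps going (NoSuchElementException is what Selenium raises when a selector matches nothing):

from selenium.common.exceptions import NoSuchElementException

failed = []
for marker_selector in marker_list:
    try:
        add_each_marker_to_export_group(marker_selector)
    except NoSuchElementException:
        # Record the selector so it can be rechecked in the browser
        failed.append(marker_selector)
    sleep(1)
print(f"{len(failed)} markers failed: {failed}")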
Step 3: Download the export group
After adding all the buoy data we need, we download the export group summary, which will be a ZIP file.
# Go to the export group summary page
export_group_selector = "#export-summary"
export_group_button = driver.find_element(By.CSS_SELECTOR, export_group_selector)
export_group_button.click()

# Download the data we need
download_selector = "#ep-download-all-button"
download_button = driver.find_element(By.CSS_SELECTOR, download_selector)
download_button.click()
sleep(10)
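Once the download finishes, the ZIP file still needs to be unpacked. A minimal sketch using only the standard library, assuming the browser saved to the default ~/Downloads folder (both paths are assumptions, not part of the original workflow):

import glob
import os
import zipfile

# Find the most recently downloaded ZIP in the default Downloads folder
downloads = os.path.expanduser("~/Downloads")
latest_zip = max(glob.glob(os.path.join(downloads, "*.zip")),
                 key=os.path.getmtime)

# Extract the buoy data files into a local folder
with zipfile.ZipFile(latest_zip) as zf:
    zf.extractall("wave_data")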
Step 4: Close Driver
# Close the browser window; driver.quit() would also end the WebDriver session
driver.close()