import altair as alt
import numpy as np
import pandas as pd
import holoviews as hv
import hvplot.pandas
# Load bokeh
hv.extension("bokeh")
import geopandas as gpd
%matplotlib inline
Wave Data Visualization
Wave Data Source: WIS Data Portal
The wave data in this project comes from the web scraping page.
The data cover the full year of 2022, and the area of interest is the part of Lake Erie within New York State’s boundary.
import panel as pn
Step 1: Read all the downloaded data from csv
The zip file from the web scraping process includes multiple CSV files (one for each buoy)
def _read_buoy_export(order, station):
    """Load one buoy's raw CSV export.

    ``order`` is the numeric prefix of the downloaded file name and
    ``station`` is the station id that follows it.
    """
    return pd.read_csv(
        f"./data/web-scarp/{order}-{station}-generic_export-20231218T20_22.csv"
    )


# One raw frame per buoy; these variable names are reused further down.
ST92023_raw = _read_buoy_export(1, "ST92023")
ST92022_raw = _read_buoy_export(2, "ST92022")
ST92021_raw = _read_buoy_export(3, "ST92021")
ST92020_raw = _read_buoy_export(4, "ST92020")
ST92019_raw = _read_buoy_export(5, "ST92019")
ST92018_raw = _read_buoy_export(6, "ST92018")
ST92017_raw = _read_buoy_export(7, "ST92017")
ST92016_raw = _read_buoy_export(8, "ST92016")
ST92015_raw = _read_buoy_export(9, "ST92015")
ST92014_raw = _read_buoy_export(10, "ST92014")
ST92013_raw = _read_buoy_export(11, "ST92013")
ST92012_raw = _read_buoy_export(12, "ST92012")
ST92011_raw = _read_buoy_export(13, "ST92011")
ST92010_raw = _read_buoy_export(14, "ST92010")
ST92009_raw = _read_buoy_export(15, "ST92009")
ST92008_raw = _read_buoy_export(16, "ST92008")
ST92007_raw = _read_buoy_export(17, "ST92007")
ST92006_raw = _read_buoy_export(18, "ST92006")
ST92005_raw = _read_buoy_export(19, "ST92005")
ST92004_raw = _read_buoy_export(20, "ST92004")
ST92003_raw = _read_buoy_export(21, "ST92003")
ST92002_raw = _read_buoy_export(22, "ST92002")
ST92001_raw = _read_buoy_export(23, "ST92001")
ST92243_raw = _read_buoy_export(24, "ST92243")
# A view of the raw data:
ST92023_raw.head()
| | time | latitude | longitude | waveHs |
|---|---|---|---|---|
| 0 | 2022-01-01 00:00:00 | 42.32 | -79.88 | 0.132812 |
| 1 | 2022-01-01 01:00:00 | 42.32 | -79.88 | 0.140625 |
| 2 | 2022-01-01 02:00:00 | 42.32 | -79.88 | 0.140625 |
| 3 | 2022-01-01 03:00:00 | 42.32 | -79.88 | 0.140625 |
| 4 | 2022-01-01 04:00:00 | 42.32 | -79.88 | 0.132812 |
# Data type for each column
ST92023_raw.dtypes
time object
latitude float64
longitude float64
waveHs float64
dtype: object
Step 2: Clean the raw data
Since there are 24 files, a list will be helpful for later manipulations
# All 24 raw buoy frames, in the same order in which they were read,
# so the following steps can loop over them.
buoy_list_raw = [
    ST92023_raw,
    ST92022_raw,
    ST92021_raw,
    ST92020_raw,
    ST92019_raw,
    ST92018_raw,
    ST92017_raw,
    ST92016_raw,
    ST92015_raw,
    ST92014_raw,
    ST92013_raw,
    ST92012_raw,
    ST92011_raw,
    ST92010_raw,
    ST92009_raw,
    ST92008_raw,
    ST92007_raw,
    ST92006_raw,
    ST92005_raw,
    ST92004_raw,
    ST92003_raw,
    ST92002_raw,
    ST92001_raw,
    ST92243_raw,
]
# First, clean the raw data. This process includes handling the datetime
# column and dropping null values.
# Cleaned version of every raw frame, kept in the same order as buoy_list_raw.
buoy_list = []
for buoy in buoy_list_raw:
    # Parse the text timestamps; the new column is written onto the raw frame itself.
    buoy["datetime"] = pd.to_datetime(buoy["time"])
    # Negative wave heights are the readings treated as null here, so every
    # row carrying one is dropped by its index label.
    is_negative = buoy["waveHs"] < 0
    buoy_clean = buoy.drop(index=buoy.index[is_negative])
    buoy_list.append(buoy_clean)
len(buoy_list)  # 24
Then, reshape each buoy’s data into the preferred structure, which includes a column for the month and a column for the day of the year. Each buoy will also have its station name in a column
# Station ids, listed in the same order as the frames in buoy_list.
buoy_name = [
    "ST92023", "ST92022", "ST92021", "ST92020", "ST92019", "ST92018",
    "ST92017", "ST92016", "ST92015", "ST92014", "ST92013", "ST92012",
    "ST92011", "ST92010", "ST92009", "ST92008", "ST92007", "ST92006",
    "ST92005", "ST92004", "ST92003", "ST92002", "ST92001", "ST92243",
]
for i, buoy in enumerate(buoy_list):
    timestamps = buoy["datetime"].dt
    # Columns for arranging the data by month: month number and abbreviated name.
    buoy["month_int"] = timestamps.month
    buoy["month"] = timestamps.strftime("%b")
    # Column for arranging the data by date: day of the year.
    buoy["day_int"] = timestamps.dayofyear
    # Tag every row with the station this frame belongs to.
    buoy["station"] = buoy_name[i]
Combine the separate dataframes into a single dataframe
buoy_clean_combine = pd.concat(buoy_list)
len(buoy_clean_combine)
187680
A view of the final dataframe
buoy_clean_combine.head()
| | time | latitude | longitude | waveHs | datetime | month_int | month | day_int | station |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022-01-01 00:00:00 | 42.32 | -79.88 | 0.132812 | 2022-01-01 00:00:00 | 1 | Jan | 1 | ST92023 |
| 1 | 2022-01-01 01:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 01:00:00 | 1 | Jan | 1 | ST92023 |
| 2 | 2022-01-01 02:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 02:00:00 | 1 | Jan | 1 | ST92023 |
| 3 | 2022-01-01 03:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 03:00:00 | 1 | Jan | 1 | ST92023 |
| 4 | 2022-01-01 04:00:00 | 42.32 | -79.88 | 0.132812 | 2022-01-01 04:00:00 | 1 | Jan | 1 | ST92023 |
Save the dataframe for future usage
buoy_clean_combine.to_csv("./data/wave2022.csv", index=False)
Step 3: Line Chart Visualization
The first line chart plot shows the wave height of different buoys in 2022. By dragging the widget from left to right, you can have a look at the buoys located from south to north
# Options for the per-station wave-height line chart. Grouping by "station"
# gives one curve per buoy, selected with a DiscreteSlider placed below the plot.
station_chart_options = {
    "x": "datetime",
    "y": "waveHs",
    "groupby": "station",
    "width": 900,
    "kind": "line",
    "widgets": {'station': pn.widgets.DiscreteSlider},
    "widget_location": 'bottom',
    "color": '#E07069',
    "line_width": 0.8,
    "title": 'Lake Erie NYS Shoreline 2022 Wave Height',
    "xlabel": "Date",
    "ylabel": "Wave Height / m",
}
clean_combine_chart = buoy_clean_combine.hvplot(**station_chart_options)
clean_combine_chart
Observations:
The wave height recorded by each buoy usually fluctuates considerably from day to day. However, the overall trend is that buoys encounter higher waves in winter.
Note:
After searching for hours, I concluded that Quarto does not support the hvplot widget in the notebook. If you want to see the live version of the above widget, please refer to the final project’s repository and open it in Jupyter Notebook instead. There are discussions about this issue. The conclusion is that “The kind of Jupyter interactivity that we can’t easily support directly in Quarto is that which involves a live Jupyter process for every open web browser. In order for ipywidgets.interact to work, a jupyter kernel needs to be live and running, and that is a fundamentally more involved mode of deployment.” Alternatively, it may be possible to solve this issue with the shinylive package, which involves NodeJS and is supported by the Quarto website. However, NodeJS is out of the scope of this project and will not be included here.
The second line plot compares the southernmost buoy with the northernmost buoy in order to show the trend across the shoreline. ST92023 is the southernmost buoy along the NYS shoreline of Lake Erie, and ST92243 is the northernmost buoy along the NYS shoreline of Lake Erie, near Buffalo
# The two stations compared in this chart.
two_end = ["ST92243", "ST92023"]
# Keep only the rows recorded by those two stations.
is_end_station = buoy_clean_combine["station"].isin(two_end)
ends = buoy_clean_combine.loc[is_end_station]
# Both stations are overlaid on one plot, one colour per station.
end_chart_options = {
    "x": "datetime",
    "y": "waveHs",
    "by": "station",
    "kind": "line",
    "color": ['#E07069', '#6989E0'],
    "line_width": 0.8,
    "width": 900,
    "title": "Compare North and South End's Wave Height",
    "xlabel": "Date",
    "ylabel": "Wave Height / m",
}
end_chart = ends.hvplot(**end_chart_options)
end_chart
Observation:
The south buoy has higher waves than the north buoy. This is probably because the prevailing wind of Lake Erie is southwest. Besides, the north buoy has a long period without data (from February to April), which is probably because the lake is frozen during that time.
Step 4: Heat Map Visualization
This step will calculate the average wave height for each month or each day in 2022 and show the results in heatmaps
# Heatmap of wave height with month number on x and station on y;
# the values falling in each cell are reduced with np.mean.
monthly_heat_options = {
    "x": 'month_int',
    "y": 'station',
    "C": 'waveHs',
    "reduce_function": np.mean,
    "cmap": 'Magma',
    "width": 900,
    "height": 300,
    "colorbar": True,
    "title": "Lake Erie NYS Shoreline 2022 Buoys'Wave Height by Month",
}
height_heat = buoy_clean_combine.hvplot.heatmap(**monthly_heat_options)
height_heat.redim(station="Stations", month_int="Month")
Observation:
The wave height is higher in winter (November to January) and lower in summer (May to August). There is a small time lag for the change of wave height, which means wave height of south shoreline changes earlier than the north shoreline.
# Heatmap of wave height with day of the year on x and station on y;
# the values falling in each cell are reduced with np.mean.
daily_heat_options = {
    "x": 'day_int',
    "y": 'station',
    "C": 'waveHs',
    "reduce_function": np.mean,
    "width": 900,
    "height": 300,
    "colorbar": True,
    "cmap": 'Magma',
    "title": "Lake Erie NYS Shoreline 2022 Buoys'Wave Height by Day",
}
height_heat = buoy_clean_combine.hvplot.heatmap(**daily_heat_options)
height_heat.redim(station="Stations", day_int="Day of 2022")
Observation:
In February and March, a lot of buoys have missing data, which is very likely because of the frozen lake. In December, there was a large blizzard and seiche event, which is indicated in this plot. In a single day, the wave height was above 7 meters for almost all the buoys along the NYS shoreline of Lake Erie.