import altair as alt
import numpy as np
import pandas as pd
import holoviews as hv
import hvplot.pandas
import geopandas as gpd

# Load the bokeh plotting backend for HoloViews/hvPlot
hv.extension("bokeh")
%matplotlib inline
Wave Data Visualization
Wave Data Source: WIS Data Portal
The wave data in this project comes from the web scraping page.
The data cover the full year of 2022, and the area of interest is the part of Lake Erie within New York State’s boundary.
import panel as pn
Step 1: Read all the downloaded data from csv
The zip file from the web scraping process includes multiple csv (one for each buoy)
# Read one CSV per buoy station. Each variable is named after the station ID
# that appears in its file name, so ST92023_raw holds the ST92023 export, etc.
ST92023_raw = pd.read_csv("./data/web-scarp/1-ST92023-generic_export-20231218T20_22.csv")
ST92022_raw = pd.read_csv("./data/web-scarp/2-ST92022-generic_export-20231218T20_22.csv")
ST92021_raw = pd.read_csv("./data/web-scarp/3-ST92021-generic_export-20231218T20_22.csv")
ST92020_raw = pd.read_csv("./data/web-scarp/4-ST92020-generic_export-20231218T20_22.csv")
ST92019_raw = pd.read_csv("./data/web-scarp/5-ST92019-generic_export-20231218T20_22.csv")
ST92018_raw = pd.read_csv("./data/web-scarp/6-ST92018-generic_export-20231218T20_22.csv")
ST92017_raw = pd.read_csv("./data/web-scarp/7-ST92017-generic_export-20231218T20_22.csv")
ST92016_raw = pd.read_csv("./data/web-scarp/8-ST92016-generic_export-20231218T20_22.csv")
ST92015_raw = pd.read_csv("./data/web-scarp/9-ST92015-generic_export-20231218T20_22.csv")
ST92014_raw = pd.read_csv("./data/web-scarp/10-ST92014-generic_export-20231218T20_22.csv")
ST92013_raw = pd.read_csv("./data/web-scarp/11-ST92013-generic_export-20231218T20_22.csv")
ST92012_raw = pd.read_csv("./data/web-scarp/12-ST92012-generic_export-20231218T20_22.csv")
ST92011_raw = pd.read_csv("./data/web-scarp/13-ST92011-generic_export-20231218T20_22.csv")
ST92010_raw = pd.read_csv("./data/web-scarp/14-ST92010-generic_export-20231218T20_22.csv")
ST92009_raw = pd.read_csv("./data/web-scarp/15-ST92009-generic_export-20231218T20_22.csv")
ST92008_raw = pd.read_csv("./data/web-scarp/16-ST92008-generic_export-20231218T20_22.csv")
ST92007_raw = pd.read_csv("./data/web-scarp/17-ST92007-generic_export-20231218T20_22.csv")
ST92006_raw = pd.read_csv("./data/web-scarp/18-ST92006-generic_export-20231218T20_22.csv")
ST92005_raw = pd.read_csv("./data/web-scarp/19-ST92005-generic_export-20231218T20_22.csv")
ST92004_raw = pd.read_csv("./data/web-scarp/20-ST92004-generic_export-20231218T20_22.csv")
ST92003_raw = pd.read_csv("./data/web-scarp/21-ST92003-generic_export-20231218T20_22.csv")
ST92002_raw = pd.read_csv("./data/web-scarp/22-ST92002-generic_export-20231218T20_22.csv")
ST92001_raw = pd.read_csv("./data/web-scarp/23-ST92001-generic_export-20231218T20_22.csv")
ST92243_raw = pd.read_csv("./data/web-scarp/24-ST92243-generic_export-20231218T20_22.csv")
# Preview the first rows of the raw data for station ST92023:
ST92023_raw.head()
time | latitude | longitude | waveHs | |
---|---|---|---|---|
0 | 2022-01-01 00:00:00 | 42.32 | -79.88 | 0.132812 |
1 | 2022-01-01 01:00:00 | 42.32 | -79.88 | 0.140625 |
2 | 2022-01-01 02:00:00 | 42.32 | -79.88 | 0.140625 |
3 | 2022-01-01 03:00:00 | 42.32 | -79.88 | 0.140625 |
4 | 2022-01-01 04:00:00 | 42.32 | -79.88 | 0.132812 |
# Data type of each column
ST92023_raw.dtypes
time object
latitude float64
longitude float64
waveHs float64
dtype: object
Step 2: Clean the raw data
Since there are 24 files, a list will be helpful for later manipulations
# Collect the 24 raw per-station dataframes so they can be processed in a loop.
# The order here must match the order of station names used when labelling.
buoy_list_raw = [
    ST92023_raw, ST92022_raw, ST92021_raw, ST92020_raw, ST92019_raw,
    ST92018_raw, ST92017_raw, ST92016_raw, ST92015_raw, ST92014_raw,
    ST92013_raw, ST92012_raw, ST92011_raw, ST92010_raw, ST92009_raw,
    ST92008_raw, ST92007_raw, ST92006_raw, ST92005_raw, ST92004_raw,
    ST92003_raw, ST92002_raw, ST92001_raw, ST92243_raw,
]
First, clean the raw data. This process includes converting the time column to datetime objects and dropping null values.
# Cleaned per-station dataframes, in the same order as buoy_list_raw.
buoy_list = []

for buoy in buoy_list_raw:
    # Convert the "time" strings into a proper datetime column
    buoy["datetime"] = pd.to_datetime(buoy["time"])

    # Drop rows whose wave height is negative (treated as null readings)
    buoy_clean = buoy.drop(buoy[buoy['waveHs'] < 0].index)

    # Save the cleaned dataframe
    buoy_list.append(buoy_clean)

len(buoy_list)
24
Then, reshape each buoy’s data into the preferred structure, which includes a column for the month and a column for the day of the year. Each buoy’s dataframe also gets a column holding its station name.
# Station IDs, listed in the same order as the dataframes in buoy_list.
buoy_name = [
    'ST92023', 'ST92022', 'ST92021', 'ST92020', 'ST92019',
    'ST92018', 'ST92017', 'ST92016', 'ST92015', 'ST92014',
    'ST92013', 'ST92012', 'ST92011', 'ST92010', 'ST92009',
    'ST92008', 'ST92007', 'ST92006', 'ST92005', 'ST92004',
    'ST92003', 'ST92002', 'ST92001', 'ST92243',
]

# Add the time-derived columns and the station label to each cleaned dataframe
# (modified in place, so buoy_list holds the enriched frames afterwards).
for buoy, station in zip(buoy_list, buoy_name):
    # Month as a number (for ordering) and as an abbreviated name (for display)
    buoy["month_int"] = buoy["datetime"].dt.month
    buoy["month"] = buoy["datetime"].dt.strftime("%b")

    # Day of the year
    buoy["day_int"] = buoy["datetime"].dt.dayofyear

    # Station name
    buoy["station"] = station
Combine the separate dataframes into a single dataframe
# Stack all per-station dataframes into one long dataframe
buoy_clean_combine = pd.concat(buoy_list)
len(buoy_clean_combine)
187680
A view of the final dataframe
# Preview the first rows of the combined dataframe
buoy_clean_combine.head()
time | latitude | longitude | waveHs | datetime | month_int | month | day_int | station | |
---|---|---|---|---|---|---|---|---|---|
0 | 2022-01-01 00:00:00 | 42.32 | -79.88 | 0.132812 | 2022-01-01 00:00:00 | 1 | Jan | 1 | ST92023 |
1 | 2022-01-01 01:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 01:00:00 | 1 | Jan | 1 | ST92023 |
2 | 2022-01-01 02:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 02:00:00 | 1 | Jan | 1 | ST92023 |
3 | 2022-01-01 03:00:00 | 42.32 | -79.88 | 0.140625 | 2022-01-01 03:00:00 | 1 | Jan | 1 | ST92023 |
4 | 2022-01-01 04:00:00 | 42.32 | -79.88 | 0.132812 | 2022-01-01 04:00:00 | 1 | Jan | 1 | ST92023 |
Save the dataframe for future usage
"./data/wave2022.csv", index=False) buoy_clean_combine.to_csv(
Step 3: Line Chart Visualization
The first line chart plot shows the wave height of different buoys in 2022. By dragging the widget from left to right, you can have a look at the buoys located from south to north
# Line chart of wave height over time, one station at a time; the station is
# chosen with a discrete slider placed below the plot.
clean_combine_chart = buoy_clean_combine.hvplot(
    x="datetime",
    y="waveHs",
    groupby="station",
    width=900,
    kind="line",
    widgets={'station': pn.widgets.DiscreteSlider},
    widget_location='bottom',
    color='#E07069',
    line_width=0.8,
    title='Lake Erie NYS Shoreline 2022 Wave Height',
    xlabel="Date",
    ylabel="Wave Height / m",
)

clean_combine_chart
Observations:
The wave height recorded by each buoy usually fluctuates considerably from day to day. However, the overall trend is that buoys encounter higher waves in winter.
Note:
After searching for hours, I would conclude that Quarto does not support the hvplot widget in the notebook. If you want to see the live version of the above widget, please refer to the final project’s repository and open it in Jupyter Notebook instead. There are discussions about this issue. The conclusion is that “The kind of Jupyter interactivity that we can’t easily support directly in Quarto is that which involves a live Jupyter process for every open web browser. In order for ipywidgets.interact to work, a jupyter kernel needs to be live and running, and that is a fundamentally more involved mode of deployment.” Alternatively, there are ways to possibly solve this issue by using the shinylive package, which involves NodeJS and is supported by the Quarto website. However, NodeJS is out of the scope of this project and will not be included here.
The second line plot compares the southernmost buoy to the northernmost buoy in order to show the trend across the shoreline. ST92023 is the southernmost buoy along the NYS shoreline of Lake Erie, and ST92243 is the northernmost buoy along the NYS shoreline of Lake Erie, near Buffalo.
# Keep only the rows belonging to the two stations being compared
two_end = ['ST92243', 'ST92023']
ends = buoy_clean_combine.loc[buoy_clean_combine['station'].isin(two_end)]
# Overlay the two selected stations on one line chart, one colour per station
end_chart = ends.hvplot(
    x="datetime",
    y="waveHs",
    by="station",
    kind="line",
    color=['#E07069', '#6989E0'],
    line_width=0.8,
    width=900,
    title="Compare North and South End's Wave Height",
    xlabel="Date",
    ylabel="Wave Height / m",
)

end_chart
Observation:
The south buoy has higher waves than the north buoy. This is probably because the prevailing wind of Lake Erie is southwest. Besides, the north buoy has a long period without data (from February to April), which is probably because the lake is frozen during that time.
Step 4: Heat Map Visualization
This step will calculate the average wave height for each month or each day in 2022 and show the results in heatmaps
# Heatmap of mean wave height per station (rows) and month (columns)
height_heat = buoy_clean_combine.hvplot.heatmap(
    x='month_int',
    y='station',
    C='waveHs',
    reduce_function=np.mean,  # aggregate all readings in a cell by their mean
    cmap='Magma',
    width=900,
    height=300,
    colorbar=True,
    title="Lake Erie NYS Shoreline 2022 Buoys'Wave Height by Month",
)

# Relabel the dimensions for display
height_heat.redim(station="Stations", month_int="Month")
Observation:
The wave height is higher in winter (November to January) and lower in summer (May to August). There is a small time lag for the change of wave height, which means wave height of south shoreline changes earlier than the north shoreline.
# Heatmap of mean wave height per station (rows) and day of year (columns)
height_heat = buoy_clean_combine.hvplot.heatmap(
    x='day_int',
    y='station',
    C='waveHs',
    reduce_function=np.mean,  # aggregate all readings in a cell by their mean
    width=900,
    height=300,
    colorbar=True,
    cmap='Magma',
    title="Lake Erie NYS Shoreline 2022 Buoys'Wave Height by Day",
)

# Relabel the dimensions for display
height_heat.redim(station="Stations", day_int="Day of 2022")
Observation:
In February and March, a lot of buoys have missing data, which is very likely because of the frozen lake. In December, there was a large blizzard and seiche event, which is indicated in this plot. In a single day, the wave height was above 7 meters for almost all the buoys along the NYS shoreline of Lake Erie.