THREDDS-served GRIB Data: GFS (Global Grid) Model: Eclipse Cloud Cover Forecast

This notebook reads in realtime GFS model output from a THREDDS server.

Overview:

  1. Determine latest available model run

  2. Use dictionaries to deal with coordinate dimension names that change from run-to-run

  3. Subset data and map regions

  4. Create a 4-panel plot

Imports

import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.io.shapereader import Reader
from matplotlib import pyplot as plt
from matplotlib.dates import DateFormatter, AutoDateLocator,HourLocator,DayLocator
from matplotlib.collections import LineCollection
from matplotlib.colors import BoundaryNorm, ListedColormap
import numpy as np
import xarray as xr
import pandas as pd
from datetime import datetime, timedelta
from pyproj import Proj
from metpy.plots import USCOUNTIES
import seaborn as sns
import geopandas as gpd
sns.set()

Access real-time NWP data from Unidata’s THREDDS server

Parse the current time and choose a recent hour when we might expect the model run to be complete. There is normally a ~90 minute lag, but we’ll give it a bit more than that.

now = datetime.now()
year = now.year
month = now.month
day = now.day
hour = now.hour
minute = now.minute
# Allow for a six-hour lag between GFS initialization and current time
if (hour >= 18):
    runHour = 12
    hrDelta = hour - runHour
elif (hour >= 12):
    runHour = 6
    hrDelta = hour - runHour
elif (hour >= 6):
    runHour = 0
    hrDelta = hour - runHour
else:
    runHour = 18
    hrDelta = hour + 6
    
runTime = now - timedelta(hours=hrDelta)
runTimeStr = runTime.strftime('%Y%m%d %H00 UTC')
modelDate = runTime.strftime('%Y%m%d')
modelHour = runTime.strftime('%H')
modelMin = runTime.strftime('%M')
modelDay = runTime.strftime('%D')
titleTime = modelDay + ' ' +  modelHour + '00 UTC'
print (modelDay)
print (modelHour)
print (runTimeStr)
print(titleTime)
04/29/24
18
20240429 1800 UTC
04/29/24 1800 UTC

Open the most recent NCEP real-timeGFS from Unidata’s THREDDS server

ds = xr.open_dataset(f'https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg/GFS_Global_0p25deg_{modelDate}_{modelHour}00.grib2')
ds
<xarray.Dataset>
Dimensions:                                                                     (
                                                                                 lat: 721,
                                                                                 lon: 1440,
                                                                                 time: 254,
                                                                                 time1: 129,
                                                                                 time2: 128,
                                                                                 ...
                                                                                 height_above_ground_layer_bounds_1: 2,
                                                                                 height_above_ground_layer1_bounds_1: 2,
                                                                                 pressure_difference_layer1_bounds_1: 2,
                                                                                 pressure_difference_layer2_bounds_1: 2,
                                                                                 sigma_layer_bounds_1: 2,
                                                                                 depth_below_surface_layer_bounds_1: 2)
Coordinates: (12/28)
  * lat                                                                         (lat) float32 ...
  * lon                                                                         (lon) float32 ...
    reftime                                                                     datetime64[ns] ...
  * time                                                                        (time) datetime64[ns] ...
  * time1                                                                       (time1) datetime64[ns] ...
  * time2                                                                       (time2) datetime64[ns] ...
    ...                                                                          ...
  * height_above_ground4                                                        (height_above_ground4) float32 ...
  * height_above_ground5                                                        (height_above_ground5) float32 ...
  * potential_vorticity_surface                                                 (potential_vorticity_surface) float32 ...
  * sigma                                                                       (sigma) float32 ...
  * hybrid                                                                      (hybrid) float32 ...
  * hybrid1                                                                     (hybrid1) float32 ...
Dimensions without coordinates: time_bounds_1, time2_bounds_1,
                                pressure_difference_layer_bounds_1,
                                height_above_ground_layer_bounds_1,
                                height_above_ground_layer1_bounds_1,
                                pressure_difference_layer1_bounds_1,
                                pressure_difference_layer2_bounds_1,
                                sigma_layer_bounds_1,
                                depth_below_surface_layer_bounds_1
Data variables: (12/180)
    LatLon_Projection                                                           int32 ...
    time_bounds                                                                 (time, time_bounds_1) datetime64[ns] ...
    time2_bounds                                                                (time2, time2_bounds_1) datetime64[ns] ...
    pressure_difference_layer_bounds                                            (pressure_difference_layer, pressure_difference_layer_bounds_1) float32 ...
    height_above_ground_layer_bounds                                            (height_above_ground_layer, height_above_ground_layer_bounds_1) float32 ...
    height_above_ground_layer1_bounds                                           (height_above_ground_layer1, height_above_ground_layer1_bounds_1) float32 ...
    ...                                                                          ...
    v-component_of_wind_planetary_boundary                                      (time1, lat, lon) float32 ...
    v-component_of_wind_altitude_above_msl                                      (time1, altitude_above_msl, lat, lon) float32 ...
    v-component_of_wind_maximum_wind                                            (time1, lat, lon) float32 ...
    v-component_of_wind_tropopause                                              (time1, lat, lon) float32 ...
    v-component_of_wind_height_above_ground                                     (time1, height_above_ground2, lat, lon) float32 ...
    v-component_of_wind_sigma                                                   (time1, sigma, lat, lon) float32 ...
Attributes:
    Originating_or_generating_Center:                                        ...
    Originating_or_generating_Subcenter:                                     ...
    GRIB_table_version:                                                      ...
    Type_of_generating_process:                                              ...
    Analysis_or_forecast_generating_process_identifier_defined_by_originating...
    file_format:                                                             ...
    Conventions:                                                             ...
    history:                                                                 ...
    featureType:                                                             ...
    _CoordSysBuilder:                                                        ...

Leverage MetPy’s parse_cf function, which among other things will make it easy to extract the dataset’s map projection.

ds = ds.metpy.parse_cf()
ds
<xarray.Dataset>
Dimensions:                                                                     (
                                                                                 time: 254,
                                                                                 time_bounds_1: 2,
                                                                                 time2: 128,
                                                                                 time2_bounds_1: 2,
                                                                                 pressure_difference_layer: 1,
                                                                                 ...
                                                                                 height_above_ground5: 2,
                                                                                 altitude_above_msl: 3,
                                                                                 height_above_ground3: 3,
                                                                                 hybrid1: 2,
                                                                                 height_above_ground1: 2,
                                                                                 height_above_ground2: 7)
Coordinates: (12/29)
    reftime                                                                     datetime64[ns] ...
  * time                                                                        (time) datetime64[ns] ...
  * time2                                                                       (time2) datetime64[ns] ...
  * pressure_difference_layer                                                   (pressure_difference_layer) float32 ...
  * height_above_ground_layer                                                   (height_above_ground_layer) float32 ...
  * height_above_ground_layer1                                                  (height_above_ground_layer1) float32 ...
    ...                                                                          ...
  * height_above_ground5                                                        (height_above_ground5) float32 ...
  * altitude_above_msl                                                          (altitude_above_msl) float32 ...
  * height_above_ground3                                                        (height_above_ground3) float32 ...
  * hybrid1                                                                     (hybrid1) float32 ...
  * height_above_ground1                                                        (height_above_ground1) float32 ...
  * height_above_ground2                                                        (height_above_ground2) float32 ...
Dimensions without coordinates: time_bounds_1, time2_bounds_1,
                                pressure_difference_layer_bounds_1,
                                height_above_ground_layer_bounds_1,
                                height_above_ground_layer1_bounds_1,
                                pressure_difference_layer1_bounds_1,
                                pressure_difference_layer2_bounds_1,
                                sigma_layer_bounds_1,
                                depth_below_surface_layer_bounds_1
Data variables: (12/180)
    LatLon_Projection                                                           int32 ...
    time_bounds                                                                 (time, time_bounds_1) datetime64[ns] ...
    time2_bounds                                                                (time2, time2_bounds_1) datetime64[ns] ...
    pressure_difference_layer_bounds                                            (pressure_difference_layer, pressure_difference_layer_bounds_1) float32 ...
    height_above_ground_layer_bounds                                            (height_above_ground_layer, height_above_ground_layer_bounds_1) float32 ...
    height_above_ground_layer1_bounds                                           (height_above_ground_layer1, height_above_ground_layer1_bounds_1) float32 ...
    ...                                                                          ...
    v-component_of_wind_planetary_boundary                                      (time1, lat, lon) float32 ...
    v-component_of_wind_altitude_above_msl                                      (time1, altitude_above_msl, lat, lon) float32 ...
    v-component_of_wind_maximum_wind                                            (time1, lat, lon) float32 ...
    v-component_of_wind_tropopause                                              (time1, lat, lon) float32 ...
    v-component_of_wind_height_above_ground                                     (time1, height_above_ground2, lat, lon) float32 ...
    v-component_of_wind_sigma                                                   (time1, sigma, lat, lon) float32 ...
Attributes:
    Originating_or_generating_Center:                                        ...
    Originating_or_generating_Subcenter:                                     ...
    GRIB_table_version:                                                      ...
    Type_of_generating_process:                                              ...
    Analysis_or_forecast_generating_process_identifier_defined_by_originating...
    file_format:                                                             ...
    Conventions:                                                             ...
    history:                                                                 ...
    featureType:                                                             ...
    _CoordSysBuilder:                                                        ...

Note that there now exists a metpy_crs coordinate variable, which we’ll soon use to determine and use the Dataset’s map projection.

Select some of the variables from the Dataset; in this case, we’ll choose those relating to cloud cover.

varTotal = ds.Total_cloud_cover_entire_atmosphere
varLow = ds.Low_cloud_cover_low_cloud
varMed = ds.Medium_cloud_cover_middle_cloud
varHigh = ds.High_cloud_cover_high_cloud

Examine one of the grids

varTotal
<xarray.DataArray 'Total_cloud_cover_entire_atmosphere' (time1: 129, lat: 721,
                                                         lon: 1440)>
[133932960 values with dtype=float32]
Coordinates:
    reftime    datetime64[ns] 2024-04-29T18:00:00
  * lat        (lat) float32 90.0 89.75 89.5 89.25 ... -89.25 -89.5 -89.75 -90.0
  * lon        (lon) float32 0.0 0.25 0.5 0.75 1.0 ... 359.0 359.2 359.5 359.8
  * time1      (time1) datetime64[ns] 2024-04-29T18:00:00 ... 2024-05-15T18:0...
    metpy_crs  object Projection: latitude_longitude
Attributes: (12/13)
    long_name:                       Total cloud cover @ Entire atmosphere
    units:                           %
    abbreviation:                    TCDC
    grid_mapping:                    LatLon_Projection
    Grib_Variable_Id:                VAR_0-6-1_L10
    Grib2_Parameter:                 [0 6 1]
    ...                              ...
    Grib2_Parameter_Category:        Cloud
    Grib2_Parameter_Name:            Total cloud cover
    Grib2_Level_Type:                10
    Grib2_Level_Desc:                Entire atmosphere
    Grib2_Generating_Process_Type:   Forecast
    Grib2_Statistical_Process_Type:  UnknownStatType--1

GRIB files that are served by THREDDS are automatically converted into NetCDF. A quirk in this translation can result in certain dimensions (typically those that are time or vertically-based) varying from run-to-run. For example, the time coordinate dimension for a given variable might be time in one run, but time1 (or time2, time3, etc.) in another run. This poses a challenge any time we want to subset a DataArray. For example, if a particular dataset had vertical levels in units of hPa, the dimension name for the geopotential height grid might be isobaric. If we wanted to select only the 500 and 700 hPa levels, we might write the following line of code:

Z = ds.sel(isobaric=[500, 700])

This would fail if we pointed to model output from a different time, and the vertical coordinate name for geopotential height was instead isobaric1.

Via strategic use of Python dictionaries, we can programmatically deal with this!

First, let’s use some MetPy accessors that determine the dimension names for times and vertical levels:

timeDim= varTotal.metpy.time.name
timeDim
'time1'

We next construct dictionaries corresponding to each desired subset of the relevant dimensions. Here, we specify a desired date and time:

verifTime = [datetime(2024, 4, 8, 18), datetime(2024, 4, 8, 21)]

timeDict = {timeDim: verifTime}

timeDict
{'time1': [datetime.datetime(2024, 4, 8, 18, 0),
  datetime.datetime(2024, 4, 8, 21, 0)]}

Get the map projection from one of the DataArrays. They will be the same across the Dataset.

proj_data = varTotal.metpy.cartopy_crs
proj_data
2024-04-30T01:04:27.724878 image/svg+xml Matplotlib v3.8.2, https://matplotlib.org/
<cartopy.crs.PlateCarree object at 0x1468e540ac90>

Create DataArray objects for the horizontal coordinate dimensions. In this case, the model is output on global cylindrical equidistant (PlateCarree) projection, which means the horizontal dimensions are named lon and lat and represent grid point locations in degrees.

x, y = ds.lon, ds.lat
x
<xarray.DataArray 'lon' (lon: 1440)>
array([0.0000e+00, 2.5000e-01, 5.0000e-01, ..., 3.5925e+02, 3.5950e+02,
       3.5975e+02], dtype=float32)
Coordinates:
    reftime    datetime64[ns] 2024-04-29T18:00:00
  * lon        (lon) float32 0.0 0.25 0.5 0.75 1.0 ... 359.0 359.2 359.5 359.8
    metpy_crs  object Projection: latitude_longitude
Attributes:
    units:                degrees_east
    _CoordinateAxisType:  Lon
    _metpy_axis:          x,longitude

Subset the dataset to a smaller region

The longitudes range from 0 to 359.75 degrees East. For the purposes of selecting ranges which include points in the western hemisphere, express these values as positive numbers. The latitudes go from north to south … i.e., in decreasing order.

lonW, lonE, latS, latN = [-80 + 360, -71+360, 40, 45]
mapExtent = lonW, lonE, latS, latN

We can directly select latitude and longitude values to perform regional subset selections; no need to use the Pyproj library.

Determine the grid spacing

delX, delY = ( (x[1] - x[0]), (y[1] - y[0]) )

In order for the data region to fill the plotted region, pad the longitude and latitude ranges (trial and error, depends on the subregion)

xPad = 4
yPad = 4

xRange = np.arange(lonW - xPad, lonE + xPad, delX)
yRange = np.arange(latN + yPad, latS - yPad, delY)

Read in the subsetted data and coordinate variables.

dsSub = ds.sel(lon = xRange, lat = yRange)
xSub, ySub = dsSub.lon, dsSub.lat

TotalSub = dsSub.Total_cloud_cover_entire_atmosphere
LowSub = dsSub.Low_cloud_cover_low_cloud
MedSub = dsSub.Medium_cloud_cover_middle_cloud
HighSub = dsSub.High_cloud_cover_high_cloud
TotalSub = TotalSub.sel(timeDict)
LowSub = LowSub.sel(timeDict)
MedSub = MedSub.sel(timeDict)
HighSub = HighSub.sel(timeDict)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[18], line 8
      6 MedSub = dsSub.Medium_cloud_cover_middle_cloud
      7 HighSub = dsSub.High_cloud_cover_high_cloud
----> 8 TotalSub = TotalSub.sel(timeDict)
      9 LowSub = LowSub.sel(timeDict)
     10 MedSub = MedSub.sel(timeDict)

File /knight/mamba_aug23/envs/jan24_env/lib/python3.11/site-packages/xarray/core/dataarray.py:1622, in DataArray.sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
   1506 def sel(
   1507     self,
   1508     indexers: Mapping[Any, Any] | None = None,
   (...)
   1512     **indexers_kwargs: Any,
   1513 ) -> Self:
   1514     """Return a new DataArray whose data is given by selecting index
   1515     labels along the specified dimension(s).
   1516 
   (...)
   1620     Dimensions without coordinates: points
   1621     """
-> 1622     ds = self._to_temp_dataset().sel(
   1623         indexers=indexers,
   1624         drop=drop,
   1625         method=method,
   1626         tolerance=tolerance,
   1627         **indexers_kwargs,
   1628     )
   1629     return self._from_temp_dataset(ds)

File /knight/mamba_aug23/envs/jan24_env/lib/python3.11/site-packages/xarray/core/dataset.py:3105, in Dataset.sel(self, indexers, method, tolerance, drop, **indexers_kwargs)
   3037 """Returns a new dataset with each array indexed by tick labels
   3038 along the specified dimension(s).
   3039 
   (...)
   3102 
   3103 """
   3104 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel")
-> 3105 query_results = map_index_queries(
   3106     self, indexers=indexers, method=method, tolerance=tolerance
   3107 )
   3109 if drop:
   3110     no_scalar_variables = {}

File /knight/mamba_aug23/envs/jan24_env/lib/python3.11/site-packages/xarray/core/indexing.py:193, in map_index_queries(obj, indexers, method, tolerance, **indexers_kwargs)
    191         results.append(IndexSelResult(labels))
    192     else:
--> 193         results.append(index.sel(labels, **options))
    195 merged = merge_sel_results(results)
    197 # drop dimension coordinates found in dimension indexers
    198 # (also drop multi-index if any)
    199 # (.sel() already ensures alignment)

File /knight/mamba_aug23/envs/jan24_env/lib/python3.11/site-packages/xarray/core/indexes.py:784, in PandasIndex.sel(self, labels, method, tolerance)
    782     indexer = get_indexer_nd(self.index, label_array, method, tolerance)
    783     if np.any(indexer < 0):
--> 784         raise KeyError(f"not all values found in index {coord_name!r}")
    786 # attach dimension names and/or coordinates to positional indexer
    787 if isinstance(label, Variable):

KeyError: "not all values found in index 'time1'"

Examine one of the DataArrays.

HighSub
<xarray.DataArray 'High_cloud_cover_high_cloud' (time1: 129, lat: 52, lon: 68)>
[456144 values with dtype=float32]
Coordinates:
    reftime    datetime64[ns] 2024-04-29T18:00:00
  * lat        (lat) float32 49.0 48.75 48.5 48.25 ... 37.0 36.75 36.5 36.25
  * lon        (lon) float32 276.0 276.2 276.5 276.8 ... 292.0 292.2 292.5 292.8
  * time1      (time1) datetime64[ns] 2024-04-29T18:00:00 ... 2024-05-15T18:0...
    metpy_crs  object Projection: latitude_longitude
Attributes: (12/13)
    long_name:                       High cloud cover @ High cloud layer
    units:                           %
    abbreviation:                    HCDC
    grid_mapping:                    LatLon_Projection
    Grib_Variable_Id:                VAR_0-6-5_L234
    Grib2_Parameter:                 [0 6 5]
    ...                              ...
    Grib2_Parameter_Category:        Cloud
    Grib2_Parameter_Name:            High cloud cover
    Grib2_Level_Type:                234
    Grib2_Level_Desc:                High cloud layer
    Grib2_Generating_Process_Type:   Forecast
    Grib2_Statistical_Process_Type:  UnknownStatType--1

Specify the output map projection (Lambert Conformal, centered on New York State)

lon1 = -75
lat1 = 42.5
slat = 42.5

proj_map= ccrs.LambertConformal(central_longitude=lon1,
                             central_latitude=lat1,
                             standard_parallels=[slat,slat])
contourLevels = np.arange(0,101,10)

Create titles for the plots.

Create a 2x2 figure for each of the four cloud cover variables. Omit plotting the land/water features since they tend to obscure the nuances of the temperature field. This also speeds up the creation of the figure.

for times in [0,1]:
#validTime = varTotal.sel(timeDict)[timeDim].values # Returns validTime as a NumPy Datetime 64 object
  validTime = TotalSub[timeDim][times].values # Returns validTime as a NumPy Datetime 64 object
  validTime = pd.Timestamp(validTime) # Converts validTime to a Pandas Timestamp object
  timeStr = validTime.strftime("%Y-%m-%d %H%M UTC") # We can now use Datetime's strftime method to create a time string.
  tlT1 = f"GFS Total Cloud Cover (%)"
  tlL1 = f"GFS Low Cloud Cover (%)"
  tlM1 = f"GFS Medium Cloud Cover (%)"
  tlH1 = f"GFS High Cloud Cover (%)"
  tl2 = str(f'Valid at: {timeStr}')
  title_line_total = (tlT1 + '\n' + tl2 + '\n')
  title_line_low = (tlL1 + '\n' + tl2 + '\n')
  title_line_med = (tlM1 + '\n' + tl2 + '\n')
  title_line_high = (tlH1 + '\n' + tl2 + '\n')

  res = '10m'
  fig = plt.figure(figsize=(18,12))

  ax1 = fig.add_subplot(2,2,1,projection=proj_map)
  ax1.set_extent (mapExtent, crs=ccrs.PlateCarree())
  ax1.add_feature (cfeature.COASTLINE.with_scale(res))
  ax1.add_feature (cfeature.STATES.with_scale(res))
  ax1.add_feature (USCOUNTIES,edgecolor='blue', linewidth=1 );
  CF = ax1.contourf (xSub,ySub,TotalSub[times], levels = contourLevels, cmap='Greys_r',alpha=0.5,transform=proj_data)
  cbar = fig.colorbar (CF, ax = ax1, fraction=0.046, pad=0.03,shrink=0.8)
  title = ax1.set_title(title_line_total,fontsize=16)

  ax2 = fig.add_subplot(2,2,2,projection=proj_map)
  ax2.set_extent (mapExtent, crs=ccrs.PlateCarree())
  ax2.add_feature (cfeature.COASTLINE.with_scale(res))
  ax2.add_feature (cfeature.STATES.with_scale(res))
  ax2.add_feature (USCOUNTIES,edgecolor='blue', linewidth=1 );
  CF = ax2.contourf (xSub,ySub,HighSub[times], levels = contourLevels, cmap='Greys_r',alpha=0.5,transform=proj_data)
  cbar = fig.colorbar (CF, ax = ax2, fraction=0.046, pad=0.03,shrink=0.8)
  title = ax2.set_title(title_line_high,fontsize=16)

  ax3 = fig.add_subplot(2,2,3,projection=proj_map)
  ax3.set_extent (mapExtent, crs=ccrs.PlateCarree())
  ax3.add_feature (cfeature.COASTLINE.with_scale(res))
  ax3.add_feature (cfeature.STATES.with_scale(res))
  ax3.add_feature (USCOUNTIES,edgecolor='blue', linewidth=1 );
  CF = ax3.contourf (xSub,ySub,MedSub[times], levels = contourLevels, cmap='Greys_r',alpha=0.5,transform=proj_data)
  cbar = fig.colorbar (CF, ax = ax3, fraction=0.046, pad=0.03,shrink=0.8)
  title = ax3.set_title(title_line_med,fontsize=16)

  ax4 = fig.add_subplot(2,2,4,projection=proj_map)
  ax4.set_extent (mapExtent, crs=ccrs.PlateCarree())
  ax4.add_feature (cfeature.COASTLINE.with_scale(res))
  ax4.add_feature (cfeature.STATES.with_scale(res))
  ax4.add_feature (USCOUNTIES,edgecolor='blue', linewidth=1 );
  CF = ax4.contourf (xSub,ySub,LowSub[times], levels = contourLevels, cmap='Greys_r',alpha=0.5,transform=proj_data)
  cbar = fig.colorbar (CF, ax = ax4, fraction=0.046, pad=0.03,shrink=0.8)
  title = ax4.set_title(title_line_low,fontsize=16);

  fig.tight_layout()
  plt.show()
../../_images/bf67ca7608591e2cc9152d681fd0de016ce5d6b284f1cbff012ceff08652d389.png ../../_images/6cf48a7254f0e472f3fdb8eaad96a384ab21e5ca2f9d614921a7434a250c6551.png