{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 02_GriddedDiagnostics_Dewpoint_ERA5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## In this notebook, we'll cover the following:\n",
    "1. Select a date and access an ERA5 Dataset\n",
    "2. Subset the desired Datasets along their dimensions\n",
    "3. Calculate and visualize dewpoint."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <span style=\"color:purple\">0) Imports </span>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import os\n",
    "from datetime import datetime as dt\n",
    "\n",
    "import cartopy.crs as ccrs\n",
    "import cartopy.feature as cfeature\n",
    "import matplotlib.pyplot as plt\n",
    "import metpy.calc as mpcalc\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from metpy.units import units"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <span style=\"color:purple\">1) Specify a starting and ending date/time, regional extent, vertical levels, and access the ERA5 dataset</span>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Areal extent of the plot region, in degrees\n",
    "lonW = -105\n",
    "lonE = -90\n",
    "latS = 31\n",
    "latN = 39\n",
    "# Center of the region, computed before the longitudes are shifted to the 0-360 convention below\n",
    "cLat, cLon = (latS + latN)/2, (lonW + lonE)/2\n",
    "\n",
    "# Recall that in ERA5, longitudes run between 0 and 360, not -180 and 180\n",
    "if lonW < 0:\n",
    "    lonW = lonW + 360\n",
    "if lonE < 0:\n",
    "    lonE = lonE + 360\n",
    "\n",
    "expand = 1\n",
    "latRange = np.arange(latS - expand, latN + expand, .25) # expand the data range a bit beyond the plot range\n",
    "lonRange = np.arange(lonW - expand, lonE + expand, .25) # Need to match longitude values to those of the coordinate variable\n",
    "\n",
    "# Vertical level specification\n",
    "pLevel = 925\n",
    "levStr = f'{pLevel}'\n",
    "\n",
    "# Starting date/time\n",
    "startYear = 2013\n",
    "startMonth = 5\n",
    "startDay = 31\n",
    "startHour = 12\n",
    "startMinute = 0\n",
    "startDateTime = dt(startYear, startMonth, startDay, startHour, startMinute)\n",
    "\n",
    "# Ending date/time\n",
    "endYear = 2013\n",
    "endMonth = 5\n",
    "endDay = 31\n",
    "endHour = 18\n",
    "endMinute = 0\n",
    "endDateTime = dt(endYear, endMonth, endDay, endHour, endMinute)\n",
    "\n",
    "delta_time = endDateTime - startDateTime\n",
    "time_range_max = 7*86400 # seven days, expressed in seconds\n",
    "\n",
    "# An ending time that precedes the starting time leaves no times to process, so stop right away.\n",
    "if delta_time.total_seconds() < 0:\n",
    "    raise RuntimeError(\"Your ending date/time must not precede your starting date/time. Go back and try again.\")\n",
    "\n",
    "if delta_time.total_seconds() > time_range_max:\n",
    "    raise RuntimeError(\"Your time range must not exceed 7 days. Go back and try again.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use the cloud-hosted WeatherBench2 ERA5 archive when the requested period ends on or before\n",
    "# its final date (2023-01-10, per the archive's name); otherwise read local NetCDF files.\n",
    "Wb2EndDate = dt(2023,1,10)\n",
    "if endDateTime <= Wb2EndDate:\n",
    "    ds = xr.open_dataset(\n",
    "        'gs://weatherbench2/datasets/era5/1959-2023_01_10-wb13-6h-1440x721.zarr',\n",
    "        chunks={'time': 48},\n",
    "        consolidated=True,\n",
    "        engine='zarr'\n",
    "    )\n",
    "else:\n",
    "    input_directory = '/free/ktyle/era5'\n",
    "    # Sort so the files are always handed to Xarray in the same order\n",
    "    files = sorted(glob.glob(os.path.join(input_directory,'*.nc')))\n",
    "    if not files:\n",
    "        raise FileNotFoundError(f'No NetCDF (*.nc) files found in {input_directory}')\n",
    "    # Rename the local files' variables and dimensions to the names the rest of this notebook uses\n",
    "    varDict = {'valid_time': 'time',\n",
    "               'pressure_level': 'level',\n",
    "               'msl': 'mean_sea_level_pressure',\n",
    "               'q': 'specific_humidity',\n",
    "               't': 'temperature',\n",
    "               'u': 'u_component_of_wind',\n",
    "               'v': 'v_component_of_wind',\n",
    "               'w': 'vertical_velocity',\n",
    "               'z': 'geopotential'}\n",
    "    dimDict = {'valid_time': 'time',\n",
    "               'pressure_level': 'level'}\n",
    "    ds = xr.open_mfdataset(files).rename_dims(dimDict).rename_vars(varDict)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Examine the `Dataset`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# A bare name on a cell's last line displays that object; here it lets us inspect the Dataset we just opened.\n",
    "ds"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <span style=\"color:purple\">2) Specify a date/time range, and subset the desired `Dataset`s along their dimensions.</span>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a list of date and times based on what we specified for the initial and final times, using Pandas' date_range function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One entry every six hours, running from the starting to the ending date/time\n",
    "dateList = pd.date_range(start=startDateTime, end=endDateTime, freq=\"6h\")\n",
    "dateList"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We will calculate dewpoint, which depends on temperature and specific humidity (and also pressure, as we will see), so read in those arrays. Read in U and V as well if we wish to visualize wind vectors."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now create objects for our desired DataArrays based on the coordinates we have subsetted."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Data variable selection: every field is subset with the same times, pressure level, and lat/lon ranges\n",
    "subset = dict(time=dateList, level=pLevel, latitude=latRange, longitude=lonRange)\n",
    "\n",
    "Q = ds['specific_humidity'].sel(**subset).compute()\n",
    "T = ds['temperature'].sel(**subset).compute()\n",
    "U = ds['u_component_of_wind'].sel(**subset).compute()\n",
    "V = ds['v_component_of_wind'].sel(**subset).compute()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Define our subsetted coordinate arrays of lat and lon. Pull them from any of the DataArrays. We'll need to pass these into the contouring functions later on."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Coordinate arrays of the subsetted region, taken from the temperature DataArray\n",
    "lats, lons = T.latitude, T.longitude"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# <span style=\"color:purple\">3) Calculate and visualize dewpoint.</span>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's examine the MetPy diagnostic that calculates dewpoint if specific humidity is available:\n",
    "[Dewpoint from specific humidity](https://unidata.github.io/MetPy/latest/api/generated/metpy.calc.dewpoint_from_specific_humidity.html)<hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We can see that this function requires us to pass in arrays of pressure, temperature, and specific humidity. In this case, <i>pressure is constant everywhere</i>, since we are operating on an isobaric surface.\n",
    "\n",
    "As such, we need to be sure we are attaching units to our previously specified pressure level."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MetPy needs a pressure that carries units, so define a new variable, P, as pLevel in hPa:\n",
    "P = pLevel * units.hPa"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display P to confirm that the units were attached.\n",
    "P"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now, we have everything we need to calculate dewpoint."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Arguments are passed in the order pressure, temperature, specific humidity.\n",
    "Td = mpcalc.dewpoint_from_specific_humidity(P, T, Q)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Display the result so we can check its dimensions and units.\n",
    "Td"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Notice the units are in degrees Celsius."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's do a quick visualization. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Quick-look plot of dewpoint at the starting time, using Xarray's built-in plot method\n",
    "TdStart = Td.sel(time=startDateTime)\n",
    "TdStart.plot(figsize=(15,10), cmap='summer_r')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Find the min/max values (no scaling necessary). Use these to inform the setting of the contour fill intervals."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Smallest and largest dewpoint anywhere in the array (all times, all grid points)\n",
    "minTd, maxTd = Td.min().values, Td.max().values\n",
    "print (minTd, maxTd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Contour-fill levels for dewpoint: start at TdLow and step by TdInc, stopping before TdHigh\n",
    "TdInc = 2\n",
    "TdLow, TdHigh = -12, 24\n",
    "TdContours = np.arange(TdLow, TdHigh, TdInc)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now, let's plot filled contours of dewpoint, and wind barbs on the map. "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Convert U and V to knots"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Apply the same MetPy unit conversion to both wind components\n",
    "UKts, VKts = (wind.metpy.convert_units('kts') for wind in (U, V))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Amounts by which the map extent is pulled in from each edge of the region (latitude, longitude)\n",
    "constrainLat = 0.5\n",
    "constrainLon = 4.0\n",
    "\n",
    "# Projection of the map itself, centered on the region\n",
    "proj_map = ccrs.LambertConformal(central_latitude=cLat, central_longitude=cLon)\n",
    "# Our data is lat-lon; thus its native projection is Plate Carree.\n",
    "proj_data = ccrs.PlateCarree()\n",
    "\n",
    "# Scale passed to the cartographic features drawn on the map\n",
    "res = '50m'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot only every skip-th wind barb in each direction to keep the map readable; this is the same for every time.\n",
    "skip = 2\n",
    "\n",
    "# Produce and save one map per time in dateList.\n",
    "for time in dateList:\n",
    "    print(\"Processing\", time)\n",
    "    \n",
    "    # Create the time strings, for the figure title as well as for the file name.\n",
    "    timeStr = time.strftime(\"%Y-%m-%d %H%M UTC\")\n",
    "    timeStrFile = time.strftime(\"%Y%m%d%H\")\n",
    "    \n",
    "    tl1 = f'ERA5, Valid at: {timeStr}'\n",
    "    tl2 = f'{levStr} hPa dewpoint (°C) and winds (kts)'\n",
    "    \n",
    "    title_line = f'{tl1}\\n{tl2}\\n'\n",
    "    \n",
    "    fig = plt.figure(figsize=(21,15)) # Increase size to adjust for the constrained lats/lons\n",
    "    ax = fig.add_subplot(1,1,1,projection=proj_map)\n",
    "    ax.set_extent ([lonW+constrainLon,lonE-constrainLon,latS+constrainLat,latN-constrainLat])\n",
    "    ax.add_feature(cfeature.COASTLINE.with_scale(res))\n",
    "    ax.add_feature(cfeature.STATES.with_scale(res),edgecolor='brown')\n",
    "    \n",
    "    # Need to use Xarray's sel method here to specify the current time for any DataArray you are plotting.\n",
    "    \n",
    "    # 1. Contour fill of dewpoint.\n",
    "    cTd = ax.contourf(lons, lats, Td.sel(time=time), levels=TdContours, cmap='summer_r', transform=proj_data)\n",
    "    # Attach the colorbar to this figure and axes explicitly, rather than to whatever pyplot considers current.\n",
    "    cbar = fig.colorbar(cTd, ax=ax, shrink=0.5)\n",
    "    cbar.ax.tick_params(labelsize=16)\n",
    "    cbar.ax.set_ylabel(\"Dew point temperature (°C)\",fontsize=16)\n",
    "\n",
    "    # 2. Wind barbs\n",
    "    # Plotting wind barbs uses the ax.barbs method. Here, you can't pass in the DataArray directly; you can only pass in the array's values.\n",
    "    # Also need to sample (skip) a selected # of points to keep the plot readable.\n",
    "    # Remember to use Xarray's sel method here as well to specify the current time.\n",
    "    ax.barbs(lons[::skip],lats[::skip],UKts.sel(time=time)[::skip,::skip].values, VKts.sel(time=time)[::skip,::skip].values, color='purple',transform=proj_data)\n",
    "    \n",
    "    title = ax.set_title(title_line,fontsize=16)\n",
    "    # Generate a string for the file name and save the graphic to your current directory.\n",
    "    fileName = f'{timeStrFile}_ERA5_{levStr}_Td_Wind.png'\n",
    "    fig.savefig(fileName)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "anaconda-cloud": {},
  "kernelspec": {
   "display_name": "Python 3 Jan. 2025 Environment",
   "language": "python",
   "name": "jan25"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}