import numpy as np import netCDF4 import datetime as dt # Takes original tracks_low.nc files and creates new filtered track file # containing only tracks that meet the following criterion: # 1) TPV must last at least 2 days (eight 6-h time periods) # 2) TPV must spend at least 60% of its lifetime poleward of 65N following # TPV criterion of Cavallo and Hakim (2009) fDirSave = '/keyserlab_rit/kbiernat/track/szapiro_python/erai_0.5_tpv_climatology/' orig_fTracks = fDirSave+'tracks_low.nc' orig_dataTracks = netCDF4.Dataset(orig_fTracks,'r') nTimes = len(orig_dataTracks.dimensions['nTimes']) print "Number of times: {0}".format(nTimes) orig_nTracks = len(orig_dataTracks.dimensions['nTracks']) print "Number of tracks originally: {0}".format(orig_nTracks) filt_fTracks = fDirSave+'tpv_tracks_filtered.nc' metricKeys = 'circ vortMean ampMaxMin rEquiv thetaVol ampMean thetaExtr latExtr lonExtr'.split() def write_tracks_metrics_netcdf_header(filt_fTracks, info, nTimes): ''' For inputs, nTimesInTrackMax: maximum length of track nTimes: number of times in tracking interval nTimesInTrackMax <= nTimes Maybe dumping out the tracks to text file as: iTimeStartTrack1 nTimesTrack1 timeStart timeEnd (time1) metric1 metric2 ... for track1 (time2) metric1 metric2 ... for track1 -1 ends up taking alot of time since we read many small chunks from basinMetrics. Writing tracks out as metric1[iTime,iTrack] will let us load basinMetrics[iTime], but we'll see how costly it is to write to scattered locations within the file. There are probably intermediate buffers and such. "ragged" or "vlen" arrays still don't make sense to me, so we'll use extra padding instead. ''' data = netCDF4.Dataset(filt_fTracks, 'w', format='NETCDF4') data.description = info # dimensions data.createDimension('nTracks', None) data.createDimension('nTimesTrack', None) data.createDimension('nTimes', nTimes) # variables data.createVariable('timeStamp', str, ('nTimes',)) data.createVariable('iTimeStart', 'i4', ('nTracks',)) data.createVariable('lenTrack', 'i4', ('nTracks',)) data.createVariable('siteExtr', 'i4', ('nTracks','nTimesTrack',)) for key in metricKeys: data.createVariable(key, 'f8', ('nTracks','nTimesTrack',)) return data def read_tracks_metrics(orig_dataTracks, metricNames): #Input the name of the track file and list of metricNames strings. #return list of numpy arrays list[iTrack][iTime,iMetric] with the metric properties of the tracked TPVs. nMetrics = len(metricNames) orig_trackList = [] for iTrack in xrange(orig_nTracks): print iTrack nTimes = orig_dataTracks.variables['lenTrack'][iTrack] trackVals = np.empty((nTimes,nMetrics),dtype=float) for iMetric in xrange(nMetrics): key = metricNames[iMetric] for i in xrange(nTimes): trackVals[i,iMetric] = orig_dataTracks.variables[key][iTrack,i] orig_trackList.append(trackVals) return orig_trackList def filter_tracks(orig_dataTracks, orig_trackList, metricNames): #Filter tracks based on track length and metrics of interest and return list #of filtered track indices filt_trackInd_List = [] latInd = metricNames.index('latExtr') for iTrack, track in enumerate(orig_trackList): nTimesTrack = track.shape[0] if (nTimesTrack < 8): continue lat = track[:,latInd] nLat_gt60N = 0 for i in xrange(nTimesTrack): if (lat[i] > 60.1): nLat_gt60N = nLat_gt60N + 1 if (nLat_gt60N == 0): continue filt_trackInd_List.append(iTrack) return filt_trackInd_List filt_dataTracks = write_tracks_metrics_netcdf_header(filt_fTracks, 'filtered', nTimes) metricNames = ['latExtr'] orig_trackList = read_tracks_metrics(orig_dataTracks, metricNames) filt_trackInd_List = filter_tracks(orig_dataTracks, orig_trackList, metricNames) filt_nTracks = len(filt_trackInd_List) print "Number of tracks after filtering: {0}".format(filt_nTracks) for t in xrange(nTimes): filt_dataTracks.variables['timeStamp'][t] = orig_dataTracks.variables['timeStamp'][t] for iTrackFilt, iTrackOrig in enumerate(filt_trackInd_List): print iTrackOrig orig_nTimesTrack = orig_dataTracks.variables['lenTrack'][iTrackOrig] filt_dataTracks.variables['lenTrack'][iTrackFilt] = orig_nTimesTrack filt_dataTracks.variables['iTimeStart'][iTrackFilt] = orig_dataTracks.variables['iTimeStart'][iTrackOrig] for i in xrange(orig_nTimesTrack): filt_dataTracks.variables['siteExtr'][iTrackFilt,i] = orig_dataTracks.variables['siteExtr'][iTrackOrig,i] for key in metricKeys: filt_dataTracks.variables[key][iTrackFilt,i] = orig_dataTracks.variables[key][iTrackOrig,i] filt_dataTracks.close() orig_dataTracks.close()