#!/usr/bin/python
# Thanks to Dr. Brian Fiedler for the bulk of this program. Used with permission.
# Modified by C. Godfrey: 17 May 2010
# Typical usage: summary.py latest.mdf.pickle
# See comments below for "YOUR TURN (PART I)" and "YOUR TURN (PART II)".
# These comments describe the revisions that you must make to this script.
# When everything works satisfactorily, please post your latest.mdf
# file and mdfsummary.txt (or .html) in your password-protected directory
# and notify your instructor that you have finished.
# Note that "print" will print to your screen, which is useful for testing
# and debugging, while "print >>sumfile" will print to your output file
# mdfsummary.txt (or mdfsummary.html if you wish).
# The indentations here are with a TAB, rather than 4 spaces!!
import sys,cPickle,math
mval=-989 # Values lower than this indicate missing data.
# Try to get a name of the pickle file from the command line:
try:
filename=sys.argv[1]
except:
filename=None
print " Oops...you forgot to enter the file name at the command line"
filename=raw_input("Please enter filename=>")
if filename=="": filename="latest.mdf.pickle" #Default
# Try to open the pickle file:
try:
infile=open(filename,'r')
print " Opened",filename
except:
print " Can't open "+filename+"...Aborting"
sys.exit()
h=cPickle.load(infile) #Try to load the dictionary
print " Loaded an object with type:",type(h)
print " Is it a dictionary?",type(h)==type({})
header=cPickle.load(infile) #Load next item from dictionary
# Open the summary file
sumfile=open('mdfsummary.txt','w')
######################################################
###### Uncomment the following lines if you wish to add HTML to your
###### output file and thus make it pretty. Don't forget to add closing
###### HTML tags at the end of your output and rename the output file
###### with an HTML file extension.
#htmlheader="""
#
#
#
#
#"""
#print >>sumfile,htmlheader
######################################################
# Write the header
print >>sumfile,"Summary for Oklahoma Mesonet MDF file:"
print >>sumfile,header #Print header saved from mdf file
######################################################
# Report maximum and minimum temperatures and location
# Prepare a list of (TAIR,STID) for sorting from low to high temperatures:
###### Reverse the dictionary of dictionaries, making a new dictonary z:
keys=h.keys()
keys.sort()
z={} #Initialize empty dictionary
skeys=h[keys[0]].keys() #Get the "sub-keys", i.e., the keys to an inner dictionary
for i in skeys: #Use sub-keys as new main-keys
z[i]={}
for j in keys: #Use main-keys as new sub-keys
z[i][j]=h[j][i]
###### Define a function to return a list of all values in a measurement
def allmeas(meas):
"""\nReturn list of all values in a measurement, ordered by the alphabetic order of the key\n"""
bundle=meas.items()
bundle.sort()
return zip(*bundle)[1]
###### Define a function that sorts a dictionary, ignoring missing values
def dicsort(d,missingval=None):
"""\nConverts dictionary to sorted list of (value, key) tuples.\n"""
listoftuples=[]
for key in d.keys():
if missingval and d[key]>sumfile, "The maximum TAIR is %5.1f C, at %s" % allt[-1]
print >>sumfile, "The minimum TAIR is %5.1f C, at %s" % allt[0]
######################################################
######
#Let's see how the zip function works:
listoflists=[[1,2],[3,4]]
# Use * to expand the list into separate arguments:
print "\n How does zip work?", zip(*listoflists) #Your array should be transposed
######
# Report average of valid temperatures
# Use zip to "transpose" allt:
nomisst=zip(*allt)[0] #First list is now a list of valid temperatures
tmean=sum(nomisst)/len(nomisst)
print >>sumfile, "\nAverage of valid TAIR: %5.1f C" % (tmean)
################################################################################
################################################################################
#*******************************************************************************
# YOUR TURN (PART I)
# Print to sumfile the net wind direction from all sites. The net wind direction
# is the wind direction of the wind vector determined from a vector sum of all of
# the observed wind speeds. You will need to account for the magnitude of each
# observed wind in your calculation of the net wind direction. Pay careful
# attention to how we define wind direction in meteorology. You will probably
# need to use some trigonometry, so note that Python's math module uses radians
# by default.
#
# Also print to sumfile the average wind magnitude (simply average the observed
# wind speeds at all sites). Note that the resulting average wind speed and the
# net wind direction do not necessarily result in a particularly meaningful wind
# vector.
#
# HINT: There are functions defined above (and below) that will make your life
# MUCH easier.
#Your code goes here:
#End your code
#*******************************************************************************
################################################################################
################################################################################
# Define a simple function that gives the number of missing
# values in a mesonet station record:
def numberMissing(alist):
n=0
for i in alist:
if i>sumfile,"\nNumber of missing measurements at the top ten least-reliable sites:"
for nmiss,site in allm[0:10]:
print >>sumfile, site,nmiss
# Report percentage of sites missing a particular measurement
nsites=len(h.keys())
measmiss=[]
skeys.sort()
print >>sumfile, "\nPercentage of sites missing a particular measurement:"
for key in skeys:
fracmiss=1.*numberMissing(z[key].values())/nsites #Note decimal multiplication (1.*)
if fracmiss>0:
outstring="%4.1f%% of all sites are missing %s" % (100*fracmiss,key)
print >>sumfile, outstring
measmiss.append ((fracmiss,key))
# Report the least-reliable measurements
print >>sumfile, "\nTop six least-reliable measurements:"
measmiss.sort()
measmiss.reverse()
for fracmiss,key in measmiss[0:5]:
outstring="%s is missing at %4.1f%% of all sites" % (key,100*fracmiss)
print >>sumfile, outstring
# Define a function to calculate the correlation of any two variables
def corr(a,b,missval):
"Correlation of lists of numbers a and b; invalid pairs are excluded"
# For information on correlation, see http://www.jerrydallal.com/LHSP/corr.htm
if len(a)!=len(b):
print "Correlation not possible. Lengths are not the same: "+len(a)+" "+len(b)
return None
aa=[] #New list for a, including value only when pair a and b are valid
bb=[] #New list for b
for x,y in zip(a,b): #Include pair where both values are valid
if x < missval or y < missval:
print " A missing value in this pair:",x,y
continue
aa.append(x)
bb.append(y)
am=sum(aa)/len(aa)
bm=sum(bb)/len(bb)
ssa=0.
ssb=0.
sab=0.
for x,y in zip(aa,bb):
ssa+=(x-am)**2
ssb+=(y-bm)**2
sab+=(x-am)*(y-bm)
return sab/math.sqrt(ssa*ssb)
# Calculate correlation of WSSD and WDSD
WSSD_WDSD=corr(allmeas(z['WSSD']),allmeas(z['WDSD']),mval)
print >>sumfile, "\nCorrelation of WSSD with WDSD:", WSSD_WDSD
################################################################################
################################################################################
#*******************************************************************************
# YOUR TURN (PART II)
# Print a list of correlations for the following four pairs of measurements,
# sorted from highest to lowest correlation. Make the output look nice by
# including the names of the variables in your output file. You could even
# use some HTML to make it pretty when viewed on your Web page, but you would
# have to uncomment the lines at the top of this program first.
tostudy=[('TAIR','TA9M'),('TAIR','TS10'),('TAIR','RELH'),('WSPD','PRES')]
# If you use a daytime set of observations, it would be interesting to see
# the correlation between TAIR and SRAD as well.
#Your code goes here:
#End your code
#*******************************************************************************
################################################################################
################################################################################
sumfile.close()
print "\n The file mdfsummary.txt has been written."