#!/usr/bin/python # Thanks to Dr. Brian Fiedler for the bulk of this program. Used with permission. # Modified by C. Godfrey: 17 May 2010 # Typical usage: summary.py latest.mdf.pickle # See comments below for "YOUR TURN (PART I)" and "YOUR TURN (PART II)". # These comments describe the revisions that you must make to this script. # When everything works satisfactorily, please post your latest.mdf # file and mdfsummary.txt (or .html) in your password-protected directory # and notify your instructor that you have finished. # Note that "print" will print to your screen, which is useful for testing # and debugging, while "print >>sumfile" will print to your output file # mdfsummary.txt (or mdfsummary.html if you wish). # The indentations here are with a TAB, rather than 4 spaces!! import sys,cPickle,math mval=-989 # Values lower than this indicate missing data. # Try to get a name of the pickle file from the command line: try: filename=sys.argv[1] except: filename=None print " Oops...you forgot to enter the file name at the command line" filename=raw_input("Please enter filename=>") if filename=="": filename="latest.mdf.pickle" #Default # Try to open the pickle file: try: infile=open(filename,'r') print " Opened",filename except: print " Can't open "+filename+"...Aborting" sys.exit() h=cPickle.load(infile) #Try to load the dictionary print " Loaded an object with type:",type(h) print " Is it a dictionary?",type(h)==type({}) header=cPickle.load(infile) #Load next item from dictionary # Open the summary file sumfile=open('mdfsummary.txt','w') ###################################################### ###### Uncomment the following lines if you wish to add HTML to your ###### output file and thus make it pretty. Don't forget to add closing ###### HTML tags at the end of your output and rename the output file ###### with an HTML file extension. #htmlheader=""" # # # # #""" #print >>sumfile,htmlheader ###################################################### # Write the header print >>sumfile,"Summary for Oklahoma Mesonet MDF file:" print >>sumfile,header #Print header saved from mdf file ###################################################### # Report maximum and minimum temperatures and location # Prepare a list of (TAIR,STID) for sorting from low to high temperatures: ###### Reverse the dictionary of dictionaries, making a new dictonary z: keys=h.keys() keys.sort() z={} #Initialize empty dictionary skeys=h[keys[0]].keys() #Get the "sub-keys", i.e., the keys to an inner dictionary for i in skeys: #Use sub-keys as new main-keys z[i]={} for j in keys: #Use main-keys as new sub-keys z[i][j]=h[j][i] ###### Define a function to return a list of all values in a measurement def allmeas(meas): """\nReturn list of all values in a measurement, ordered by the alphabetic order of the key\n""" bundle=meas.items() bundle.sort() return zip(*bundle)[1] ###### Define a function that sorts a dictionary, ignoring missing values def dicsort(d,missingval=None): """\nConverts dictionary to sorted list of (value, key) tuples.\n""" listoftuples=[] for key in d.keys(): if missingval and d[key]>sumfile, "The maximum TAIR is %5.1f C, at %s" % allt[-1] print >>sumfile, "The minimum TAIR is %5.1f C, at %s" % allt[0] ###################################################### ###### #Let's see how the zip function works: listoflists=[[1,2],[3,4]] # Use * to expand the list into separate arguments: print "\n How does zip work?", zip(*listoflists) #Your array should be transposed ###### # Report average of valid temperatures # Use zip to "transpose" allt: nomisst=zip(*allt)[0] #First list is now a list of valid temperatures tmean=sum(nomisst)/len(nomisst) print >>sumfile, "\nAverage of valid TAIR: %5.1f C" % (tmean) ################################################################################ ################################################################################ #******************************************************************************* # YOUR TURN (PART I) # Print to sumfile the net wind direction from all sites. The net wind direction # is the wind direction of the wind vector determined from a vector sum of all of # the observed wind speeds. You will need to account for the magnitude of each # observed wind in your calculation of the net wind direction. Pay careful # attention to how we define wind direction in meteorology. You will probably # need to use some trigonometry, so note that Python's math module uses radians # by default. # # Also print to sumfile the average wind magnitude (simply average the observed # wind speeds at all sites). Note that the resulting average wind speed and the # net wind direction do not necessarily result in a particularly meaningful wind # vector. # # HINT: There are functions defined above (and below) that will make your life # MUCH easier. #Your code goes here: #End your code #******************************************************************************* ################################################################################ ################################################################################ # Define a simple function that gives the number of missing # values in a mesonet station record: def numberMissing(alist): n=0 for i in alist: if i>sumfile,"\nNumber of missing measurements at the top ten least-reliable sites:" for nmiss,site in allm[0:10]: print >>sumfile, site,nmiss # Report percentage of sites missing a particular measurement nsites=len(h.keys()) measmiss=[] skeys.sort() print >>sumfile, "\nPercentage of sites missing a particular measurement:" for key in skeys: fracmiss=1.*numberMissing(z[key].values())/nsites #Note decimal multiplication (1.*) if fracmiss>0: outstring="%4.1f%% of all sites are missing %s" % (100*fracmiss,key) print >>sumfile, outstring measmiss.append ((fracmiss,key)) # Report the least-reliable measurements print >>sumfile, "\nTop six least-reliable measurements:" measmiss.sort() measmiss.reverse() for fracmiss,key in measmiss[0:5]: outstring="%s is missing at %4.1f%% of all sites" % (key,100*fracmiss) print >>sumfile, outstring # Define a function to calculate the correlation of any two variables def corr(a,b,missval): "Correlation of lists of numbers a and b; invalid pairs are excluded" # For information on correlation, see http://www.jerrydallal.com/LHSP/corr.htm if len(a)!=len(b): print "Correlation not possible. Lengths are not the same: "+len(a)+" "+len(b) return None aa=[] #New list for a, including value only when pair a and b are valid bb=[] #New list for b for x,y in zip(a,b): #Include pair where both values are valid if x < missval or y < missval: print " A missing value in this pair:",x,y continue aa.append(x) bb.append(y) am=sum(aa)/len(aa) bm=sum(bb)/len(bb) ssa=0. ssb=0. sab=0. for x,y in zip(aa,bb): ssa+=(x-am)**2 ssb+=(y-bm)**2 sab+=(x-am)*(y-bm) return sab/math.sqrt(ssa*ssb) # Calculate correlation of WSSD and WDSD WSSD_WDSD=corr(allmeas(z['WSSD']),allmeas(z['WDSD']),mval) print >>sumfile, "\nCorrelation of WSSD with WDSD:", WSSD_WDSD ################################################################################ ################################################################################ #******************************************************************************* # YOUR TURN (PART II) # Print a list of correlations for the following four pairs of measurements, # sorted from highest to lowest correlation. Make the output look nice by # including the names of the variables in your output file. You could even # use some HTML to make it pretty when viewed on your Web page, but you would # have to uncomment the lines at the top of this program first. tostudy=[('TAIR','TA9M'),('TAIR','TS10'),('TAIR','RELH'),('WSPD','PRES')] # If you use a daytime set of observations, it would be interesting to see # the correlation between TAIR and SRAD as well. #Your code goes here: #End your code #******************************************************************************* ################################################################################ ################################################################################ sumfile.close() print "\n The file mdfsummary.txt has been written."