#!/usr/bin/python
# Thanks to Dr. Brian Fiedler for the bulk of this program. Used with permission.
# Modified by C. Godfrey: 19 January 2009
#
# This program converts a text file with delimiters (commas, spaces, or
# whatever) into a dictionary of dictionaries, and stores it as a pickle.
# Some of the defaults for this program apply to mesonet data files, for
# example "latest.mdf".
#
# From the Linux command line, you could run this script with:
#     MDFpickle.py [skiplines delimiter]
# where the options in brackets are just that - optional. For example, you
# could try:
#     MDFpickle.py 3 ,
# to skip three header lines and use a comma delimiter, or you could just
# enter:
#     MDFpickle.py
# for the defaults (2 header lines and spaces for delimiters).
#
# The code below is written so that it runs unchanged under Python 2.6+ and
# Python 3 (single-argument print calls, import fallbacks).

import sys
import os

try:
    import cPickle  # Python 2
except ImportError:
    import pickle as cPickle  # Python 3 has no separate cPickle module

try:
    _prompt = raw_input  # Python 2
except NameError:
    _prompt = input  # Python 3

DEFAULT_FILENAME = "latest.mdf"
DEFAULT_SKIPLINES = 2


def fetch_latest_observations():
    """Download the current Oklahoma Mesonet observations into latest.mdf.

    Any existing latest.mdf* files are removed first so that wget does not
    save the new download under a numbered name such as latest.mdf.1.
    """
    os.system('rm latest.mdf*')
    os.system("wget http://www.mesonet.org/data/public/mesonet/latest/latest.mdf")


def parse_args(argv):
    """Return (skiplines, delimiter) taken from the command-line list argv.

    argv[1] is the number of header lines to skip; a missing or non-integer
    value falls back to 2. argv[2] is the field delimiter; when it is
    missing, None is returned, which str.split treats as "any white space".
    """
    try:
        skiplines = int(argv[1])
    except (IndexError, ValueError):
        skiplines = DEFAULT_SKIPLINES
    try:
        delimiter = argv[2]
    except IndexError:
        delimiter = None
    return skiplines, delimiter


def open_input(filename):
    """Open filename for reading, prompting for another name until one opens.

    Returns the open file object together with the name that finally worked,
    because the output pickle is named after it.
    """
    while True:
        try:
            return open(filename, 'r'), filename
        except IOError:
            print("Trouble opening  %s . Please try another name..." % filename)
            filename = _prompt("Please enter a filename=>")


def _split_fields(line, delimiter):
    """Split line at delimiter and trim white space around every field.

    Trimming matters with an explicit delimiter: without it the last field
    of each line would keep its trailing newline.
    """
    return [field.strip() for field in line.split(delimiter)]


def parse_table(lines, skiplines=DEFAULT_SKIPLINES, delimiter=None):
    """Convert the lines of a delimited text table into nested dictionaries.

    The first skiplines lines are kept verbatim as the header. The next line
    holds the column titles; its first item labels the key column and is
    discarded. In every following line the first item becomes a key of the
    outer dictionary and the remaining items are converted to float and
    stored in an inner dictionary keyed by column title. Blank lines are
    skipped.

    Returns a (header, table) tuple.

    Raises ValueError when there is no column-title line, when a data line
    does not have one value per column title, or when a value is not a
    number.
    """
    rows = list(lines)
    skiplines = max(skiplines, 0)  # a negative count means "skip nothing"
    if len(rows) <= skiplines:
        raise ValueError(
            "input has %d line(s); expected %d header line(s) followed by a "
            "line of column titles" % (len(rows), skiplines))

    header = "".join(rows[:skiplines])
    # Throw out the first title; it labels the column that supplies the keys.
    titles = _split_fields(rows[skiplines], delimiter)[1:]

    table = {}
    for lineno, line in enumerate(rows[skiplines + 1:], skiplines + 2):
        if not line.strip():
            continue
        fields = _split_fields(line, delimiter)
        key, values = fields[0], fields[1:]
        if len(values) != len(titles):
            raise ValueError(
                "line %d: expected %d values but found %d"
                % (lineno, len(titles), len(values)))
        table[key] = dict(zip(titles, [float(value) for value in values]))
    return header, table


def describe(table):
    """Print a short tour of the dictionary of dictionaries."""
    mainkeys = list(table.keys())
    print("\nHere are the keys to the outer dictionary:  %s" % (mainkeys,))
    if not mainkeys:
        return
    samplekey = mainkeys[0]
    subkeys = list(table[samplekey].keys())
    print("\nUsing key %s , we find an inner dictionary with keys:  %s"
          % (samplekey, subkeys))
    if not subkeys:
        return
    subkey = subkeys[0]
    sample = table[samplekey][subkey]
    print("\nUsing subkey %s , here is a sample value of the inner dictionary:  %s"
          % (subkey, sample))
    print("The data type of %s  is  %s" % (sample, type(sample)))


def write_pickle(table, header, outfn):
    """Pickle table and then header into the file outfn.

    Returns True on success and False if the file could not be written.
    The file is opened in binary mode, which pickle requires on Python 3 and
    which is harmless on Python 2.
    """
    try:
        with open(outfn, 'wb') as outfile:
            cPickle.dump(table, outfile)
            cPickle.dump(header, outfile)
    except (IOError, OSError, cPickle.PicklingError):
        return False
    return True


def main(argv=None):
    """Run the whole conversion; return 0 on success and 1 on failure."""
    if argv is None:
        argv = sys.argv
    print("Note that sys.argv[0] always contains the name of the script: %s"
          % argv[0])

    # Automatically grab the current observations from the Oklahoma Mesonet.
    fetch_latest_observations()
    filename = DEFAULT_FILENAME
    # Alternatively, you could try to get a filename from the command line:
    # try:
    #     filename = argv[1]  # First argument, e.g. 'latest.mdf'
    #     print("First argument was the input filename:  %s" % argv[1])
    # except IndexError:  # If nothing from command line, ask user for it
    #     print("Filename was not entered from command line, so")
    #     filename = _prompt("Please enter a filename=>")
    # Uncomment the lines above if you wish, but you'll need to change n in
    # argv[n] inside parse_args.

    skiplines, delimiter = parse_args(argv)
    print("Skipping %s header lines" % skiplines)
    if delimiter is None:
        print("The delimiter in the input file is white space")
    else:
        print("The delimiter in the input file is:  %s" % delimiter)

    infile, filename = open_input(filename)
    with infile:
        lines = infile.readlines()

    # Process the input file into a dictionary of dictionaries, then inspect it.
    header, table = parse_table(lines, skiplines, delimiter)
    describe(table)

    # Write a pickle file of the dictionary.
    outfn = filename + '.pickle'
    print("\nPickling this dictionary of dictionaries in %s" % outfn)
    if not write_pickle(table, header, outfn):
        print("Uh oh...the program could not write the pickle file.")
        return 1
    print("Congratulations...The pickle file has been written!")
    return 0


if __name__ == "__main__":
    sys.exit(main())