#!/usr/bin/python
# Thanks to Dr. Brian Fiedler for the bulk of this program. Used with permission.
# Modified by C. Godfrey: 19 January 2009

import sys, cPickle, os

# This program converts a text file with delimiters (commas, spaces, or whatever) into a
# dictionary of dictionaries, and stores it as a pickle. Some of the defaults
# for this program apply to mesonet data files, for example "latest.mdf".  
# From the Linux command line, you could run this script with:
# MDFpickle.py [skiplines delimiter]
# where the options in brackets are just that - optional.  For example, you could
# try:
# MDFpickle.py 3 ,
# to skip three header lines and use a comma delimiter or you could just enter:
# MDFpickle.py
# for the defaults (2 header lines and spaces for delimiters).

#Note that the indentations here are with a TAB, rather than 4 spaces.

print "Note that sys.argv[0] always contains the name of the script:",sys.argv[0]

#Automatically grab the current observations from the Oklahoma Mesonet.
#With this option, we need to delete any existing latest.mdf files.
os.system('rm latest.mdf*')
os.system("wget http://www.mesonet.org/data/public/mesonet/latest/latest.mdf")
filename="latest.mdf"

#Alternatively, you could try to get a filename from the command line.
#try:
#	filename=sys.argv[1] #First argument, e.g. 'latest.mdf'
#	print "First argument was the input filename: ",sys.argv[1]
#except: #If nothing from command line, ask user for it
#	print "Filename was not entered from command line, so"
#	filename=raw_input("Please enter a filename=>")
#Uncomment the lines above if you wish, but you'll need to change n in sys.argv[n] below.

#Allow specification of the number of lines to skip in the input file:
try:
	skiplines=int(sys.argv[1])
except:
	skiplines=2
print "Skipping",skiplines,"header lines"

#Allow specification of a delimiter in the input file:
try:
	delimiter=sys.argv[2]
	print "The delimiter in the input file is: ",delimiter 
except:
	delimiter=None #This will be interpreted to mean "white space"
	print "The delimiter in the input file is white space"

#Try to open the input file:
fileopen=False
while not fileopen:
	try:
		infile=open(filename,'r')
		fileopen=True
	except:
		print "Trouble opening ",filename,". Please try another name..."
		filename=raw_input("Please enter a filename=>")

#Process the input file into a dictionary of dictionaries.
#Results:
#  header       - the skipped header lines, joined into one string
#  columntitles - the raw line holding the column headings
#  ct           - list of column headings, without the first (key) column
#  h            - {first item on a data line: {column heading: float value}}
lines=infile.readlines()
infile.close() #Everything is in memory now, so the file can be released
nskip=max(skiplines,0) #A negative count skips nothing
if len(lines)<=nskip:
	raise ValueError("The input file has no column-title line after %d header lines" % nskip)
header="".join(lines[:nskip])
columntitles=lines[nskip] #The line right after the header holds the column headings
#Strip each heading so that the last one does not keep the end-of-line character
#when the delimiter is something other than white space.
ct=[title.strip() for title in columntitles.split(delimiter)]
if not ct:
	raise ValueError("The column-title line is empty")
ct.pop(0) #Throw out first item; assume it will be a character string that will be a key
h={} #Initialize empty dictionary
for line in lines[nskip+1:]:
	if not line.strip():
		continue #Blank lines (often found at the end of a file) carry no data
	v=line.split(delimiter) #Split line at delimiter
	key=v.pop(0).strip() #First item is the key, remaining items in v are numbers
	if len(v)!=len(ct): #Every data line must match the column headings
		raise ValueError("Line for key %r has %d values, but there are %d column titles" % (key,len(v),len(ct)))
	#Inner dictionary: measurement type -> measurement (float ignores surrounding white space)
	h[key]=dict(zip(ct,[float(item) for item in v]))
		
#Done making the dictionary of dictionaries, now inspect it:
mainkeys=h.keys()
print "\nHere are the keys to the outer dictionary: ",mainkeys
samplekey=mainkeys[0]
subkeys=h[samplekey].keys()
print "\nUsing key",samplekey,", we find an inner dictionary with keys: ", subkeys
subkey=subkeys[0]
print "\nUsing subkey",subkey,", here is a sample value of the inner dictionary: ",h[samplekey][subkey] 
print "The data type of", h[samplekey][subkey]," is ",type(h[samplekey][subkey])

#Write a pickle file of the dictionary: 
outfn=filename+'.pickle'
print "\nPickling this dictionary of dictionaries in",outfn
try:
	outfile=open(outfn,'w')
	cPickle.dump(h,outfile)
	cPickle.dump(header,outfile)
except:
	print "Uh oh...the program could not write the pickle file."
print "Congratulations...The pickle file has been written!"