changed from email address, import email addresses to eq_elder, flush temp files...
[eq/.git] / mls / mlstrimmer.py
1 #! /usr/bin/env python
2
3 import os
4 import sys
5 import csv
6 import zipfile
7 import tempfile
8 from optparse import OptionParser
9 from xml.dom.minidom import parse, parseString
10
def _SelectColumns(headerRow, wanted, allColumns):
        """Return the indices of the header cells to keep, in file order.

        Empty header cells are never selected.  When a column name occurs
        more than once only its first occurrence is used.  If allColumns is
        true every non-empty header is kept, otherwise only those whose name
        is in the set `wanted`.
        """
        columns = []
        seen = set()
        for index, title in enumerate(headerRow):
                if title == '' or title in seen:
                        continue
                seen.add(title)
                if allColumns or title in wanted:
                        columns.append(index)
        return columns


def _FormatRow(row, columns):
        """Render the selected cells of `row` as one output line (no newline).

        Every cell is wrapped in double quotes and followed by a comma, so a
        non-empty line always ends with a trailing comma.  Double quotes
        inside a cell are doubled so the quoted field stays well-formed.
        """
        return "".join('"%s",' % row[i].replace('"', '""') for i in columns)


def ParseFile(xmlNode, zipfile):
        """Trim one csv file to its configured columns and add it to a zip.

        The csv file is named by the "Name" attribute of xmlNode and is looked
        up inside options.directory.  The columns to keep are the text of the
        node's <MLSField> children; a field whose text is "*" keeps every
        column.  Only columns present in both the config and the csv header
        row are written, in the order they appear in the csv file.  The
        header row itself is written as the first output line.

        The trimmed rows go to a temporary file which is then stored in the
        archive under the original file name.

        Relies on the module-level `options` object (attributes `directory`
        and `verbose`) produced by the command-line parser.

        Parameters:
            xmlNode -- DOM element with a "Name" attribute and <MLSField>
                       children.
            zipfile -- an open, writable archive object providing
                       write(path, arcname).  (The name shadows the zipfile
                       module inside this function.)

        Raises:
            SystemExit -- with status 1 after printing the error when an
                          IOError occurs while reading or writing.
            IndexError -- if a data row is shorter than a selected column's
                          position.
        """
        name = xmlNode.getAttribute("Name")
        filename = os.path.join(options.directory, name)
        if options.verbose: print("Trimming %s" % (filename,))

        # collect the column names requested by the xml config
        allColumns = False
        wanted = set()
        for header in xmlNode.getElementsByTagName("MLSField"):
                # an empty <MLSField/> has no text child to read
                if header.firstChild is None:
                        continue
                if header.firstChild.data == "*":
                        allColumns = True
                else:
                        wanted.add(header.firstChild.data)
        if options.verbose: print("xml file says to save: %s" % (sorted(wanted),))

        try:
                # the context managers close both files even when a row fails;
                # the source file must be closed because the caller deletes it
                with open(filename) as source:
                        # text mode so str rows can be written on Python 2 and 3
                        with tempfile.NamedTemporaryFile(mode="w") as tf:
                                columns = None
                                for line in csv.reader(source):
                                        if columns is None:
                                                # first row is the header: map names to positions
                                                columns = _SelectColumns(line, wanted, allColumns)
                                                if options.verbose:
                                                        print("The columns that exist in both the xml and csv files are: %s"
                                                                % ([line[i] for i in columns],))
                                        lineToWrite = _FormatRow(line, columns)
                                        if options.verbose: print(lineToWrite)
                                        tf.write(lineToWrite + "\n")
                                # flush so the archive reads the complete file by name
                                tf.flush()
                                zipfile.write(tf.name, name)
        except IOError as e:
                print(e)
                # non-zero status so callers can tell the trim failed
                sys.exit(1)
63
64
65
if __name__ == '__main__':
        # Command-line entry point: trim every csv file listed in the xml
        # config, store the results in EQZipFile.zip inside the target
        # directory, then delete the original csv files.  Every failure path
        # exits with status 1 so calling scripts can detect it.

        # read in command line arguments and parse them
        parser = OptionParser()
        parser.add_option("-c", dest="configFile", default="MLSRequiredFields.xml",
                help="xml config file name")
        # PWD is not set in every environment; fall back to the real working
        # directory instead of failing with KeyError before -d is even read
        parser.add_option("-d", "--directory", dest="directory",
                default=os.environ.get("PWD") or os.getcwd(),
                help="directory where csv files are located")
        parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                help="show verbose messaging")
        # `options` stays a module-level name: ParseFile reads it directly
        (options, args) = parser.parse_args()

        # make sure xml file exists and we can read it
        if not os.access(options.configFile, os.R_OK):
                print("%s is not accessible" % (options.configFile,))
                sys.exit(1)
        # make sure we have write perms on the directory
        if not os.access(options.directory, os.W_OK):
                print("%s is not accessible" % (options.directory,))
                sys.exit(1)

        # read in the xml
        dom1 = parse(options.configFile)
        # NOTE(review): childNodes[1] presumably skips a whitespace text node
        # to reach the first element under the root -- confirm against the
        # config file layout before changing this lookup
        mlsFiles = dom1.firstChild.childNodes[1].getElementsByTagName("MLSFile")

        # make sure every listed file exists and is writable before touching anything
        for csvFile in mlsFiles:
                fullname = os.path.join(options.directory, csvFile.getAttribute("Name"))
                if not os.access(fullname, os.W_OK):
                        print("%s does not exist, or has the wrong permissions" % (fullname,))
                        sys.exit(1)

        # create zip file
        try:
                zfile = zipfile.ZipFile(os.path.join(options.directory, "EQZipFile.zip"), "w")
                try:
                        for csvFile in mlsFiles:
                                ParseFile(csvFile, zfile)
                finally:
                        # close the archive even when trimming aborts part-way
                        zfile.close()
        except IOError as e:
                print(e)
                sys.exit(1)

        # delete original files (only reached when the whole archive was written)
        for csvFile in mlsFiles:
                fullname = os.path.join(options.directory, csvFile.getAttribute("Name"))
                try:
                        os.remove(fullname)
                except OSError:
                        # best effort: report and carry on with the remaining files
                        print("could not delete %s, please delete it manually" % (fullname,))

        if options.verbose: print("Finished!")