Updated "Phone 1" field to be "Household Phone" to match MLS
[eq/.git] / mls / mlstrimmer.py
1 #! /usr/bin/env python
2
3 import os
4 import sys
5 import csv
6 import zipfile
7 import tempfile
8 from optparse import OptionParser
9 from xml.dom.minidom import parse, parseString
10
def ParseFile(xmlNode, zipfile):
    """Trim one CSV file to its configured columns and add it to an archive.

    The CSV named by the node's ``Name`` attribute is read from
    ``options.directory`` (``options`` is the module-level object holding the
    parsed command-line options).  Only the columns named by the node's
    ``MLSField`` children and present in the CSV header row are kept; a field
    whose text is ``*`` keeps every non-empty header column.  The trimmed
    rows, header row included, are written to a temporary file which is then
    stored in the archive under the original file name.

    Args:
        xmlNode: DOM element with a ``Name`` attribute and ``MLSField``
            child elements.
        zipfile: open archive object providing ``write(path, arcname)``.
            Note that inside this function the name shadows the ``zipfile``
            module.

    On ``IOError`` the error is printed and the process exits with status 1.
    """
    filename = os.path.join(options.directory, xmlNode.getAttribute("Name"))
    if options.verbose:
        print("Trimming %s" % (filename,))

    # Collect the column names requested by the config; "*" means keep all.
    allColumns = False
    headerMap = {}
    for header in xmlNode.getElementsByTagName("MLSField"):
        if header.firstChild is None:
            # An empty <MLSField/> names no column, so there is nothing to keep.
            continue
        if header.firstChild.data == "*":
            allColumns = True
        else:
            headerMap[header.firstChild.data] = -1
    if options.verbose:
        print("xml file says to save: %s" % (headerMap,))

    try:
        with open(filename) as source:
            # Text mode so plain strings can be written on Python 2 and 3.
            with tempfile.NamedTemporaryFile(mode="w") as tf:
                columnIndexes = None
                for line in csv.reader(source):
                    if columnIndexes is None:
                        # First row is the header: keep the requested columns
                        # that actually exist (blank header cells are ignored).
                        present = set(item for item in line if item != '')
                        if allColumns:
                            keep = present
                        else:
                            keep = present.intersection(headerMap)
                        headerMap = dict((name, line.index(name)) for name in keep)
                        # Emit columns in the order they appear in the CSV so
                        # the output does not depend on hash ordering.
                        headersToSave = sorted(headerMap, key=headerMap.get)
                        columnIndexes = [headerMap[name] for name in headersToSave]
                        if options.verbose:
                            print("The columns that exist in both the xml and csv files are: %s" % (headersToSave,))

                    # Every field is quoted and followed by a comma (the
                    # existing output format); embedded quotes are doubled so
                    # the result stays valid CSV.
                    lineToWrite = "".join(
                        '"%s",' % line[index].replace('"', '""')
                        for index in columnIndexes)
                    if options.verbose:
                        print(lineToWrite)
                    tf.write(lineToWrite + "\n")

                # The archive re-reads the temp file by name, so buffered rows
                # must reach the disk first.
                tf.flush()
                zipfile.write(tf.name, xmlNode.getAttribute("Name"))
    except IOError as e:
        print(e)
        sys.exit(1)
62
63
64
if __name__ == '__main__':
    # Script entry point: trim every CSV listed in the xml config, pack the
    # trimmed copies into EQZipFile.zip inside the working directory, then
    # delete the original CSV files.  Any failure before the delete step
    # exits with status 1 so callers can detect it.

    # read in command line arguments and parse them
    # (`options` stays a module-level name because ParseFile reads it)
    parser = OptionParser()
    parser.add_option("-c", dest="configFile", default="MLSRequiredFields.xml",
        help="xml config file name")
    # PWD is not set in every environment, so fall back to the real cwd
    parser.add_option("-d", "--directory", dest="directory",
        default=os.environ.get("PWD") or os.getcwd(),
        help="directory where csv files are located")
    parser.add_option("-v", "--verbose", dest="verbose", action="store_true",
        help="show verbose messaging")
    (options, args) = parser.parse_args()

    # make sure xml file exists and we can read it
    if not os.access(options.configFile, os.R_OK):
        print("%s is not accessible" % (options.configFile,))
        sys.exit(1)
    # make sure we have write perms on the directory
    if not os.access(options.directory, os.W_OK):
        print("%s is not accessible" % (options.directory,))
        sys.exit(1)

    # read in the xml
    dom1 = parse(options.configFile)
    # NOTE(review): only MLSFile elements below the second child node of the
    # document's first node are used; this presumably relies on the config's
    # exact layout (including whitespace nodes) - confirm before changing.
    mlsFiles = dom1.firstChild.childNodes[1].getElementsByTagName("MLSFile")

    # make sure every listed file exists and is writable before touching anything
    for csvFile in mlsFiles:
        fullname = os.path.join(options.directory, csvFile.getAttribute("Name"))
        if not os.access(fullname, os.W_OK):
            print("%s does not exist, or has the wrong permissions" % (fullname,))
            sys.exit(1)

    # create zip file
    try:
        zfile = zipfile.ZipFile(os.path.join(options.directory, "EQZipFile.zip"), "w")
        try:
            for csvFile in mlsFiles:
                ParseFile(csvFile, zfile)
        finally:
            # close the archive even when ParseFile raises or exits
            zfile.close()
    except IOError as e:
        print(e)
        sys.exit(1)

    # delete original files (best effort: a failure is reported, not fatal)
    for csvFile in mlsFiles:
        fullname = os.path.join(options.directory, csvFile.getAttribute("Name"))
        try:
            os.remove(fullname)
        except OSError:
            print("could not delete %s, please delete it manually" % (fullname,))

    if options.verbose:
        print("Finished!")