#!/usr/bin/env python
'''Convert a comma-separated-values file into an OpenDocument Spreadsheet.

Run as a script:  csv2ods.py INPUT.csv OUTPUT.ods
'''

import csv
import datetime
import math
import os.path
import re
import sys
import tempfile  # not used below; retained so the module's imports are unchanged
import xml.dom.minidom
import zipfile

ODS_MIMETYPE = 'application/vnd.oasis.opendocument.spreadsheet'
MANIFEST_NS = 'urn:oasis:names:tc:opendocument:xmlns:manifest:1.0'
DEFAULT_SHEET_NAME = 'Sheet1'

monthTable = {
    'jan': 1, 'feb': 2, 'mar': 3, 'apr': 4, 'may': 5, 'jun': 6,
    'jul': 7, 'aug': 8, 'sep': 9, 'oct': 10, 'nov': 11, 'dec': 12,
}

# Namespace prefixes declared on the root element of each sub-document.
_OFFICE_NAMESPACES = {
    'office': 'urn:oasis:names:tc:opendocument:xmlns:office:1.0',
    'style': 'urn:oasis:names:tc:opendocument:xmlns:style:1.0',
    'text': 'urn:oasis:names:tc:opendocument:xmlns:text:1.0',
    'table': 'urn:oasis:names:tc:opendocument:xmlns:table:1.0',
    'meta': 'urn:oasis:names:tc:opendocument:xmlns:meta:1.0',
    'number': 'urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0',
}

# month/day/year, with a two- or four-digit year.
_SLASH_DATE = re.compile(r'^(\d+)/(\d+)/(\d\d(?:\d\d)?)$')
# day-Mon-year (e.g. 1-Jan-99), with a two- or four-digit year.
_DMY_DATE = re.compile(r'^(\d+)-([A-Za-z]{3})-(\d\d(?:\d\d)?)$')

# Attribute that carries the value for each office:value-type.
_VALUE_ATTRIBUTE = {
    'float': 'office:value',
    'percentage': 'office:value',
    'string': 'office:string-value',
    'date': 'office:date-value',
}

# Cell style applied to the value types that need a display format.
_CELL_STYLE = {
    'date': 'date',
    'percentage': 'percent',
}


def createManifest():
    '''Create the manifest.xml file.

    Returns the pretty-printed XML text listing the package root and the
    four sub-documents.
    '''
    doc = xml.dom.minidom.Document()
    root = doc.createElementNS(MANIFEST_NS, 'manifest:manifest')
    # minidom does not write a namespace declaration for createElementNS,
    # so without this attribute the "manifest:" prefix is unbound and the
    # output is rejected by namespace-aware XML parsers.
    root.setAttribute('xmlns:manifest', MANIFEST_NS)
    doc.appendChild(root)

    def cfe(path, mediaType='text/xml'):
        '''Create and add a single file-entry.'''
        el = doc.createElement('manifest:file-entry')
        el.setAttribute('manifest:media-type', mediaType)
        el.setAttribute('manifest:full-path', path)
        root.appendChild(el)

    cfe('/', ODS_MIMETYPE)
    for path in ('content.xml', 'styles.xml', 'meta.xml', 'settings.xml'):
        cfe(path)
    return doc.toprettyxml(' ')


def rootElement(doc, str):
    '''Create a root element, like that which is present in each of the
    top-level sub-document xml files.

    doc -- the minidom Document the element is created in and appended to.
    str -- the qualified tag name of the root element.  (The name shadows
           the builtin; it is kept so keyword callers continue to work.)

    Returns the new element, already appended to doc.
    '''
    root = doc.createElement(str)
    root.setAttribute('office:version', '1.0')
    # items() exists on both Python 2 and 3 (iteritems() is Python 2 only);
    # sorting keeps the attribute order independent of dict ordering.
    for prefix, uri in sorted(_OFFICE_NAMESPACES.items()):
        root.setAttribute('xmlns:' + prefix, uri)
    doc.appendChild(root)
    return root


def createMeta():
    '''Create the meta.xml sub-document.

    Records the generator name and the current local time as the creation
    date.  Returns the pretty-printed XML text.
    '''
    doc = xml.dom.minidom.Document()
    root = rootElement(doc, 'office:document-meta')
    meta = doc.createElement('office:meta')
    root.appendChild(meta)

    generator = doc.createElement('meta:generator')
    generator.appendChild(doc.createTextNode("csv2ods.py/0.1"))
    meta.appendChild(generator)

    date = doc.createElement('meta:creation-date')
    date.appendChild(doc.createTextNode(datetime.datetime.now().isoformat()))
    meta.appendChild(date)
    return doc.toprettyxml(' ')


def createSettings():
    '''Create the settings.xml sub-document (an empty office:settings).'''
    doc = xml.dom.minidom.Document()
    root = rootElement(doc, 'office:document-settings')
    root.appendChild(doc.createElement('office:settings'))
    return doc.toprettyxml(' ')


def createStyles():
    '''Create the styles.xml sub-document (an empty office:styles).'''
    doc = xml.dom.minidom.Document()
    root = rootElement(doc, 'office:document-styles')
    root.appendChild(doc.createElement('office:styles'))
    return doc.toprettyxml(' ')


def dequote(x):
    '''If the item is wrapped in matching single or double quotes, remove
    them; otherwise return it unchanged.

    Empty strings and a lone quote character are returned as they are.
    '''
    if len(x) >= 2 and x[0] == x[-1] and x[0] in '"\'':
        return x[1:-1]
    return x


def _finiteFloat(text):
    '''Return text as a finite float, or None if it is not one.

    float() also accepts "nan", "inf" and "infinity"; those are rejected so
    that such words stay text instead of becoming numeric cells.
    '''
    try:
        number = float(text)
    except ValueError:
        return None
    if math.isnan(number) or math.isinf(number):
        return None
    return number


def _expandYear(digits):
    '''Turn a two- or four-digit year string into a full year number.

    Two-digit years below 60 are taken as 20xx, the rest as 19xx.
    '''
    year = int(digits)
    if len(digits) == 2:
        year += 2000 if year < 60 else 1900
    return year


def _isoDate(v):
    '''Return v as a YYYY-MM-DD string, or None if it is not a valid date.

    Accepts month/day/year and day-Mon-year.  Values that match the pattern
    but name an unknown month or an impossible day are not dates.
    '''
    found = _SLASH_DATE.match(v)
    if found:
        month = int(found.group(1))
        day = int(found.group(2))
    else:
        found = _DMY_DATE.match(v)
        if not found:
            return None
        day = int(found.group(1))
        month = monthTable.get(found.group(2).lower())
        if month is None:
            return None
    year = _expandYear(found.group(3))
    try:
        return datetime.date(year, month, day).isoformat()
    except (ValueError, OverflowError):
        return None


def _classify(x):
    '''Return the (value type, attribute value) pair for one raw CSV item.

    Type and value are derived together so that they can never disagree.
    '''
    v = dequote(x)
    number = _finiteFloat(v)
    if number is not None:
        # repr() yields a plain decimal form whatever spelling float() took.
        return 'float', repr(number)
    if v.endswith('%'):
        number = _finiteFloat(v[:-1])
        if number is not None:
            # The stored value of a percentage is the fraction: 50% -> 0.5.
            return 'percentage', repr(number / 100.0)
    iso = _isoDate(v)
    if iso is not None:
        return 'date', iso
    return 'string', v


def cellType(x):
    '''Heuristically guess what type the cell should be.

    Returns one of 'float', 'percentage', 'date' or 'string'.
    '''
    return _classify(x)[0]


def cellValueType(x):
    '''Translate cell types into the appropriate attribute key.'''
    return _VALUE_ATTRIBUTE[cellType(x)]


def cellValue(x):
    '''Return the value to insert into the cell.

    Numbers are normalised, percentages become fractions, dates become
    YYYY-MM-DD and everything else is the dequoted text.
    '''
    return _classify(x)[1]


def _sheetName(csvFile):
    '''Derive a sheet name from csvFile.name, with dots replaced.

    Falls back to DEFAULT_SHEET_NAME when the object has no usable name
    (for example an in-memory stream).
    '''
    try:
        base = os.path.basename(getattr(csvFile, 'name', None))
    except (TypeError, AttributeError):
        base = ''
    return base.replace('.', '_') or DEFAULT_SHEET_NAME


def createContent(csvFile):
    '''Create the content.xml sub-document.

    csvFile -- an iterable of text lines holding comma-separated values.
               Its optional "name" attribute supplies the sheet name.

    Returns the XML text of the spreadsheet content.
    '''
    # The csv module handles quoted fields that contain commas.  Each line
    # is stripped first, so leading and trailing whitespace is dropped and a
    # quoted field spanning several lines loses its line break.
    data = list(csv.reader(line.strip() for line in csvFile))
    sheetName = _sheetName(csvFile)

    doc = xml.dom.minidom.Document()
    root = rootElement(doc, 'office:document-content')
    autostyles = doc.createElement('office:automatic-styles')
    root.appendChild(autostyles)

    def appendText(parent, text):
        '''Add a literal number:text piece to a data style.'''
        piece = doc.createElement('number:text')
        piece.appendChild(doc.createTextNode(text))
        parent.appendChild(piece)

    def appendField(parent, fieldType):
        '''Add a long-form date field (year, month or day).'''
        field = doc.createElement('number:%s' % fieldType)
        field.setAttribute('number:style', 'long')
        parent.appendChild(field)

    def appendCellStyle(name, dataStyleName):
        '''Add a table-cell style that displays cells with a data style.'''
        style = doc.createElement('style:style')
        autostyles.appendChild(style)
        style.setAttribute('style:name', name)
        style.setAttribute('style:family', 'table-cell')
        style.setAttribute('style:parent-style-name', 'Default')
        style.setAttribute('style:data-style-name', dataStyleName)

    # Data style showing dates as year-month-day.
    date = doc.createElement('number:date-style')
    date.setAttribute('style:name', 'ISODate')
    autostyles.appendChild(date)
    appendField(date, 'year')
    appendText(date, '-')
    appendField(date, 'month')
    appendText(date, '-')
    appendField(date, 'day')

    # Data style showing fractions as percentages, since percentage cells
    # store the fraction rather than the text that was typed.
    percent = doc.createElement('number:percentage-style')
    percent.setAttribute('style:name', 'Percent')
    autostyles.appendChild(percent)
    digits = doc.createElement('number:number')
    digits.setAttribute('number:decimal-places', '2')
    digits.setAttribute('number:min-integer-digits', '1')
    percent.appendChild(digits)
    appendText(percent, '%')

    appendCellStyle('date', 'ISODate')
    appendCellStyle('percent', 'Percent')

    body = doc.createElement('office:body')
    root.appendChild(body)
    spreadsheet = doc.createElement('office:spreadsheet')
    body.appendChild(spreadsheet)
    table = doc.createElement('table:table')
    table.setAttribute('table:name', sheetName)
    spreadsheet.appendChild(table)

    for record in data:
        row = doc.createElement('table:table-row')
        table.appendChild(row)
        for item in record:
            kind, value = _classify(item)
            cell = doc.createElement('table:table-cell')
            row.appendChild(cell)
            cell.setAttribute('office:value-type', kind)
            cell.setAttribute(_VALUE_ATTRIBUTE[kind], value)
            styleName = _CELL_STYLE.get(kind)
            if styleName is not None:
                cell.setAttribute('table:style-name', styleName)

    ranges = doc.createElement('table:database-ranges')
    spreadsheet.appendChild(ranges)
    r = doc.createElement('table:database-range')
    ranges.appendChild(r)
    r.setAttribute('table:name', 'EntireSheet')
    # NOTE(review): the sheet name is written unquoted, so a name containing
    # spaces or quotes may not form a valid range address -- confirm.
    r.setAttribute('table:target-range-address',
                   '%s.A1:%s.IV65536' % (sheetName, sheetName))
    return doc.toxml()


def _toBytes(text):
    '''Return text as UTF-8 bytes (bytes are passed through untouched).'''
    if isinstance(text, bytes):
        return text
    return text.encode('utf-8')


def csv2ods(csvFile, odsFile):
    '''Given a file object containing comma-separated values, write an
    equivalent OpenDocument Spreadsheet to odsFile.

    csvFile -- an iterable of text lines (see createContent).
    odsFile -- a file name, or a file object opened for binary writing.
    '''
    # Build every part first so a conversion error cannot leave a partly
    # written archive behind.
    parts = [
        ('META-INF/manifest.xml', createManifest()),
        ('meta.xml', createMeta()),
        ('settings.xml', createSettings()),
        ('styles.xml', createStyles()),
        ('content.xml', createContent(csvFile)),
    ]
    with zipfile.ZipFile(odsFile, 'w',
                         compression=zipfile.ZIP_DEFLATED) as outZip:
        # The OpenDocument package format expects "mimetype" to be the first
        # entry and to be stored without compression.
        outZip.writestr('mimetype', _toBytes(ODS_MIMETYPE),
                        compress_type=zipfile.ZIP_STORED)
        for name, text in parts:
            outZip.writestr(name, _toBytes(text))


def main(argv=None):
    '''Command-line entry point.

    argv -- the argument list, program name first; defaults to sys.argv.

    Returns 0 on success, or 2 after printing a usage message when the
    input or output path is missing.
    '''
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        program = os.path.basename(argv[0]) if argv else 'csv2ods.py'
        sys.stderr.write('usage: %s INPUT.csv OUTPUT.ods\n' % program)
        return 2
    with open(argv[1]) as csvFile:
        # The archive is binary data, so the output must not be opened in
        # text mode.
        with open(argv[2], 'wb') as odsFile:
            csv2ods(csvFile, odsFile)
    return 0


if __name__ == '__main__':
    sys.exit(main())