Package translate :: Package storage :: Module csvl10n
[hide private]
[frames | no frames]

Source Code for Module translate.storage.csvl10n

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """classes that hold units of comma-separated values (.csv) files (csvunit) 
 23  or entire files (csvfile) for use with localisation 
 24  """ 
 25   
 26  try: 
 27      # try to import the standard csv module, included from Python 2.3 
 28      import csv 
 29  except: 
 30      # if it doesn't work, use our local copy of it... 
 31      from translate.misc import csv 
 32   
 33  from translate.misc import sparse 
 34  from translate.storage import base 
 35   
class SimpleDictReader(object):
    """A minimal dictionary-style reader for CSV text.

    The complete input is read and tokenized when the reader is created.
    Each iteration step consumes the tokens of one line and returns them as
    a dict keyed by the supplied field names.
    """

    def __init__(self, fileobj, fieldnames):
        """Read everything from fileobj and tokenize it.

        fileobj    -- any object with a read() method returning the CSV text
        fieldnames -- sequence of column names, in column order
        """
        self.fieldnames = fieldnames
        self.contents = fileobj.read()
        # "," and "\n" are the only tokens we split on ourselves; how "\r"
        # and quoted strings are treated is up to sparse.SimpleParser
        self.parser = sparse.SimpleParser(defaulttokenlist=[",", "\n"], whitespacechars="\r")
        self.parser.stringescaping = 0
        self.parser.quotechars = '"'
        self.tokens = self.parser.tokenize(self.contents)
        self.tokenpos = 0

    def __iter__(self):
        """the reader is its own iterator"""
        return self

    def getvalue(self, value):
        """returns a value, evaluating strings as necessary

        A value wrapped in a matching pair of single or double quotes is
        passed through sparse.stringeval; anything else is returned as is.
        """
        if (value.startswith("'") and value.endswith("'")) or (value.startswith('"') and value.endswith('"')):
            return sparse.stringeval(value)
        return value

    def _skipnewlines(self):
        """advance tokenpos past any run of newline tokens"""
        tokens = self.tokens
        lentokens = len(tokens)
        while self.tokenpos < lentokens and tokens[self.tokenpos] == "\n":
            self.tokenpos += 1

    def next(self):
        """Return the next record as a dict mapping field name to value.

        Blank lines are skipped. Columns missing from a short line are
        filled with "" and columns beyond the known field names are dropped.
        Raises StopIteration once all tokens have been consumed.
        """
        tokens = self.tokens
        lentokens = len(tokens)
        self._skipnewlines()
        if self.tokenpos >= lentokens:
            raise StopIteration()
        # collect the tokens that make up this line
        thistokens = []
        while self.tokenpos < lentokens and tokens[self.tokenpos] != "\n":
            thistokens.append(tokens[self.tokenpos])
            self.tokenpos += 1
        self._skipnewlines()
        fields = []
        # patch together fields since we can have quotes inside a field
        currentfield = ''
        fieldparts = 0
        for token in thistokens:
            if token == ',':
                # a field is only quoted if the whole thing is quoted
                if fieldparts == 1:
                    currentfield = self.getvalue(currentfield)
                fields.append(currentfield)
                currentfield = ''
                fieldparts = 0
            else:
                currentfield += token
                fieldparts += 1
        # things after the last comma...
        if fieldparts:
            if fieldparts == 1:
                currentfield = self.getvalue(currentfield)
            fields.append(currentfield)
        values = {}
        for fieldnum, fieldname in enumerate(self.fieldnames):
            if fieldnum < len(fields):
                values[fieldname] = fields[fieldnum]
            else:
                values[fieldname] = ""
        return values

    # expose the same method under the name used by the newer iterator
    # protocol, so that for-loops and list() work there as well
    __next__ = next
95
class csvunit(base.TranslationUnit):
    """A single row of a localisation .csv file: comment, source and target."""

    # (unescaped, escaped) pairs for leading characters; a backslash is put
    # in front of a leading "+", "-", "=" or "'" when writing and removed
    # again when reading
    spreadsheetescapes = [("+", "\\+"), ("-", "\\-"), ("=", "\\="), ("'", "\\'")]

    def __init__(self, source=None):
        """Create a unit with the given source and empty comment and target."""
        super(csvunit, self).__init__(source)
        self.comment = ""
        self.source = source
        self.target = ""

    def add_spreadsheet_escapes(self, source, target):
        """add common spreadsheet escapes to two strings

        Only a leading character is escaped. Returns the (source, target)
        pair.
        """
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(unescaped):
                source = source.replace(unescaped, escaped, 1)
            if target.startswith(unescaped):
                target = target.replace(unescaped, escaped, 1)
        return source, target

    def remove_spreadsheet_escapes(self, source, target):
        """remove common spreadsheet escapes from two strings

        Only a leading escape is removed. Returns the (source, target) pair.
        """
        for unescaped, escaped in self.spreadsheetescapes:
            if source.startswith(escaped):
                source = source.replace(escaped, unescaped, 1)
            if target.startswith(escaped):
                target = target.replace(escaped, unescaped, 1)
        return source, target

    def _decodefield(self, value):
        """return a cell value as unicode, mapping None to an empty string"""
        # None has to be handled before decoding: calling .decode on it
        # raises AttributeError
        if value is None:
            return u''
        # already decoded text must not go through .decode again
        if isinstance(value, unicode):
            return value
        return value.decode('utf-8')

    def fromdict(self, cedict):
        """Fill this unit from a dict with 'comment', 'source', 'target' keys.

        Missing keys and None values become empty strings. Byte strings are
        decoded as UTF-8, and spreadsheet escapes are removed from source
        and target.
        """
        self.comment = self._decodefield(cedict.get('comment', ''))
        self.source = self._decodefield(cedict.get('source', ''))
        self.target = self._decodefield(cedict.get('target', ''))
        self.source, self.target = self.remove_spreadsheet_escapes(self.source, self.target)

    def todict(self, encoding='utf-8'):
        """Return this unit as a dict with 'comment', 'source', 'target' keys.

        Spreadsheet escapes are added to source and target, and unicode
        values are encoded with the given encoding.
        """
        comment, source, target = self.comment, self.source, self.target
        source, target = self.add_spreadsheet_escapes(source, target)
        if isinstance(comment, unicode):
            comment = comment.encode(encoding)
        if isinstance(source, unicode):
            source = source.encode(encoding)
        if isinstance(target, unicode):
            target = target.encode(encoding)
        return {'comment': comment, 'source': source, 'target': target}
141
class csvfile(base.TranslationStore):
    """This class represents a .csv file with various lines.
    The default format contains three columns: comments, source, target"""
    UnitClass = csvunit

    def __init__(self, inputfile=None, fieldnames=None):
        """Create a store, optionally parsing the contents of inputfile.

        inputfile  -- object with read() and close(); it is read completely
                      and then closed
        fieldnames -- list of column names, or a comma-separated string of
                      them; defaults to comment, source, target
        """
        base.TranslationStore.__init__(self, unitclass=self.UnitClass)
        self.units = []
        if fieldnames is None:
            self.fieldnames = ['comment', 'source', 'target']
        else:
            if isinstance(fieldnames, basestring):
                fieldnames = [fieldname.strip() for fieldname in fieldnames.split(",")]
            self.fieldnames = fieldnames
        self.filename = getattr(inputfile, 'name', '')
        if inputfile is not None:
            # close the file even when reading it fails
            try:
                csvsrc = inputfile.read()
            finally:
                inputfile.close()
            self.parse(csvsrc)

    def parse(self, csvsrc):
        """Parse CSV text and add one unit per row to this store."""
        # NOTE(review): csv.StringIO is not a documented part of the csv
        # module - confirm it is available in every supported csv module
        inputbuffer = csv.StringIO(csvsrc)
        reader = SimpleDictReader(inputbuffer, self.fieldnames)
        for row in reader:
            newce = self.UnitClass()
            newce.fromdict(row)
            self.addunit(newce)

    def __str__(self):
        """convert to a string. double check that unicode is handled somehow here"""
        source = self.getoutput()
        if isinstance(source, unicode):
            return source.encode(getattr(self, "encoding", "UTF-8"))
        return source

    def getoutput(self):
        """Return the CSV text for all units, one row per unit."""
        outputbuffer = csv.StringIO()
        # units always produce comment, source and target; ignore whichever
        # of those is not among the configured columns instead of raising
        writer = csv.DictWriter(outputbuffer, self.fieldnames, extrasaction='ignore')
        for ce in self.units:
            writer.writerow(ce.todict())
        # getvalue() returns everything written so far without having to
        # rewind the buffer first
        return outputbuffer.getvalue()


# When run as a script: parse CSV from standard input and write the
# re-serialised store to standard output.
if __name__ == '__main__':
    import sys
    store = csvfile()
    store.parse(sys.stdin.read())
    sys.stdout.write(str(store))