1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 """Module to provide a cache of statistics in a database.
23
24 @organization: Zuza Software Foundation
25 @copyright: 2007 Zuza Software Foundation
26 @license: U{GPL <http://www.fsf.org/licensing/licenses/gpl.html>}
27 """
28
29 from translate import __version__ as toolkitversion
30 from translate.storage import factory
31 from translate.misc.multistring import multistring
32 from translate.lang.common import Common
33
34 try:
35 from sqlite3 import dbapi2
36 except ImportError:
37 from pysqlite2 import dbapi2
38 import os.path
39 import re
40 import sys
41
# Pre-compiled patterns shared by the statistics code.  Raw strings are used
# so that regex escapes such as \s and \D never pass through Python's own
# string-escape processing.

# Matches a leading "_n: " marker (presumably the KDE plural prefix -- confirm).
kdepluralre = re.compile(r"^_n: ")
# Matches a <br>, <br/> or <br /> tag.
brtagre = re.compile(r"<br\s*?/?>")
# Matches any single tag-like span from "<" to the next ">".
xmltagre = re.compile(r"<[^>]+>")
# Matches a dot that has a non-digit character on both sides.
numberre = re.compile(r"\D\.\D")

# Maps the numeric unit state stored in the database to its statistics name.
state_strings = {0: "untranslated", 1: "translated", 2: "fuzzy"}
48
58
def wordsinunit(unit):
    """Counts the words in the unit's source and target, taking plurals into
    account.

    @param unit: the unit to count; its source and target may each be a
    plain string or a multistring holding several plural forms
    @return: a (sourcewords, targetwords) tuple; targetwords is 0 unless the
    unit is translated
    """
    def _forms(text):
        # A multistring contributes every one of its strings; anything else
        # (including None) is treated as a single, possibly empty, string.
        if isinstance(text, multistring):
            return text.strings
        return [text or ""]

    sourcewords = sum(wordcount(form) for form in _forms(unit.source))
    if not unit.istranslated():
        return sourcewords, 0
    targetwords = sum(wordcount(form) for form in _forms(unit.target))
    return sourcewords, targetwords
78
def statefordb(unit):
    """Returns the numeric database state for the unit.

    @return: 1 if the unit is translated, 2 if it is fuzzy and has a
    non-empty target, 0 otherwise
    """
    if unit.istranslated():
        state = 1
    elif unit.isfuzzy() and unit.target:
        state = 2
    else:
        state = 0
    return state
86
def emptystats():
    """Returns a dictionary with all statistics initialised to 0.

    For each of the states "total", "translated", "fuzzy", "untranslated"
    and "review" there are three keys: the state itself, plus the state
    followed by "sourcewords" and by "targetwords".
    """
    stats = {}
    for state in ("total", "translated", "fuzzy", "untranslated", "review"):
        for suffix in ("", "sourcewords", "targetwords"):
            stats[state + suffix] = 0
    return stats
95
def suggestioninfo(filename):
    """Provides the filename of the associated file containing suggestions and
    its mtime, if it exists.

    Only files with the "po" extension are considered; their suggestions
    live next to them in a file with an extra "pending" extension.

    @param filename: the translation file to find the suggestion file for
    @return: a (suggestion_filename, suggestion_mtime) tuple, or (None, -1)
    when there is no suggestion file
    """
    extension = os.path.splitext(filename)[1]
    if extension == os.path.extsep + "po":
        candidate = filename + os.path.extsep + 'pending'
        if os.path.exists(candidate):
            return candidate, os.path.getmtime(candidate)
    return None, -1
112
class StatsCache(object):
    """An object instantiated as a singleton for each statsfile that provides
    access to the database cache from a pool of StatsCache objects.

    Calling the class with the same statsfile (after resolving it to its
    real path) always returns the same instance.
    """
    # Pool of already created caches, keyed by the real path of the statsfile.
    caches = {}
    # Path of the default statsfile; worked out the first time it is needed.
    defaultfile = None
    # This cache's connection
    con = None
    # The current cursor
    cur = None

    def __new__(cls, statsfile=None):
        """Returns the cache for the given statsfile, creating it if needed.

        @param statsfile: path of the database file; when empty, "stats.db"
        in a per-user directory is used (the directory is created if missing)
        """
        if not statsfile:
            if not cls.defaultfile:
                userdir = os.path.expanduser("~")
                if os.name == "nt":
                    cachedir = os.path.join(userdir, "Translate Toolkit")
                else:
                    cachedir = os.path.join(userdir, ".translate_toolkit")
                if not os.path.exists(cachedir):
                    os.mkdir(cachedir)
                cls.defaultfile = os.path.realpath(os.path.join(cachedir, "stats.db"))
            statsfile = cls.defaultfile
        else:
            statsfile = os.path.realpath(statsfile)

        if statsfile in cls.caches:
            return cls.caches[statsfile]

        # Register the new instance before connecting so that it is the one
        # handed out for this statsfile from now on.
        cache = cls.caches[statsfile] = object.__new__(cls)
        cache.con = dbapi2.connect(statsfile)
        cache.cur = cache.con.cursor()
        cache.create()
        return cache

    def create(self):
        """Create all tables and indexes."""
        self.cur.execute("""CREATE TABLE IF NOT EXISTS files(
            fileid INTEGER PRIMARY KEY AUTOINCREMENT,
            path VARCHAR NOT NULL UNIQUE,
            mtime INTEGER NOT NULL,
            toolkitbuild INTEGER NOT NULL);""")

        self.cur.execute("""CREATE UNIQUE INDEX IF NOT EXISTS filepathindex
            ON files (path);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS units(
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            unitid VARCHAR NOT NULL,
            fileid INTEGER NOT NULL,
            unitindex INTEGER NOT NULL,
            source VARCHAR NOT NULL,
            target VARCHAR,
            state INTEGER,
            sourcewords INTEGER,
            targetwords INTEGER);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS fileidindex
            ON units(fileid);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS checkerconfigs(
            configid INTEGER PRIMARY KEY AUTOINCREMENT,
            config VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS configindex
            ON checkerconfigs(config);""")

        self.cur.execute("""CREATE TABLE IF NOT EXISTS uniterrors(
            errorid INTEGER PRIMARY KEY AUTOINCREMENT,
            unitindex INTEGER NOT NULL,
            fileid INTEGER NOT NULL,
            configid INTEGER NOT NULL,
            name VARCHAR NOT NULL,
            message VARCHAR);""")

        self.cur.execute("""CREATE INDEX IF NOT EXISTS uniterrorindex
            ON uniterrors(fileid, configid);""")

        self.con.commit()

    # NOTE(review): the defaults of optmtime and checkmtime are inferred from
    # the call sites in this class -- confirm against the original signature.
    def _getstoredfileid(self, filename, optmtime=-1, checkmtime=True):
        """Attempt to find the fileid of the given file, if it hasn't been
        updated since the last record update.

        None is returned if either the file's record is not found, or if it is
        not up to date.

        @param filename: the filename to retrieve the id for
        @param optmtime: an optional mtime to consider in addition to the mtime of
        the given file
        @param checkmtime: when false, the stored mtime is not compared but
        overwritten with the current one (the update is not committed here)
        @rtype: Integer or None
        """
        realpath = os.path.realpath(filename)
        self.cur.execute("""SELECT fileid, mtime FROM files
            WHERE path=?;""", (realpath,))
        filerow = self.cur.fetchone()
        mtime = max(optmtime, os.path.getmtime(realpath))
        if not filerow:
            return None
        fileid = filerow[0]
        if checkmtime:
            if filerow[1] != mtime:
                return None
        else:
            # The caller vouches for the record, so just bring its mtime up
            # to date.
            self.cur.execute("""UPDATE files
                SET mtime=?
                WHERE fileid=?;""", (mtime, fileid))
        return fileid

    def _getstoredcheckerconfig(self, checker):
        """See if this checker configuration has been used before.

        @return: the configid of the stored configuration, or None
        """
        config = str(checker.config.__dict__)
        self.cur.execute("""SELECT configid, config FROM checkerconfigs WHERE
            config=?;""", (config,))
        configrow = self.cur.fetchone()
        if not configrow or configrow[1] != config:
            return None
        return configrow[0]

    def _cacheunitstats(self, units, fileid, unitindex=None):
        """Cache the statistics for the supplied unit(s).

        @param units: the units to store; untranslatable ones are skipped
        @param fileid: the id of the file the units belong to
        @param unitindex: when given, the index to store for every unit
        instead of its position in units (used to recache a single unit)
        @return: the state name of the first unit if unitindex was given,
        otherwise an empty string
        """
        # Compare against None: 0 is a perfectly valid unit index.
        single = unitindex is not None
        unitvalues = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                sourcewords, targetwords = wordsinunit(unit)
                if single:
                    index = unitindex
                unitvalues.append((unit.getid(), fileid, index,
                                   unit.source, unit.target,
                                   sourcewords, targetwords,
                                   statefordb(unit)))

        self.cur.executemany("""INSERT INTO units
            (unitid, fileid, unitindex, source, target, sourcewords, targetwords, state)
            values (?, ?, ?, ?, ?, ?, ?, ?);""",
            unitvalues)
        self.con.commit()
        if single:
            return state_strings[statefordb(units[0])]
        return ""

    def cachestore(self, store):
        """Calculates and caches the statistics of the given store
        unconditionally.

        @return: the fileid of the newly created file record
        """
        realpath = os.path.realpath(store.filename)
        mtime = os.path.getmtime(realpath)
        # The file record is re-created below and therefore gets a new id, so
        # everything stored under the previous id has to go first or it
        # would stay behind unreachable.
        self.cur.execute("""SELECT fileid FROM files WHERE
            path=?;""", (realpath,))
        for oldfileid in [row[0] for row in self.cur.fetchall()]:
            self.cur.execute("""DELETE FROM units WHERE
                fileid=?""", (oldfileid,))
            self.cur.execute("""DELETE FROM uniterrors WHERE
                fileid=?;""", (oldfileid,))
        self.cur.execute("""DELETE FROM files WHERE
            path=?;""", (realpath,))
        self.cur.execute("""INSERT INTO files
            (fileid, path, mtime, toolkitbuild) values (NULL, ?, ?, ?);""",
            (realpath, mtime, toolkitversion.build))
        fileid = self.cur.lastrowid
        self._cacheunitstats(store.units, fileid)
        return fileid

    # NOTE(review): this method's name is not visible in the damaged source;
    # confirm it against the callers.
    def directorytotals(self, dirname):
        """Retrieves the stored statistics for a given directory, grouped by
        unit state.

        Note that this does not check for mtimes or the presence of files.

        @param dirname: the directory whose cached files should be included
        @return: a list of (state, total, sourcewords, targetwords) rows
        """
        prefix = os.path.realpath(dirname)
        # End the prefix on a separator so that "/a/b" does not also select
        # the files of a sibling such as "/a/bc".
        if not prefix.endswith(os.sep):
            prefix += os.sep
        # substr() counts from 1 in SQLite.
        self.cur.execute("""SELECT
            state,
            count(unitid) as total,
            sum(sourcewords) as sourcewords,
            sum(targetwords) as targetwords
            FROM units WHERE fileid IN
                (SELECT fileid from files
                WHERE substr(path, 1, ?)=?)
            GROUP BY state;""", (len(prefix), prefix))
        return self.cur.fetchall()

    # NOTE(review): this method's name is not visible in the damaged source;
    # confirm it against the callers.
    def filetotals(self, filename):
        """Retrieves the statistics for the given file if possible, otherwise
        delegates to cachestore().

        @return: a dictionary as produced by emptystats() with the counts
        filled in, or an empty dictionary if the file could not be loaded
        """
        fileid = self._getstoredfileid(filename)
        if not fileid:
            try:
                store = factory.getobject(filename)
                fileid = self.cachestore(store)
            except ValueError as e:
                sys.stderr.write(str(e) + "\n")
                return {}

        self.cur.execute("""SELECT
            state,
            count(unitid) as total,
            sum(sourcewords) as sourcewords,
            sum(targetwords) as targetwords
            FROM units WHERE fileid=?
            GROUP BY state;""", (fileid,))
        values = self.cur.fetchall()

        totals = emptystats()
        for statenumber, count, sourcewords, targetwords in values:
            state = state_strings[statenumber]
            totals[state] = count or 0
            # sum() yields NULL when every value in the group is NULL.
            totals[state + "sourcewords"] = sourcewords or 0
            totals[state + "targetwords"] = targetwords or 0
        totals["total"] = totals["untranslated"] + totals["translated"] + totals["fuzzy"]
        totals["totalsourcewords"] = totals["untranslatedsourcewords"] + \
                                     totals["translatedsourcewords"] + \
                                     totals["fuzzysourcewords"]
        return totals

    def _cacheunitschecks(self, units, fileid, configid, checker, unitindex=None):
        """Helper method for cachestorechecks() and recacheunit()

        Runs the checker over the translatable units and stores every failure.

        @param unitindex: when given, the index to store for every unit
        instead of its position in units (used to recache a single unit)
        @return: the list of "check-"-prefixed names of the failed checks,
        followed by "total" if unitindex was given
        """
        # Compare against None: 0 is a perfectly valid unit index.
        single = unitindex is not None
        if single:
            # Only one unit is being refreshed, so no marker row is wanted.
            unitvalues = []
        else:
            # A whole file always gets one marker row with unitindex -1, so
            # that a file without any failures still has an entry and is not
            # mistaken for one that was never checked.
            unitvalues = [(-1, fileid, configid, "noerror", "")]

        errornames = []
        for index, unit in enumerate(units):
            if unit.istranslatable():
                if single:
                    index = unitindex
                failures = checker.run_filters(unit)
                for checkname, checkmessage in failures.items():
                    unitvalues.append((index, fileid, configid, checkname, checkmessage))
                    errornames.append("check-" + checkname)
        checker.setsuggestionstore(None)

        if single:
            errornames.append("total")

        self.cur.executemany("""INSERT INTO uniterrors
            (unitindex, fileid, configid, name, message)
            values (?, ?, ?, ?, ?);""",
            unitvalues)
        self.con.commit()
        return errornames

    def cachestorechecks(self, fileid, store, checker, configid):
        """Calculates and caches the error statistics of the given store
        unconditionally.

        Every stored error of the file, whatever its checker configuration,
        is removed first.

        @return: the fileid that was passed in
        """
        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=?;""", (fileid,))
        self._cacheunitschecks(store.units, fileid, configid, checker)
        return fileid

    def recacheunit(self, filename, checker, unit):
        """Recalculate all information for a specific unit. This is necessary
        for updating all statistics when a translation of a unit took place,
        for example.

        This method assumes that everything was up to date before (file totals,
        checks, checker config, etc.)

        @return: a list with the unit's state name followed by the names
        returned by _cacheunitschecks()
        """
        suggestion_filename, suggestion_mtime = suggestioninfo(filename)
        fileid = self._getstoredfileid(filename, suggestion_mtime, checkmtime=False)
        configid = self._getstoredcheckerconfig(checker)
        unitid = unit.getid()

        # Look the unit's position up before its row is replaced.
        self.cur.execute("""SELECT unitindex FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        unitindex = self.cur.fetchone()[0]
        self.cur.execute("""DELETE FROM units WHERE
            fileid=? AND unitid=?;""", (fileid, unitid))
        state = [self._cacheunitstats([unit], fileid, unitindex)]

        self.cur.execute("""DELETE FROM uniterrors WHERE
            fileid=? AND unitindex=?;""", (fileid, unitindex))
        if suggestion_filename:
            checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep + 'pending'))
        state.extend(self._cacheunitschecks([unit], fileid, configid, checker, unitindex))
        return state

    def filechecks(self, filename, checker, store=None):
        """Retrieves the error statistics for the given file if possible,
        otherwise delegates to cachestorechecks().

        @param store: an already loaded store for filename, used instead of
        loading the file again when something has to be recalculated
        @return: a dictionary mapping "check-"-prefixed check names to the
        list of unit indexes that failed the check, or an empty dictionary if
        the file could not be loaded
        """
        suggestion_filename, suggestion_mtime = suggestioninfo(filename)
        fileid = self._getstoredfileid(filename, suggestion_mtime)
        configid = self._getstoredcheckerconfig(checker)
        try:
            if not fileid:
                store = store or factory.getobject(filename)
                fileid = self.cachestore(store)
            if not configid:
                self.cur.execute("""INSERT INTO checkerconfigs
                    (configid, config) values (NULL, ?);""",
                    (str(checker.config.__dict__),))
                configid = self.cur.lastrowid
        except ValueError as e:
            sys.stderr.write(str(e) + "\n")
            return {}

        def geterrors():
            self.cur.execute("""SELECT
                name,
                unitindex
                FROM uniterrors WHERE fileid=? and configid=?
                ORDER BY unitindex;""", (fileid, configid))
            return self.cur.fetchall()

        values = geterrors()
        if not values:
            # Nothing is stored for this file and configuration yet, so run
            # the checks now and read the result back.
            store = store or factory.getobject(filename)
            if suggestion_filename:
                checker.setsuggestionstore(factory.getobject(suggestion_filename, ignore=os.path.extsep + 'pending'))
            self.cachestorechecks(fileid, store, checker, configid)
            values = geterrors()

        errors = {}
        for name, unitindex in values:
            # Index -1 is the marker row of a checked file, not a failure.
            if unitindex == -1:
                continue
            checkkey = 'check-' + name
            if checkkey not in errors:
                errors[checkkey] = []
            errors[checkkey].append(unitindex)

        return errors

    def filestats(self, filename, checker, store=None):
        """Retrieves the complete statistics for the given file.

        @return: a dictionary mapping "total", "translated", "fuzzy",
        "untranslated" and every failed check (see filechecks()) to the list
        of unit indexes it applies to
        """
        stats = {"total": [], "translated": [], "fuzzy": [], "untranslated": []}

        stats.update(self.filechecks(filename, checker, store))
        fileid = self._getstoredfileid(filename)

        self.cur.execute("""SELECT
            state,
            unitindex
            FROM units WHERE fileid=?
            ORDER BY unitindex;""", (fileid,))

        for statenumber, unitindex in self.cur.fetchall():
            stats[state_strings[statenumber]].append(unitindex)
            stats["total"].append(unitindex)

        return stats
460