1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Module for handling XLIFF files for translation.
22
23 The official recommendation is to use the extension .xlf for XLIFF files.
24 """
25
26 from lxml import etree
27
28 from translate.misc.multistring import multistring
29 from translate.misc.xml_helpers import *
30 from translate.storage import base, lisa
31 from translate.storage.lisa import getXMLspace
32 from translate.storage.placeables.lisa import xml_to_strelem, strelem_to_xml
33
34
35
# Separator placed between a file's name and a unit's id when the two are
# combined into a single identifier string.
36 ID_SEPARATOR = u"\04"
37
39 """A single term in the xliff file."""
40
41 rootNode = "trans-unit"
42 languageNode = "source"
43 textNode = ""
44 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
45
46 _default_xml_space = "default"
47
48
49
def __init__(self, source, empty=False, **kwargs):
    """Construct the unit through the parent class, then set xml:space="preserve".

    When C{empty} is true the element is left exactly as the parent
    constructor produced it and no xml:space attribute is written.
    """
    super(xliffunit, self).__init__(source, empty, **kwargs)
    if not empty:
        lisa.setXMLspace(self.xmlelement, "preserve")
56
58 """Returns an xml Element setup with given parameters."""
59
60
61
62
63 assert purpose
64 langset = etree.Element(self.namespaced(purpose))
65
66
67
68
69 langset.text = text
70 return langset
71
87
89 sourcelanguageNode = self.get_source_dom()
90 if sourcelanguageNode is None:
91 sourcelanguageNode = self.createlanguageNode(sourcelang, u'', "source")
92 self.set_source_dom(sourcelanguageNode)
93
94
95 for i in range(len(sourcelanguageNode)):
96 del sourcelanguageNode[0]
97 sourcelanguageNode.text = None
98
99 strelem_to_xml(sourcelanguageNode, value[0])
100
107 rich_source = property(get_rich_source, set_rich_source)
108
125
130 rich_target = property(get_rich_target, set_rich_target)
131
def addalttrans(self, txt, origin=None, lang=None, sourcetxt=None, matchquality=None):
    """Append an alt-trans element carrying an alternative translation.

    @type txt: String
    @param txt: Alternative translation of the source text.
    @param origin: If truthy, stored in the "origin" attribute.
    @param lang: If truthy, passed to lisa.setXMLlang for the alt-trans tag.
    @param sourcetxt: If truthy, stored in a nested source tag.
    @param matchquality: If truthy, stored in the "match-quality" attribute.
    """
    # str values are decoded as UTF-8 before being stored in the tree.
    if isinstance(txt, str):
        txt = txt.decode("utf-8")

    namespaced = self.namespaced
    alttrans = etree.SubElement(self.xmlelement, namespaced("alt-trans"))
    lisa.setXMLspace(alttrans, "preserve")

    if sourcetxt:
        if isinstance(sourcetxt, str):
            sourcetxt = sourcetxt.decode("utf-8")
        etree.SubElement(alttrans, namespaced("source")).text = sourcetxt

    etree.SubElement(alttrans, namespaced("target")).text = txt

    # Optional attributes are only written when a truthy value was given.
    for attribute, value in (("match-quality", matchquality), ("origin", origin)):
        if value:
            alttrans.set(attribute, value)
    if lang:
        lisa.setXMLlang(alttrans, lang)
158
185
187 """Removes the supplied alternative from the list of alt-trans tags"""
188 self.xmlelement.remove(alternative.xmlelement)
189
def addnote(self, text, origin=None, position="append"):
    """Store C{text} in a new "note" tag under this unit.

    Any C{position} other than "append" first removes the existing notes
    matching C{origin}. Text that is empty after stripping whitespace adds
    nothing. A truthy C{origin} is written to the note's "from" attribute.
    """
    if position != "append":
        self.removenotes(origin=origin)

    # Falsy input (None, "") is passed through untouched and rejected below.
    stripped = text.strip() if text else text
    if not stripped:
        return

    if isinstance(stripped, str):
        stripped = stripped.decode("utf-8")
    note = etree.SubElement(self.xmlelement, self.namespaced("note"))
    note.text = stripped
    if origin:
        note.set("from", origin)
205
207 """Private method that returns the text from notes matching 'origin' or all notes."""
208 notenodes = self.xmlelement.iterdescendants(self.namespaced("note"))
209
210
211
212 initial_list = [lisa.getText(note, getXMLspace(self.xmlelement, self._default_xml_space)) for note in notenodes if self.correctorigin(note, origin)]
213
214
215 dictset = {}
216 notelist = [dictset.setdefault(note, note) for note in initial_list if note not in dictset]
217
218 return notelist
219
222
224 """Remove all the translator notes."""
225 notes = self.xmlelement.iterdescendants(self.namespaced("note"))
226 for note in notes:
227 if self.correctorigin(note, origin=origin):
228 self.xmlelement.remove(note)
229
def adderror(self, errorname, errortext):
    """Record an error on this unit as a note with origin "pofilter".

    The note text is the error name and the error text joined by ": ".
    """
    self.addnote(errorname + ': ' + errortext, origin="pofilter")
235
237 """Get all error messages."""
238
239 notelist = self.getnotelist(origin="pofilter")
240 errordict = {}
241 for note in notelist:
242 errorname, errortext = note.split(': ')
243 errordict[errorname] = errortext
244 return errordict
245
247 """States whether this unit is approved."""
248 return self.xmlelement.get("approved") == "yes"
249
251 """Mark this unit as approved."""
252 if value:
253 self.xmlelement.set("approved", "yes")
254 elif self.isapproved():
255 self.xmlelement.set("approved", "no")
256
258 """States whether this unit needs to be reviewed"""
259 targetnode = self.getlanguageNode(lang=None, index=1)
260 return not targetnode is None and \
261 "needs-review" in targetnode.get("state", "")
262
264 """Marks the unit to indicate whether it needs review. Adds an optional explanation as a note."""
265 targetnode = self.getlanguageNode(lang=None, index=1)
266 if not targetnode is None:
267 if needsreview:
268 targetnode.set("state", "needs-review-translation")
269 if explanation:
270 self.addnote(explanation, origin="translator")
271 else:
272 del targetnode.attrib["state"]
273
280
282 if value:
283 self.markapproved(False)
284 else:
285 self.markapproved(True)
286 targetnode = self.getlanguageNode(lang=None, index=1)
287 if not targetnode is None:
288 if value:
289 targetnode.set("state", "needs-review-translation")
290 else:
291 for attribute in ["state", "state-qualifier"]:
292 if attribute in targetnode.attrib:
293 del targetnode.attrib[attribute]
294
295 - def settarget(self, text, lang='xx', append=False):
300
301
302
303
304
305
306
307
309 value = self.xmlelement.get("translate")
310 if value and value.lower() == 'no':
311 return False
312 return True
313
315 targetnode = self.getlanguageNode(lang=None, index=1)
316 if targetnode is None:
317 return
318 if self.isfuzzy() and "state-qualifier" in targetnode.attrib:
319
320 del targetnode.attrib["state-qualifier"]
321 targetnode.set("state", "translated")
322
324 self.xmlelement.set("id", id)
325
327 uid = ""
328 try:
329 filename = self.xmlelement.iterancestors(self.namespaced('file')).next().get('original')
330 if filename:
331 uid = filename + ID_SEPARATOR
332 except StopIteration:
333
334 pass
335
336 uid += self.xmlelement.get("id") or ""
337 return uid
338
341
343 return [self.getid()]
344
def createcontextgroup(self, name, contexts=None, purpose=None):
    """Add a context-group element, with one context child per entry.

    @param name: Value of the group's "name" attribute.
    @param contexts: Non-empty list of (type, text) tuples, one per context.
    @param purpose: If truthy, value of the group's "purpose" attribute.
    """
    assert contexts
    namespaced = self.namespaced
    group = etree.Element(namespaced("context-group"))

    # On a "group" element the context group becomes the first child; on
    # any other element it is appended at the end.
    parent = self.xmlelement
    if parent.tag == namespaced("group"):
        parent.insert(0, group)
    else:
        parent.append(group)

    group.set("name", name)
    if purpose:
        group.set("purpose", purpose)

    for context_type, context_text in contexts:
        if isinstance(context_text, str):
            context_text = context_text.decode("utf-8")
        context = etree.SubElement(group, namespaced("context"))
        context.text = context_text
        context.set("context-type", context_type)
366
def getcontextgroups(self, name):
    """Return the contexts of every context group called C{name}.

    The result holds one list per matching context-group; each of those
    lists contains a (context-type, text) tuple for every context in it.
    """
    matches = []
    for group in self.xmlelement.iterdescendants(self.namespaced("context-group")):
        if group.get("name") != name:
            continue
        matches.append([
            (context.get("context-type"),
             lisa.getText(context, getXMLspace(self.xmlelement, self._default_xml_space)))
            for context in group.iterdescendants(self.namespaced("context"))
        ])
    return matches
380
382 """returns the restype attribute in the trans-unit tag"""
383 return self.xmlelement.get("restype")
384
385 - def merge(self, otherunit, overwrite=False, comments=True, authoritative=False):
396
398 """Check against node tag's origin (e.g note or alt-trans)"""
399 if origin == None:
400 return True
401 elif origin in node.get("from", ""):
402 return True
403 elif origin in node.get("origin", ""):
404 return True
405 else:
406 return False
407
409 """Override L{TranslationUnit.multistring_to_rich} which is used by the
410 C{rich_source} and C{rich_target} properties."""
411 strings = mstr
412 if isinstance(mstr, multistring):
413 strings = mstr.strings
414 elif isinstance(mstr, basestring):
415 strings = [mstr]
416
417 return [xml_to_strelem(s) for s in strings]
418 multistring_to_rich = classmethod(multistring_to_rich)
419
421 """Override L{TranslationUnit.rich_to_multistring} which is used by the
422 C{rich_source} and C{rich_target} properties."""
423 return multistring([unicode(elem) for elem in elem_list])
424 rich_to_multistring = classmethod(rich_to_multistring)
425
426
428 """Class representing a XLIFF file store."""
429 UnitClass = xliffunit
430 Name = _("XLIFF Translation File")
431 Mimetypes = ["application/x-xliff", "application/x-xliff+xml"]
432 Extensions = ["xlf", "xliff"]
433 rootNode = "xliff"
434 bodyNode = "body"
435 XMLskeleton = '''<?xml version="1.0" ?>
436 <xliff version='1.1' xmlns='urn:oasis:names:tc:xliff:document:1.1'>
437 <file original='NoName' source-language='en' datatype='plaintext'>
438 <body>
439 </body>
440 </file>
441 </xliff>'''
442 namespace = 'urn:oasis:names:tc:xliff:document:1.1'
443 suggestions_in_format = True
444 """xliff units have alttrans tags which can be used to store suggestions"""
445
447 self._filename = None
448 lisa.LISAfile.__init__(self, *args, **kwargs)
449 self._messagenum = 0
450
def initbody(self):
    """Pick up the document's default namespace and select the current body.

    The file node is the one named by self._filename (created if missing)
    when a filename is set, otherwise the first file node under the root.
    Its body node (created if missing) becomes self.body.
    """
    root = self.document.getroot()
    self.namespace = root.nsmap.get(None, None)

    if self._filename:
        filenode = self.getfilenode(self._filename, createifmissing=True)
    else:
        filenode = root.iterchildren(self.namespaced('file')).next()
    self.body = self.getbodynode(filenode, createifmissing=True)
459
461 """Initialise the file header."""
462 pass
463
464 - def createfilenode(self, filename, sourcelanguage=None, targetlanguage=None, datatype='plaintext'):
489
491 """returns the name of the given file"""
492 return filenode.get("original")
493
495 """set the name of the given file"""
496 return filenode.set("original", filename)
497
499 """returns all filenames in this XLIFF file"""
500 filenodes = self.document.getroot().iterchildren(self.namespaced("file"))
501 filenames = [self.getfilename(filenode) for filenode in filenodes]
502 filenames = filter(None, filenames)
503 if len(filenames) == 1 and filenames[0] == '':
504 filenames = []
505 return filenames
506
def getfilenode(self, filename, createifmissing=False):
    """Return the first file node whose name equals C{filename}.

    When no node matches, a new one is obtained from createfilenode if
    C{createifmissing} is true; otherwise None is returned.
    """
    for candidate in self.document.getroot().iterchildren(self.namespaced("file")):
        if self.getfilename(candidate) == filename:
            return candidate
    return self.createfilenode(filename) if createifmissing else None
517
def getids(self, filename=None):
    """Return unit ids, optionally restricted to one file.

    Without a filename the parent implementation answers. With one,
    self.id_index is rebuilt from the units whose id starts with the
    filename followed by ID_SEPARATOR, keyed by the id with that prefix
    removed, and the keys of that index are returned.
    """
    if not filename:
        return super(xlifffile, self).getids()

    self.id_index = {}
    prefix = filename + ID_SEPARATOR
    prefix_length = len(prefix)
    for unit in self.units:
        if unit.getid().startswith(prefix):
            self.id_index[unit.getid()[prefix_length:]] = unit
    return self.id_index.keys()
528
530 if not language:
531 return
532 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
533 filenode.set("source-language", language)
534
536 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
537 return filenode.get("source-language")
538 sourcelanguage = property(getsourcelanguage, setsourcelanguage)
539
541 if not language:
542 return
543 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
544 filenode.set("target-language", language)
545
547 filenode = self.document.getroot().iterchildren(self.namespaced('file')).next()
548 return filenode.get("target-language")
549 targetlanguage = property(gettargetlanguage, settargetlanguage)
550
552 """Returns the datatype of the stored file. If no filename is given,
553 the datatype of the first file is given."""
554 if filename:
555 node = self.getfilenode(filename)
556 if not node is None:
557 return node.get("datatype")
558 else:
559 filenames = self.getfilenames()
560 if len(filenames) > 0 and filenames[0] != "NoName":
561 return self.getdatatype(filenames[0])
562 return ""
563
565 """Returns the date attribute for the file. If no filename is given,
566 the date of the first file is given. If the date attribute is not
567 specified, None is returned."""
568 if filename:
569 node = self.getfilenode(filename)
570 if not node is None:
571 return node.get("date")
572 else:
573 filenames = self.getfilenames()
574 if len(filenames) > 0 and filenames[0] != "NoName":
575 return self.getdate(filenames[0])
576 return None
577
579 """We want to remove the default file-tag as soon as possible if we
580 know if still present and empty."""
581 filenodes = list(self.document.getroot().iterchildren(self.namespaced("file")))
582 if len(filenodes) > 1:
583 for filenode in filenodes:
584 if filenode.get("original") == "NoName" and \
585 not list(filenode.iterdescendants(self.namespaced(self.UnitClass.rootNode))):
586 self.document.getroot().remove(filenode)
587 break
588
590 """finds the header node for the given filenode"""
591
592 headernode = filenode.iterchildren(self.namespaced("header"))
593 try:
594 return headernode.next()
595 except StopIteration:
596 pass
597 if not createifmissing:
598 return None
599 headernode = etree.SubElement(filenode, self.namespaced("header"))
600 return headernode
601
def getbodynode(self, filenode, createifmissing=False):
    """Return the first body child of C{filenode}.

    When there is none, a new body element is added under C{filenode} and
    returned if C{createifmissing} is true; otherwise None is returned.
    """
    # Only the first match is wanted, so leave on the first iteration.
    for bodynode in filenode.iterchildren(self.namespaced("body")):
        return bodynode
    if createifmissing:
        return etree.SubElement(filenode, self.namespaced("body"))
    return None
613
def addsourceunit(self, source, filename="NoName", createifmissing=False):
    """Add a unit for C{source} to the body of the file called C{filename}.

    The last used body is reused when the filename is unchanged; otherwise
    switchfile is asked to select the right one (creating nodes when
    C{createifmissing} is true). The new unit receives the next message
    number as its id.

    Returns the new unit, or None when the file could not be selected.
    """
    must_switch = self._filename != filename
    if must_switch and not self.switchfile(filename, createifmissing):
        return None
    unit = super(xlifffile, self).addsourceunit(source)
    self._messagenum += 1
    unit.setid("%d" % self._messagenum)
    return unit
625
def switchfile(self, filename, createifmissing=False):
    """Make the body of the file called C{filename} the current body.

    @param filename: Name of the file node to switch to, as looked up by
        getfilenode.
    @param createifmissing: When true, a missing file node is created and
        appended to the document root, and a missing body node is created.
    @return: True on success; False when the file node or its body node
        does not exist and was not created.

    On success self._filename, self.body and self._messagenum (the number
    of trans-unit descendants of the new body) are updated. On failure all
    three keep their previous values, so a later call for the same filename
    attempts the switch again instead of silently reusing the old body.
    """
    filenode = self.getfilenode(filename)
    if filenode is None:
        if not createifmissing:
            return False
        filenode = self.createfilenode(filename)
        self.document.getroot().append(filenode)

    body = self.getbodynode(filenode, createifmissing=createifmissing)
    if body is None:
        return False

    # Commit the new state only now that nothing can fail any more.
    self._filename = filename
    self.body = body
    self._messagenum = len(list(body.iterdescendants(self.namespaced("trans-unit"))))
    return True
646
def creategroup(self, filename="NoName", createifmissing=False, restype=None):
    """Add a group tag to the body of the file called C{filename}.

    The current body is reused when the filename is unchanged; otherwise
    switchfile selects it first. A truthy C{restype} is written to the
    group's "restype" attribute.

    Returns the new group element, or None when the file could not be
    selected.
    """
    if self._filename != filename and not self.switchfile(filename, createifmissing):
        return None
    group = etree.SubElement(self.body, self.namespaced("group"))
    if restype:
        group.set("restype", restype)
    return group
656
660
672 parsestring = classmethod(parsestring)
673