| Home | Trees | Indices | Help |
|---|
|
|
1 """A parser for the Music Metadata XML Format (MMD).
2
3 This module contains L{MbXmlParser}, which parses the U{Music Metadata XML
4 Format (MMD) <http://musicbrainz.org/development/mmd/>} returned by the
5 MusicBrainz webservice.
6
7 There are also DOM helper functions in this module used by the parser which
8 probably aren't useful to users.
9 """
10 __revision__ = '$Id: wsxml.py 12028 2009-09-01 13:15:50Z matt $'
11
12 import re
13 import logging
14 import urlparse
15 import xml.dom.minidom
16 import xml.sax.saxutils as saxutils
17 from xml.parsers.expat import ExpatError
18 from xml.dom import DOMException
19
20 import musicbrainz2.utils as mbutils
21 import musicbrainz2.model as model
22 from musicbrainz2.model import NS_MMD_1, NS_REL_1, NS_EXT_1
23
24 __all__ = [
25 'DefaultFactory', 'Metadata', 'ParseError',
26 'MbXmlParser', 'MbXmlWriter',
27 'AbstractResult',
28 'ArtistResult', 'ReleaseResult', 'TrackResult', 'LabelResult',
29 'ReleaseGroupResult'
30 ]
31
32
34 """A factory to instantiate classes from the domain model.
35
36 This factory may be used to create objects from L{musicbrainz2.model}.
37 """
51
52
54 """Exception to be thrown if a parse error occurs.
55
56 The C{'msg'} attribute contains a printable error message; C{'reason'}
57 is the lower-level exception that was raised.
58 """
59
64
67
68
70 """Represents a parsed Music Metadata XML document.
71
72 The Music Metadata XML format is very flexible and may contain a
73 diverse set of data (e.g. an artist, a release and a list of tracks),
74 but usually only a small subset is used (either an artist, a release
75 or a track, or a list of objects from one class).
76
77 @see: L{MbXmlParser} for reading, and L{MbXmlWriter} for writing
78 Metadata objects
79 """
81 self._artist = None
82 self._release = None
83 self._track = None
84 self._label = None
85 self._releaseGroup = None
86 self._artistResults = [ ]
87 self._artistResultsOffset = None
88 self._artistResultsCount = None
89 self._releaseResults = [ ]
90 self._releaseResultsOffset = None
91 self._releaseResultsCount = None
92 self._releaseGroupResults = [ ]
93 self._releaseGroupResultsOffset = None
94 self._releaseGroupResultsCount = None
95 self._trackResults = [ ]
96 self._trackResultsOffset = None
97 self._trackResultsCount = None
98 self._labelResults = [ ]
99 self._labelResultsOffset = None
100 self._labelResultsCount = None
101 self._tagList = [ ]
102 self._rating = None
103 self._userList = [ ]
104
107
109 self._artist = artist
110
111 artist = property(getArtist, setArtist, doc='An Artist object.')
112
115
117 self._label = label
118
119 label = property(getLabel, setLabel, doc='A Label object.')
120
123
125 self._release = release
126
127 release = property(getRelease, setRelease, doc='A Release object.')
128
131
133 self._releaseGroup = releaseGroup
134
135 releaseGroup = property(getReleaseGroup, setReleaseGroup)
136
139
141 self._track = track
142
143 track = property(getTrack, setTrack, doc='A Track object.')
144
146 """Returns an artist result list.
147
148 @return: a list of L{ArtistResult} objects.
149 """
150 return self._artistResults
151
152 artistResults = property(getArtistResults,
153 doc='A list of ArtistResult objects.')
154
156 """Returns the offset of the artist result list.
157
158 The offset is used for paging through the result list. It
159 is zero-based.
160
161 @return: an integer containing the offset, or None
162
163 @see: L{getArtistResults}, L{getArtistResultsCount}
164 """
165 return self._artistResultsOffset
166
168 """Sets the offset of the artist result list.
169
170 @param value: an integer containing the offset, or None
171
172 @see: L{getArtistResultsOffset}
173 """
174 self._artistResultsOffset = value
175
176 artistResultsOffset = property(
177 getArtistResultsOffset, setArtistResultsOffset,
178 doc='The offset of the artist results.')
179
181 """Returns the total number of results available.
182
183 This may or may not match with the number of elements that
184 L{getArtistResults} returns. If the count is higher than
185 the list, it indicates that the list is incomplete.
186
187 @return: an integer containing the count, or None
188
189 @see: L{setArtistResultsCount}, L{getArtistResultsOffset}
190 """
191 return self._artistResultsCount
192
194 """Sets the total number of available results.
195
196 @param value: an integer containing the count, or None
197
198 @see: L{getArtistResults}, L{setArtistResultsOffset}
199 """
200 self._artistResultsCount = value
201
202 artistResultsCount = property(
203 getArtistResultsCount, setArtistResultsCount,
204 doc='The total number of artists results.')
205
207 """Returns a label result list.
208
209 @return: a list of L{LabelResult} objects.
210 """
211 return self._labelResults
212
213 labelResults = property(getLabelResults,
214 doc='A list of LabelResult objects')
215
217 """Returns the offset of the label result list.
218
219 The offset is used for paging through the result list. It
220 is zero-based.
221
222 @return: an integer containing the offset, or None
223
224 @see: L{getLabelResults}, L{getLabelResultsCount}
225 """
226 return self._labelResultsOffset
227
229 """Sets the offset of the label result list.
230
231 @param value: an integer containing the offset, or None
232
233 @see: L{getLabelResultsOffset}
234 """
235 self._labelResultsOffset = value
236
237 labelResultsOffset = property(
238 getLabelResultsOffset, setLabelResultsOffset,
239 doc='The offset of the label results.')
240
242 """Returns the total number of results available.
243
244 This may or may not match with the number of elements that
245 L{getLabelResults} returns. If the count is higher than
246 the list, it indicates that the list is incomplete.
247
248 @return: an integer containing the count, or None
249
250 @see: L{setLabelResultsCount}, L{getLabelResultsOffset}
251 """
252 return self._labelResultsCount
253
255 """Sets the total number of available results.
256
257 @param value: an integer containing the count, or None
258
259 @see: L{getLabelResults}, L{setLabelResultsOffset}
260 """
261 self._labelResultsCount = value
262
263 labelResultsCount = property(
264 getLabelResultsCount, setLabelResultsCount,
265 doc='The total number of label results.')
266
268 """Returns a release result list.
269
270 @return: a list of L{ReleaseResult} objects.
271 """
272 return self._releaseResults
273
274 releaseResults = property(getReleaseResults,
275 doc='A list of ReleaseResult objects.')
276
278 """Returns the offset of the release result list.
279
280 The offset is used for paging through the result list. It
281 is zero-based.
282
283 @return: an integer containing the offset, or None
284
285 @see: L{getReleaseResults}, L{getReleaseResultsCount}
286 """
287 return self._releaseResultsOffset
288
290 """Sets the offset of the release result list.
291
292 @param value: an integer containing the offset, or None
293
294 @see: L{getReleaseResultsOffset}
295 """
296 self._releaseResultsOffset = value
297
298 releaseResultsOffset = property(
299 getReleaseResultsOffset, setReleaseResultsOffset,
300 doc='The offset of the release results.')
301
303 """Returns the total number of results available.
304
305 This may or may not match with the number of elements that
306 L{getReleaseResults} returns. If the count is higher than
307 the list, it indicates that the list is incomplete.
308
309 @return: an integer containing the count, or None
310
311 @see: L{setReleaseResultsCount}, L{getReleaseResultsOffset}
312 """
313 return self._releaseResultsCount
314
316 """Sets the total number of available results.
317
318 @param value: an integer containing the count, or None
319
320 @see: L{getReleaseResults}, L{setReleaseResultsOffset}
321 """
322 self._releaseResultsCount = value
323
324 releaseResultsCount = property(
325 getReleaseResultsCount, setReleaseResultsCount,
326 doc='The total number of release results.')
327
329 """Returns a release group result list.
330
331 @return: a list of L{ReleaseGroupResult} objects.
332 """
333 return self._releaseGroupResults
334
335 releaseGroupResults = property(getReleaseGroupResults,
336 doc = 'A list of ReleaseGroupResult objects.')
337
339 """Returns the offset of the release group result list.
340
341 The offset is used for paging through the result list. It
342 is zero-based.
343
344 @return: an integer containing the offset, or None.
345
346 @see: L{getReleaseGroupResults}, L{getReleaseGroupResultsCount}
347 """
348 return self._releaseGroupResultsOffset
349
351 """Sets the offset of the release group result list.
352
353 @param value: an integer containing the offset, or None
354
355 @see: L{getReleaseGroupResultsOffset}
356 """
357 self._releaseGroupResultsOffset = value
358
359 releaseGroupResultsOffset = property(
360 getReleaseGroupResultsOffset, setReleaseGroupResultsOffset,
361 doc='The offset of the release group results.')
362
364 """Returns the total number of results available.
365
366 This may or may not match with the number of elements that
367 L{getReleaseGroupResults} returns. If the count is higher than
368 the list, it indicates that the list is incomplete.
369
370 @return: an integer containing the count, or None
371
372 @see: L{setReleaseGroupResultsCount}, L{getReleaseGroupResultsOffset}
373 """
374 return self._releaseGroupResultsCount
375
377 """Sets the total number of available results.
378
379 @param value: an integer containing the count, or None
380
381 @see: L{getReleaseGroupResults}, L{setReleaseGroupResultsOffset}
382 """
383 self._releaseGroupResultsCount = value
384
385 releaseGroupResultsCount = property(
386 getReleaseGroupResultsCount, setReleaseGroupResultsCount,
387 doc='The total number of release group results.')
388
390 """Returns a track result list.
391
392 @return: a list of L{TrackResult} objects.
393 """
394 return self._trackResults
395
396 trackResults = property(getTrackResults,
397 doc='A list of TrackResult objects.')
398
400 """Returns the offset of the track result list.
401
402 The offset is used for paging through the result list. It
403 is zero-based.
404
405 @return: an integer containing the offset, or None
406
407 @see: L{getTrackResults}, L{getTrackResultsCount}
408 """
409 return self._trackResultsOffset
410
412 """Sets the offset of the track result list.
413
414 @param value: an integer containing the offset, or None
415
416 @see: L{getTrackResultsOffset}
417 """
418 self._trackResultsOffset = value
419
420 trackResultsOffset = property(
421 getTrackResultsOffset, setTrackResultsOffset,
422 doc='The offset of the track results.')
423
425 """Returns the total number of results available.
426
427 This may or may not match with the number of elements that
428 L{getTrackResults} returns. If the count is higher than
429 the list, it indicates that the list is incomplete.
430
431 @return: an integer containing the count, or None
432
433 @see: L{setTrackResultsCount}, L{getTrackResultsOffset}
434 """
435 return self._trackResultsCount
436
438 """Sets the total number of available results.
439
440 @param value: an integer containing the count, or None
441
442 @see: L{getTrackResults}, L{setTrackResultsOffset}
443 """
444 self._trackResultsCount = value
445
446 trackResultsCount = property(
447 getTrackResultsCount, setTrackResultsCount,
448 doc='The total number of track results.')
449
450
452 """Returns a list of tags.
453
454 @return: a list of L{model.Tag} objects
455 """
456 return self._tagList
457
458 tagResults = property(getTagList,
459 doc='A list of Tag objects.')
460
467
469 """Sets the rating.
470
471 @param value: a L{model.Rating} object
472 """
473 self._rating = value
474
475 rating = property(getRating, setRating, doc='A Rating object.')
476
477
478 # MusicBrainz extension to the schema
480 """Returns a list of users.
481
482 @return: a list of L{model.User} objects
483
484 @note: This is a MusicBrainz extension.
485 """
486 return self._userList
487
488 userResults = property(getUserList,
489 doc='A list of User objects.')
490
491
493 """The abstract representation of a result.
494
495 A result is an instance of some kind (Artist, Release, ...)
496 associated with a score.
497 """
498
500 self._score = score
501
503 """Returns the result score.
504
505 The score indicates how well this result matches the search
506 parameters. The higher the value, the better the match.
507
508 @return: an int between 0 and 100 (both inclusive), or None
509 """
510 return self._score
511
513 self._score = score
514
515 score = property(getScore, setScore, doc='The relevance score.')
516
517
519 """Represents an artist result.
520
521 An ArtistResult consists of a I{score} and an artist. The score is a
522 number between 0 and 100, where a higher number indicates a better
523 match.
524 """
528
530 """Returns an Artist object.
531
532 @return: a L{musicbrainz2.model.Artist} object
533 """
534 return self._artist
535
537 self._artist = artist
538
539 artist = property(getArtist, setArtist, doc='An Artist object.')
540
541
543 """Represents a release result.
544
545 A ReleaseResult consists of a I{score} and a release. The score is a
546 number between 0 and 100, where a higher number indicates a better
547 match.
548 """
552
554 """Returns a Release object.
555
556 @return: a L{musicbrainz2.model.Release} object
557 """
558 return self._release
559
561 self._release = release
562
563 release = property(getRelease, setRelease, doc='A Release object.')
564
566 """Represents a release group result.
567
568 A ReleaseGroupResult consists of a I{score} and a release group. The
569 score is a number between 0 and 100, where a higher number indicates
570 a better match.
571 """
575
577 """Returns a ReleaseGroup object.
578
579 @return: a L{musicbrainz2.model.ReleaseGroup} object
580 """
581 return self._releaseGroup
582
584 self._releaseGroup = value
585
586 releaseGroup = property(getReleaseGroup, setReleaseGroup, doc='A ReleaseGroup object.')
587
589 """Represents a track result.
590
591 A TrackResult consists of a I{score} and a track. The score is a
592 number between 0 and 100, where a higher number indicates a better
593 match.
594 """
598
600 """Returns a Track object.
601
602 @return: a L{musicbrainz2.model.Track} object
603 """
604 return self._track
605
607 self._track = track
608
609 track = property(getTrack, setTrack, doc='A Track object.')
610
611
613 """Represents a label result.
614
615 A LabelResult consists of a I{score} and a label. The score is a
616 number between 0 and 100, where a higher number indicates a better
617 match.
618 """
622
624 """Returns a Label object.
625
626 @return: a L{musicbrainz2.model.Label} object
627 """
628 return self._label
629
631 self._label = label
632
633 label = property(getLabel, setLabel, doc='A Label object.')
634
635
637 """A parser for the Music Metadata XML format.
638
639 This parser supports all basic features and extensions defined by
640 MusicBrainz, including unlimited document nesting. By default it
641 reads an XML document from a file-like object (stream) and returns
642 an object tree representing the document using classes from
643 L{musicbrainz2.model}.
644
645 The implementation tries to be as permissive as possible. Invalid
646 contents are skipped, but documents have to be well-formed and use
647 the correct namespace. In case of unrecoverable errors, a L{ParseError}
648 exception is raised.
649
650 @see: U{The Music Metadata XML Format
651 <http://musicbrainz.org/development/mmd/>}
652 """
653
655 """Constructor.
656
657 The C{factory} parameter has to be an instance of L{DefaultFactory}
658 or a subclass of it. It is used by L{parse} to obtain objects
659 from L{musicbrainz2.model} to build the resulting object tree.
660 If you supply your own factory, you have to make sure all
661 returned objects have the same interface as their counterparts
662 from L{musicbrainz2.model}.
663
664 @param factory: an object factory
665 """
666 self._log = logging.getLogger(str(self.__class__))
667 self._factory = factory
668
670 """Parses the MusicBrainz web service XML.
671
672 Returns a L{Metadata} object representing the parsed XML or
673 raises a L{ParseError} exception if the data was malformed.
674 The parser tries to be liberal and skips invalid content if
675 possible.
676
677 Note that an L{IOError} may be raised if there is a problem
678 reading C{inStream}.
679
680 @param inStream: a file-like object
681 @return: a L{Metadata} object (never None)
682 @raise ParseError: if the document is not valid
683 @raise IOError: if reading from the stream failed
684 """
685
686 try:
687 doc = xml.dom.minidom.parse(inStream)
688
689 # Try to find the root element. If this isn't an mmd
690 # XML file or the namespace is wrong, this will fail.
691 elems = doc.getElementsByTagNameNS(NS_MMD_1, 'metadata')
692
693 if len(elems) != 0:
694 md = self._createMetadata(elems[0])
695 else:
696 msg = 'cannot find root element mmd:metadata'
697 self._log.debug('ParseError: ' + msg)
698 raise ParseError(msg)
699
700 doc.unlink()
701
702 return md
703 except ExpatError, e:
704 self._log.debug('ExpatError: ' + str(e))
705 raise ParseError(msg=str(e), reason=e)
706 except DOMException, e:
707 self._log.debug('DOMException: ' + str(e))
708 raise ParseError(msg=str(e), reason=e)
709
710
712 md = Metadata()
713
714 for node in _getChildElements(metadata):
715 if _matches(node, 'artist'):
716 md.artist = self._createArtist(node)
717 elif _matches(node, 'release'):
718 md.release = self._createRelease(node)
719 elif _matches(node, 'release-group'):
720 md.releaseGroup = self._createReleaseGroup(node)
721 elif _matches(node, 'track'):
722 md.track = self._createTrack(node)
723 elif _matches(node, 'label'):
724 md.label = self._createLabel(node)
725 elif _matches(node, 'artist-list'):
726 (offset, count) = self._getListAttrs(node)
727 md.artistResultsOffset = offset
728 md.artistResultsCount = count
729 self._addArtistResults(node, md.getArtistResults())
730 elif _matches(node, 'release-list'):
731 (offset, count) = self._getListAttrs(node)
732 md.releaseResultsOffset = offset
733 md.releaseResultsCount = count
734 self._addReleaseResults(node, md.getReleaseResults())
735 elif _matches(node, 'release-group-list'):
736 (offset, count) = self._getListAttrs(node)
737 md.releaseGroupResultsOffset = offset
738 md.releaseGroupResultsCount = count
739 self._addReleaseGroupResults(node, md.getReleaseGroupResults())
740 elif _matches(node, 'track-list'):
741 (offset, count) = self._getListAttrs(node)
742 md.trackResultsOffset = offset
743 md.trackResultsCount = count
744 self._addTrackResults(node, md.getTrackResults())
745 elif _matches(node, 'label-list'):
746 (offset, count) = self._getListAttrs(node)
747 md.labelResultsOffset = offset
748 md.labelResultsCount = count
749 self._addLabelResults(node, md.getLabelResults())
750 elif _matches(node, 'tag-list'):
751 self._addTagsToList(node, md.getTagList())
752 elif _matches(node, 'user-list', NS_EXT_1):
753 self._addUsersToList(node, md.getUserList())
754
755 return md
756
757
759 for c in _getChildElements(listNode):
760 artist = self._createArtist(c)
761 score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1)
762 if artist is not None:
763 resultList.append(ArtistResult(artist, score))
764
766 for c in _getChildElements(listNode):
767 release = self._createRelease(c)
768 score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1)
769 if release is not None:
770 resultList.append(ReleaseResult(release, score))
771
773 for c in _getChildElements(listNode):
774 releaseGroup = self._createReleaseGroup(c)
775 score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1)
776 if releaseGroup is not None:
777 resultList.append(ReleaseGroupResult(releaseGroup, score))
778
780 for c in _getChildElements(listNode):
781 track = self._createTrack(c)
782 score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1)
783 if track is not None:
784 resultList.append(TrackResult(track, score))
785
787 for c in _getChildElements(listNode):
788 label = self._createLabel(c)
789 score = _getIntAttr(c, 'score', 0, 100, ns=NS_EXT_1)
790 if label is not None:
791 resultList.append(LabelResult(label, score))
792
795
798
801
804
807
812
816
820
822 offset = _getIntAttr(listNode, 'offset')
823 count = _getIntAttr(listNode, 'count')
824 return (offset, count)
825
826
828 artist = self._factory.newArtist()
829 artist.setId(_getIdAttr(artistNode, 'id', 'artist'))
830 artist.setType(_getUriAttr(artistNode, 'type'))
831
832 for node in _getChildElements(artistNode):
833 if _matches(node, 'name'):
834 artist.setName(_getText(node))
835 elif _matches(node, 'sort-name'):
836 artist.setSortName(_getText(node))
837 elif _matches(node, 'disambiguation'):
838 artist.setDisambiguation(_getText(node))
839 elif _matches(node, 'life-span'):
840 artist.setBeginDate(_getDateAttr(node, 'begin'))
841 artist.setEndDate(_getDateAttr(node, 'end'))
842 elif _matches(node, 'alias-list'):
843 self._addArtistAliases(node, artist)
844 elif _matches(node, 'release-list'):
845 (offset, count) = self._getListAttrs(node)
846 artist.setReleasesOffset(offset)
847 artist.setReleasesCount(count)
848 self._addReleasesToList(node, artist.getReleases())
849 elif _matches(node, 'release-group-list'):
850 (offset, count) = self._getListAttrs(node)
851 artist.setReleaseGroupsOffset(offset)
852 artist.setReleaseGroupsCount(count)
853 self._addReleaseGroupsToList(node, artist.getReleaseGroups())
854 elif _matches(node, 'relation-list'):
855 self._addRelationsToEntity(node, artist)
856 elif _matches(node, 'tag-list'):
857 self._addTagsToEntity(node, artist)
858 elif _matches(node, 'rating'):
859 self._addRatingToEntity(node, artist)
860
861 return artist
862
864 label = self._factory.newLabel()
865 label.setId(_getIdAttr(labelNode, 'id', 'label'))
866 label.setType(_getUriAttr(labelNode, 'type'))
867
868 for node in _getChildElements(labelNode):
869 if _matches(node, 'name'):
870 label.setName(_getText(node))
871 if _matches(node, 'sort-name'):
872 label.setSortName(_getText(node))
873 elif _matches(node, 'disambiguation'):
874 label.setDisambiguation(_getText(node))
875 elif _matches(node, 'label-code'):
876 label.setCode(_getText(node))
877 elif _matches(node, 'country'):
878 country = _getText(node, '^[A-Z]{2}$')
879 label.setCountry(country)
880 elif _matches(node, 'life-span'):
881 label.setBeginDate(_getDateAttr(node, 'begin'))
882 label.setEndDate(_getDateAttr(node, 'end'))
883 elif _matches(node, 'alias-list'):
884 self._addLabelAliases(node, label)
885 elif _matches(node, 'tag-list'):
886 self._addTagsToEntity(node, label)
887 elif _matches(node, 'rating'):
888 self._addRatingToEntity(node, label)
889
890 return label
891
893 release = self._factory.newRelease()
894 release.setId(_getIdAttr(releaseNode, 'id', 'release'))
895 for t in _getUriListAttr(releaseNode, 'type'):
896 release.addType(t)
897
898 for node in _getChildElements(releaseNode):
899 if _matches(node, 'title'):
900 release.setTitle(_getText(node))
901 elif _matches(node, 'text-representation'):
902 lang = _getAttr(node, 'language', '^[A-Z]{3}$')
903 release.setTextLanguage(lang)
904 script = _getAttr(node, 'script', '^[A-Z][a-z]{3}$')
905 release.setTextScript(script)
906 elif _matches(node, 'asin'):
907 release.setAsin(_getText(node))
908 elif _matches(node, 'artist'):
909 release.setArtist(self._createArtist(node))
910 elif _matches(node, 'release-event-list'):
911 self._addReleaseEvents(node, release)
912 elif _matches(node, 'release-group'):
913 release.setReleaseGroup(self._createReleaseGroup(node))
914 elif _matches(node, 'disc-list'):
915 self._addDiscs(node, release)
916 elif _matches(node, 'track-list'):
917 (offset, count) = self._getListAttrs(node)
918 release.setTracksOffset(offset)
919 release.setTracksCount(count)
920 self._addTracksToList(node, release.getTracks())
921 elif _matches(node, 'relation-list'):
922 self._addRelationsToEntity(node, release)
923 elif _matches(node, 'tag-list'):
924 self._addTagsToEntity(node, release)
925 elif _matches(node, 'rating'):
926 self._addRatingToEntity(node, release)
927
928 return release
929
931 rg = self._factory.newReleaseGroup()
932 rg.setId(_getIdAttr(node, 'id', 'release-group'))
933 rg.setType(_getUriAttr(node, 'type'))
934
935 for child in _getChildElements(node):
936 if _matches(child, 'title'):
937 rg.setTitle(_getText(child))
938 elif _matches(child, 'artist'):
939 rg.setArtist(self._createArtist(child))
940 elif _matches(child, 'release-list'):
941 (offset, count) = self._getListAttrs(child)
942 rg.setReleasesOffset(offset)
943 rg.setReleasesCount(count)
944 self._addReleasesToList(child, rg.getReleases())
945
946 return rg
947
949 for node in _getChildElements(releaseListNode):
950 if _matches(node, 'event'):
951 country = _getAttr(node, 'country', '^[A-Z]{2}$')
952 date = _getDateAttr(node, 'date')
953 catalogNumber = _getAttr(node, 'catalog-number')
954 barcode = _getAttr(node, 'barcode')
955 format = _getUriAttr(node, 'format')
956
957 # The date attribute is mandatory. If it isn't present,
958 # we don't add anything from this release event.
959 if date is not None:
960 event = self._factory.newReleaseEvent()
961 event.setCountry(country)
962 event.setDate(date)
963 event.setCatalogNumber(catalogNumber)
964 event.setBarcode(barcode)
965 event.setFormat(format)
966
967 for subNode in _getChildElements(node):
968 if _matches(subNode, 'label'):
969 event.setLabel(self._createLabel(subNode))
970
971 release.addReleaseEvent(event)
972
973
975 for node in _getChildElements(discIdListNode):
976 if _matches(node, 'disc') and node.hasAttribute('id'):
977 d = self._factory.newDisc()
978 d.setId(node.getAttribute('id'))
979 d.setSectors(_getIntAttr(node, 'sectors', 0))
980 release.addDisc(d)
981
982
984 for node in _getChildElements(aliasListNode):
985 if _matches(node, 'alias'):
986 alias = self._factory.newArtistAlias()
987 self._initializeAlias(alias, node)
988 artist.addAlias(alias)
989
990
992 for node in _getChildElements(aliasListNode):
993 if _matches(node, 'alias'):
994 alias = self._factory.newLabelAlias()
995 self._initializeAlias(alias, node)
996 label.addAlias(alias)
997
998
1000 alias.setValue(_getText(node))
1001 alias.setType(_getUriAttr(node, 'type'))
1002 alias.setScript(_getAttr(node, 'script',
1003 '^[A-Z][a-z]{3}$'))
1004
1005
1007 track = self._factory.newTrack()
1008 track.setId(_getIdAttr(trackNode, 'id', 'track'))
1009
1010 for node in _getChildElements(trackNode):
1011 if _matches(node, 'title'):
1012 track.setTitle(_getText(node))
1013 elif _matches(node, 'artist'):
1014 track.setArtist(self._createArtist(node))
1015 elif _matches(node, 'duration'):
1016 track.setDuration(_getPositiveIntText(node))
1017 elif _matches(node, 'release-list'):
1018 self._addReleasesToList(node, track.getReleases())
1019 elif _matches(node, 'puid-list'):
1020 self._addPuids(node, track)
1021 elif _matches(node, 'isrc-list'):
1022 self._addISRCs(node, track)
1023 elif _matches(node, 'relation-list'):
1024 self._addRelationsToEntity(node, track)
1025 elif _matches(node, 'tag-list'):
1026 self._addTagsToEntity(node, track)
1027 elif _matches(node, 'rating'):
1028 self._addRatingToEntity(node, track)
1029
1030 return track
1031
1032 # MusicBrainz extension
1034 user = self._factory.newUser()
1035 for t in _getUriListAttr(userNode, 'type', NS_EXT_1):
1036 user.addType(t)
1037
1038 for node in _getChildElements(userNode):
1039 if _matches(node, 'name'):
1040 user.setName(_getText(node))
1041 elif _matches(node, 'nag', NS_EXT_1):
1042 user.setShowNag(_getBooleanAttr(node, 'show'))
1043
1044 return user
1045
1047 rating = self._factory.newRating()
1048 rating.value = _getText(ratingNode)
1049 rating.count = _getIntAttr(ratingNode, 'votes-count')
1050 return rating
1051
1053 tag = self._factory.newTag()
1054 tag.value = _getText(tagNode)
1055 tag.count = _getIntAttr(tagNode, 'count')
1056 return tag
1057
1058
1060 for node in _getChildElements(puidListNode):
1061 if _matches(node, 'puid') and node.hasAttribute('id'):
1062 track.addPuid(node.getAttribute('id'))
1063
1065 for node in _getChildElements(isrcListNode):
1066 if _matches(node, 'isrc') and node.hasAttribute('id'):
1067 track.addISRC(node.getAttribute('id'))
1068
1070 targetType = _getUriAttr(relationListNode, 'target-type', NS_REL_1)
1071
1072 if targetType is None:
1073 return
1074
1075 for node in _getChildElements(relationListNode):
1076 if _matches(node, 'relation'):
1077 rel = self._createRelation(node, targetType)
1078 if rel is not None:
1079 entity.addRelation(rel)
1080
1081
1083 relation = self._factory.newRelation()
1084
1085 relation.setType(_getUriAttr(relationNode, 'type', NS_REL_1))
1086 relation.setTargetType(targetType)
1087 resType = _getResourceType(targetType)
1088 relation.setTargetId(_getIdAttr(relationNode, 'target', resType))
1089
1090 if relation.getType() is None \
1091 or relation.getTargetType() is None \
1092 or relation.getTargetId() is None:
1093 return None
1094
1095 relation.setDirection(_getDirectionAttr(relationNode, 'direction'))
1096 relation.setBeginDate(_getDateAttr(relationNode, 'begin'))
1097 relation.setEndDate(_getDateAttr(relationNode, 'end'))
1098
1099 for a in _getUriListAttr(relationNode, 'attributes', NS_REL_1):
1100 relation.addAttribute(a)
1101
1102 target = None
1103 children = _getChildElements(relationNode)
1104 if len(children) > 0:
1105 node = children[0]
1106 if _matches(node, 'artist'):
1107 target = self._createArtist(node)
1108 elif _matches(node, 'release'):
1109 target = self._createRelease(node)
1110 elif _matches(node, 'track'):
1111 target = self._createTrack(node)
1112
1113 relation.setTarget(target)
1114
1115 return relation
1116
1117
1118 #
1119 # XML output
1120 #
1121
1124 self._out = outStream
1125 self._indentAmount = indentAmount
1126 self._stack = [ ]
1127 self._newline = newline
1128
1130 pi = '<?xml version="%s" encoding="%s"?>' % (version, encoding)
1131 self._out.write(pi + self._newline)
1132
1134 indent = self._getIndention()
1135 self._stack.append(name)
1136 self._out.write(indent + self._makeTag(name, attrs) + self._newline)
1137
1139 name = self._stack.pop()
1140 indent = self._getIndention()
1141 self._out.write('%s</%s>\n' % (indent, name))
1142
1144 # delete attributes with an unset value
1145 for (k, v) in attrs.items():
1146 if v is None or v == '':
1147 del attrs[k]
1148
1149 if value is None or value == '':
1150 if len(attrs) == 0:
1151 return
1152 self._out.write(self._getIndention())
1153 self._out.write(self._makeTag(name, attrs, True) + '\n')
1154 else:
1155 escValue = saxutils.escape(value or '')
1156 self._out.write(self._getIndention())
1157 self._out.write(self._makeTag(name, attrs))
1158 self._out.write(escValue)
1159 self._out.write('</%s>\n' % name)
1160
1163
1165 ret = '<' + name
1166
1167 for (k, v) in attrs.iteritems():
1168 if v is not None:
1169 v = saxutils.quoteattr(str(v))
1170 ret += ' %s=%s' % (k, v)
1171
1172 if close:
1173 return ret + '/>'
1174 else:
1175 return ret + '>'
1176
1177
1178
1180 """Write XML in the Music Metadata XML format."""
1181
1183 """Constructor.
1184
1185 @param indentAmount: the amount of whitespace to use per level
1186 """
1187 self._indentAmount = indentAmount
1188 self._newline = newline
1189
1190
def write(self, outStream, metadata):
    """Serialize a Metadata object as an XML document.

    The single artist, release, release group, track and label are
    written first (each only if set), followed by every non-empty
    result list, all wrapped in one 'metadata' root element.

    @param outStream: an open file-like object
    @param metadata: a L{Metadata} object
    """
    xml = _XmlWriter(outStream, self._indentAmount, self._newline)

    xml.prolog()
    xml.start('metadata', {
        'xmlns': NS_MMD_1,
        'xmlns:ext': NS_EXT_1,
    })

    self._writeArtist(xml, metadata.getArtist())
    self._writeRelease(xml, metadata.getRelease())
    self._writeReleaseGroup(xml, metadata.getReleaseGroup())
    self._writeTrack(xml, metadata.getTrack())
    self._writeLabel(xml, metadata.getLabel())

    # One row per result list: the list's tag name, the results, the
    # names of the metadata attributes holding offset and count, the
    # method writing one entity, and the name of the result's getter
    # returning that entity.
    resultLists = (
        ('artist-list', metadata.getArtistResults(),
            'artistResultsOffset', 'artistResultsCount',
            self._writeArtist, 'getArtist'),
        ('release-list', metadata.getReleaseResults(),
            'releaseResultsOffset', 'releaseResultsCount',
            self._writeRelease, 'getRelease'),
        ('release-group-list', metadata.getReleaseGroupResults(),
            'releaseGroupResultsOffset', 'releaseGroupResultsCount',
            self._writeReleaseGroup, 'getReleaseGroup'),
        ('track-list', metadata.getTrackResults(),
            'trackResultsOffset', 'trackResultsCount',
            self._writeTrack, 'getTrack'),
        ('label-list', metadata.getLabelResults(),
            'labelResultsOffset', 'labelResultsCount',
            self._writeLabel, 'getLabel'),
    )

    for (tag, results, offsetName, countName,
            writeEntity, getterName) in resultLists:
        # Empty lists are not written at all.
        if len(results) == 0:
            continue

        xml.start(tag, {
            'offset': getattr(metadata, offsetName),
            'count': getattr(metadata, countName),
        })
        for result in results:
            writeEntity(xml, getattr(result, getterName)(),
                result.getScore())
        xml.end()

    xml.end()
1262
1263
def _writeArtist(self, xml, artist, score=None):
    """Write an 'artist' element with its children.

    Nothing is written if artist is None.

    @param xml: the XML writer to write to
    @param artist: the artist to serialize, or None
    @param score: optional value of the 'ext:score' attribute
    """
    if artist is None:
        return

    rootAttrs = {
        'id': mbutils.extractUuid(artist.getId()),
        'type': mbutils.extractFragment(artist.getType()),
        'ext:score': score,
    }
    xml.start('artist', rootAttrs)

    xml.elem('name', artist.getName())
    xml.elem('sort-name', artist.getSortName())
    xml.elem('disambiguation', artist.getDisambiguation())

    # 'life-span' has no text content, only attributes.
    lifeSpan = {
        'begin': artist.getBeginDate(),
        'end': artist.getEndDate(),
    }
    xml.elem('life-span', None, lifeSpan)

    # Each of the following lists is only written if it is non-empty.
    if len(artist.getAliases()) > 0:
        xml.start('alias-list')
        for alias in artist.getAliases():
            aliasAttrs = {
                'type': alias.getType(),
                'script': alias.getScript(),
            }
            xml.elem('alias', alias.getValue(), aliasAttrs)
        xml.end()

    if len(artist.getReleases()) > 0:
        xml.start('release-list')
        for item in artist.getReleases():
            self._writeRelease(xml, item)
        xml.end()

    if len(artist.getReleaseGroups()) > 0:
        xml.start('release-group-list')
        for item in artist.getReleaseGroups():
            self._writeReleaseGroup(xml, item)
        xml.end()

    self._writeRelationList(xml, artist)
    # TODO: extensions

    xml.end()
1307
1308
def _writeRelease(self, xml, release, score=None):
    """Write a 'release' element with its children.

    Nothing is written if release is None.

    @param xml: the XML writer to write to
    @param release: the release to serialize, or None
    @param score: optional value of the 'ext:score' attribute
    """
    if release is None:
        return

    # The 'type' attribute is a space-separated list of the fragments of
    # all type URIs, or unset if the release has no types.
    fragments = [mbutils.extractFragment(t) for t in release.getTypes()]
    if len(fragments) > 0:
        typesStr = ' '.join(fragments)
    else:
        typesStr = None

    rootAttrs = {
        'id': mbutils.extractUuid(release.getId()),
        'type': typesStr,
        'ext:score': score,
    }
    xml.start('release', rootAttrs)

    xml.elem('title', release.getTitle())
    textRepresentation = {
        'language': release.getTextLanguage(),
        'script': release.getTextScript()
    }
    xml.elem('text-representation', None, textRepresentation)
    xml.elem('asin', release.getAsin())

    self._writeArtist(xml, release.getArtist())
    self._writeReleaseGroup(xml, release.getReleaseGroup())

    # Each of the following lists is only written if it is non-empty.
    if len(release.getReleaseEvents()) > 0:
        xml.start('release-event-list')
        for releaseEvent in release.getReleaseEvents():
            self._writeReleaseEvent(xml, releaseEvent)
        xml.end()

    if len(release.getDiscs()) > 0:
        xml.start('disc-list')
        for disc in release.getDiscs():
            xml.elem('disc', None, {'id': disc.getId()})
        xml.end()

    if len(release.getTracks()) > 0:
        # TODO: count attribute
        xml.start('track-list', {'offset': release.getTracksOffset()})
        for item in release.getTracks():
            self._writeTrack(xml, item)
        xml.end()

    self._writeRelationList(xml, release)
    # TODO: extensions

    xml.end()
1359
def _writeReleaseGroup(self, xml, rg, score=None):
    """Write a 'release-group' element with its children.

    Nothing is written if rg is None.

    @param xml: the XML writer to write to
    @param rg: the release group to serialize, or None
    @param score: optional value of the 'ext:score' attribute
    """
    if rg is None:
        return

    rootAttrs = {
        'id': mbutils.extractUuid(rg.getId()),
        'type': mbutils.extractFragment(rg.getType()),
        'ext:score': score,
    }
    xml.start('release-group', rootAttrs)

    xml.elem('title', rg.getTitle())
    self._writeArtist(xml, rg.getArtist())

    # The release list is only written if it is non-empty.
    if len(rg.getReleases()) > 0:
        xml.start('release-list')
        for item in rg.getReleases():
            self._writeRelease(xml, item)
        xml.end()

    xml.end()
1380
def _writeReleaseEvent(self, xml, event):
    """Write an 'event' element for a release event.

    All event data is written as attributes; the event's label is
    written as child element.

    @param xml: the XML writer to write to
    @param event: the release event to serialize
    """
    eventAttrs = {
        'country': event.getCountry(),
        'date': event.getDate(),
        'catalog-number': event.getCatalogNumber(),
        'barcode': event.getBarcode(),
        'format': event.getFormat()
    }

    xml.start('event', eventAttrs)
    self._writeLabel(xml, event.getLabel())
    xml.end()
1393
1394
def _writeTrack(self, xml, track, score=None):
    """Write a 'track' element with its children.

    Nothing is written if track is None.

    @param xml: the XML writer to write to
    @param track: the track to serialize, or None
    @param score: optional value of the 'ext:score' attribute
    """
    if track is None:
        return

    xml.start('track', {
        'id': mbutils.extractUuid(track.getId()),
        'ext:score': score,
    })

    xml.elem('title', track.getTitle())

    # Only convert a duration that is actually set. str(None) is the
    # non-empty text 'None', which would be written as the duration.
    duration = track.getDuration()
    if duration is not None:
        duration = str(duration)
    xml.elem('duration', duration)

    self._writeArtist(xml, track.getArtist())

    # Each of the following lists is only written if it is non-empty.
    if len(track.getReleases()) > 0:
        # TODO: offset + count
        xml.start('release-list')
        for release in track.getReleases():
            self._writeRelease(xml, release)
        xml.end()

    if len(track.getPuids()) > 0:
        xml.start('puid-list')
        for puid in track.getPuids():
            xml.elem('puid', None, { 'id': puid })
        xml.end()

    self._writeRelationList(xml, track)
    # TODO: extensions

    xml.end()
1425
1426
def _writeLabel(self, xml, label, score=None):
    """Write a 'label' element with its children.

    Nothing is written if label is None.

    @param xml: the XML writer to write to
    @param label: the label to serialize, or None
    @param score: optional value of the 'ext:score' attribute
    """
    if label is None:
        return

    rootAttrs = {
        'id': mbutils.extractUuid(label.getId()),
        'type': mbutils.extractFragment(label.getType()),
        'ext:score': score,
    }
    xml.start('label', rootAttrs)

    xml.elem('name', label.getName())
    xml.elem('sort-name', label.getSortName())
    xml.elem('disambiguation', label.getDisambiguation())

    # 'life-span' has no text content, only attributes.
    lifeSpan = {
        'begin': label.getBeginDate(),
        'end': label.getEndDate(),
    }
    xml.elem('life-span', None, lifeSpan)

    # The alias list is only written if it is non-empty.
    if len(label.getAliases()) > 0:
        xml.start('alias-list')
        for alias in label.getAliases():
            aliasAttrs = {
                'type': alias.getType(),
                'script': alias.getScript(),
            }
            xml.elem('alias', alias.getValue(), aliasAttrs)
        xml.end()

    # TODO: releases, artists

    self._writeRelationList(xml, label)
    # TODO: extensions

    xml.end()
1460
1461
def _writeRelationList(self, xml, entity):
    """Write one 'relation-list' element per relation target type.

    @param xml: the XML writer to write to
    @param entity: an object providing getRelationTargetTypes() and
        getRelations()
    """
    for targetType in entity.getRelationTargetTypes():
        listAttrs = {'target-type': mbutils.extractFragment(targetType)}
        xml.start('relation-list', listAttrs)

        for relation in entity.getRelations(targetType=targetType):
            self._writeRelation(xml, relation, targetType)

        xml.end()
1470
1471
def _writeRelation(self, xml, rel, targetType):
    """Write a 'relation' element.

    If the relation has a target object and targetType denotes an
    artist, release or track, the target is written as child element.
    Without a target object, a single element carrying only attributes
    is written.

    @param xml: the XML writer to write to
    @param rel: the relation to serialize
    @param targetType: the URI of the relation's target type
    """
    relAttrs = ' '.join([mbutils.extractFragment(a)
            for a in rel.getAttributes()])

    # No attributes at all: leave the XML attribute unset.
    if relAttrs == '':
        relAttrs = None

    attrs = {
        'type': mbutils.extractFragment(rel.getType()),
        'target': rel.getTargetId(),
        'direction': rel.getDirection(),
        'begin': rel.getBeginDate(),
        # The end date was previously filled with the begin date.
        'end': rel.getEndDate(),
        'attributes': relAttrs,
    }

    if rel.getTarget() is None:
        xml.elem('relation', None, attrs)
    else:
        xml.start('relation', attrs)
        if targetType == NS_REL_1 + 'Artist':
            self._writeArtist(xml, rel.getTarget())
        elif targetType == NS_REL_1 + 'Release':
            self._writeRelease(xml, rel.getTarget())
        elif targetType == NS_REL_1 + 'Track':
            self._writeTrack(xml, rel.getTarget())
        xml.end()
1499
1500
1501 #
1502 # DOM Utilities
1503 #
1504
1506 """Checks if an xml.dom.Node and a given name and namespace match."""
1507
1508 if node.localName == name and node.namespaceURI == namespace:
1509 return True
1510 else:
1511 return False
1512
1513
def _getChildElements(parentNode):
    """Returns all direct child elements of the given xml.dom.Node.

    Child nodes which are not elements (text, comments, ...) are left
    out.

    @return: a list of element nodes, in document order
    """
    return [child for child in parentNode.childNodes
            if child.nodeType == child.ELEMENT_NODE]
1523
1524
def _getText(element, regex=None, default=None):
    """Returns the text content of the given xml.dom.Element.

    Only the text nodes directly below the element are concatenated, so
    the element should not contain child elements.

    @param element: an xml.dom.Element
    @param regex: optional regular expression the text has to match
    @param default: the value returned if the text doesn't match regex

    @return: the text, or default
    """
    chunks = [child.data for child in element.childNodes
              if child.nodeType == child.TEXT_NODE]
    text = ''.join(chunks)

    if regex is not None and not re.match(regex, text):
        return default

    return text
1540
1541
def _getPositiveIntText(element):
    """Returns the text content of the given xml.dom.Element as an int.

    @return: the integer value, or None if there is no text or the text
        can't be converted to an int
    """
    text = _getText(element)

    if text is not None:
        try:
            return int(text)
        except ValueError:
            pass

    return None
1554
1555
def _getAttr(element, attrName, regex=None, default=None, ns=None):
    """Returns an attribute of the given element.

    @param element: an xml.dom.Element
    @param attrName: the local name of the attribute
    @param regex: optional regular expression the value has to match
    @param default: returned if the attribute is missing or its value
        doesn't match regex
    @param ns: the namespace URI of the attribute, or None

    @return: the attribute's value, or default
    """
    if not element.hasAttributeNS(ns, attrName):
        return default

    content = element.getAttributeNS(ns, attrName)

    if regex is not None and not re.match(regex, content):
        return default

    return content
1571
1572
def _getDateAttr(element, attrName):
    """Gets an incomplete date from an attribute.

    The value has to consist of digits, optionally followed by one or
    two groups of a dash and two digits.

    @return: the attribute's value, or None if the attribute is missing
        or its value doesn't have that form
    """
    # Raw string, so the '\d' escapes reach the regex engine as written.
    return _getAttr(element, attrName, r'^\d+(-\d\d)?(-\d\d)?$')
1576
1577
def _getIdAttr(element, attrName, typeName):
    """Gets an ID from an attribute and turns it into an absolute URI.

    @param element: an xml.dom.Element
    @param attrName: the name of the attribute containing the ID
    @param typeName: the path component put between the host name and
        the ID

    @return: the value as returned by _makeAbsoluteUri()
    """
    rawId = _getAttr(element, attrName)
    prefix = 'http://musicbrainz.org/' + typeName + '/'

    return _makeAbsoluteUri(prefix, rawId)
1583
1584
1585
1587 """Gets an int from an attribute, or None."""
1588 try:
1589 val = int(_getAttr(element, attrName, ns=ns))
1590
1591 if max is None:
1592 max = val
1593
1594 if min <= val <= max:
1595 return val
1596 else:
1597 return None
1598 except ValueError:
1599 return None # raised if conversion to int fails
1600 except TypeError:
1601 return None # raised if no such attribute exists
1602
1603
1605 """Gets a list of URIs from an attribute."""
1606 if not element.hasAttribute(attrName):
1607 return [ ]
1608
1609 f = lambda x: x != ''
1610 uris = filter(f, re.split('\s+', element.getAttribute(attrName)))
1611
1612 m = lambda x: _makeAbsoluteUri(prefix, x)
1613 uris = map(m, uris)
1614
1615 return uris
1616
1617
1619 """Gets a URI from an attribute.
1620
1621 This also works for space-separated URI lists. In this case, the
1622 first URI is returned.
1623 """
1624 uris = _getUriListAttr(element, attrName, prefix)
1625 if len(uris) > 0:
1626 return uris[0]
1627 else:
1628 return None
1629
1630
def _getBooleanAttr(element, attrName):
    """Gets a boolean value from an attribute.

    @return: True for the value 'true', False for the value 'false',
        and None in all other cases
    """
    literals = {'true': True, 'false': False}

    # Anything but the two literals, including a missing attribute,
    # yields None.
    return literals.get(_getAttr(element, attrName))
1640
1641
1643 """Gets the Relation reading direction from an attribute."""
1644 regex = '^\s*(' + '|'.join((
1645 model.Relation.DIR_FORWARD,
1646 model.Relation.DIR_BACKWARD)) + ')\s*$'
1647 return _getAttr(element, 'direction', regex, model.Relation.DIR_NONE)
1648
1649
def _makeAbsoluteUri(prefix, uriStr):
    """Creates an absolute URI adding prefix, if necessary.

    @param prefix: the string put in front of relative URIs
    @param uriStr: the URI to make absolute, or None

    @return: uriStr itself if it has a scheme or a network location,
        prefix + uriStr if it has neither, and None if uriStr is None
    """
    if uriStr is None:
        return None

    parts = urlparse.urlparse(uriStr)
    scheme = parts[0]
    netloc = parts[1]

    if scheme != '' or netloc != '':
        return uriStr

    return prefix + uriStr
1661
1662
def _getResourceType(uri):
    """Gets the resource type from a URI.

    The resource type is the part of the URI following the NS_REL_1
    prefix, converted to lower case.

    @return: the resource type, or None if uri doesn't start with
        NS_REL_1
    """
    # The namespace is a literal prefix, not a pattern, so it has to be
    # escaped before it is put into the regular expression.
    m = re.match('^' + re.escape(NS_REL_1) + '(.*)$', uri)

    if m:
        return m.group(1).lower()
    else:
        return None
1674
1675 # EOF
1676
| Home | Trees | Indices | Help |
|---|
| Generated by Epydoc 3.0.1 on Sat Nov 19 22:07:32 2011 | http://epydoc.sourceforge.net |