| Class | HTML5::XMLParser |
| In: |
lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb
|
| Parent: | HTMLParser |
liberal XML parser
# File lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb, line 22
# Sets up the parser via the parent initializer, then replaces the
# :initial entry of @phases with an XmlRootPhase built from this parser
# and @tree.
#
# options:: Hash passed through unchanged to the parent initializer
#           (defaults to an empty Hash).
#
# NOTE(review): @phases and @tree are presumably populated by the parent
# initializer -- confirm against HTMLParser.
def initialize(options = {})
  super(options)
  @phases[:initial] = XmlRootPhase.new(self, @tree)
end
# File lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb, line 27
# Normalizes a tokenizer token in place and returns it.
#
# token:: Hash with at least :type and :data (and :name for tags).
#
# Per token type:
# * :StartTag / :EmptyTag -- the attribute pair list in :data is turned
#   into a Hash; when a name is repeated the first occurrence wins.
#   An :EmptyTag is additionally dispatched to the current phase as a
#   start tag and then rewritten into an attribute-less :EndTag.
# * :Characters -- while the tokenizer's content model flag is :CDATA,
#   the entities &lt;, &gt; and &amp; are un-escaped.
# * :EndTag -- a non-empty :data reports "attributes-in-end-tag".
# * :Comment -- a comment of the form "[CDATA[...]]" becomes a
#   :Characters token carrying the inner text.
#
# Any other token type is returned untouched.
def normalize_token(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # Remove duplicate attributes and convert the pair list to a Hash so
    # that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}. Reversing first
    # makes the earliest pair the last one written, so it wins.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag.
    if token[:type] == :EmptyTag
      # Processing the start tag may alter the content model flag; put it
      # back afterwards, since the element is closed immediately.
      saved_flag = @tokenizer.content_model_flag
      @phase.processStartTag(token[:name], token[:data])
      @tokenizer.content_model_flag = saved_flag
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # Un-escape character data of CDATA elements (e.g. style, script).
    # &amp; is handled last so that "&amp;lt;" yields "&lt;", not "<".
    if @tokenizer.content_model_flag == :CDATA
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # An empty Array/Hash is truthy in Ruby, so test for actual content;
    # only an end tag that really carries attributes is an error.
    attributes = token[:data]
    parse_error("attributes-in-end-tag") if attributes && !attributes.empty?

  when :Comment
    # Rescue CDATA from the comments: "[CDATA[text]]" -> characters "text".
    comment = token[:data]
    if comment.start_with?("[CDATA[") && comment.end_with?("]]")
      token[:type] = :Characters
      token[:data] = comment[7...-2]
    end
  end

  token
end