| Class | HTML5::Filters::OptionalTagFilter |
| In: |
lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb
|
| Parent: | Base |
# File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 18
# Iterates over the token stream supplied by slider, yielding every token
# except start and end tags that are allowed to be left out.
#
# * A :StartTag token is dropped only when its :data is empty (presumably
#   the attribute list -- confirm against the tokenizer) and
#   is_optional_start agrees.
# * An :EndTag token is dropped when is_optional_end agrees.
# * Every other token type is passed through unchanged.
#
# Yields each surviving token to the caller's block.
def each
  slider do |previous, token, nexttok|
    # A slider that reports an empty stream as a nil token would otherwise
    # crash on token[:type] below; there is nothing to emit in that case.
    next if token.nil?

    case token[:type]
    when :StartTag
      omit = token[:data].empty? && is_optional_start(token[:name], previous, nexttok)
      yield token unless omit
    when :EndTag
      yield token unless is_optional_end(token[:name], nexttok)
    else
      yield token
    end
  end
end
# File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 90
# Decides whether the end tag of +tagname+ may be left out of the output.
#
# tagname - String name of the element whose end tag is being considered.
# nexttok - the token that follows the end tag in the stream (a Hash with
#           at least :type, and :name for tags), or nil at end of stream.
#
# Returns true when the end tag may be omitted, false otherwise.
def is_optional_end(tagname, nexttok)
  type = nexttok ? nexttok[:type] : nil

  # "No more content in the parent element": the next thing is the
  # parent's end tag, or the stream is finished.
  parent_exhausted = type == :EndTag || type.nil?

  case tagname
  when 'html', 'head', 'body'
    # The end tag may be omitted if the element is not immediately
    # followed by a space character or a comment.
    ![:Comment, :SpaceCharacters].include?(type)
  when 'li', 'optgroup', 'option', 'tr'
    # The end tag may be omitted if the element is immediately followed
    # by another element of the same kind, or if there is no more
    # content in the parent element.
    type == :StartTag ? nexttok[:name] == tagname : parent_exhausted
  when 'dt', 'dd'
    # A dt element's end tag may be omitted if the dt element is
    # immediately followed by another dt element or a dd element.
    # A dd element's end tag may be omitted if the dd element is
    # immediately followed by another dd element or a dt element,
    # or if there is no more content in the parent element.
    if type == :StartTag
      %w[dt dd].include?(nexttok[:name])
    else
      tagname == 'dd' && parent_exhausted
    end
  when 'p'
    # A p element's end tag may be omitted if the p element is
    # immediately followed by an address, blockquote, dl, fieldset,
    # form, h1, h2, h3, h4, h5, h6, hr, menu, ol, p, pre, table,
    # or ul element, or if there is no more content in the parent
    # element.
    #
    # hr is a void element; such elements are expected to arrive as
    # :EmptyTag rather than :StartTag tokens (NOTE(review): confirm
    # against the tree walker), so both token types count here.
    if [:StartTag, :EmptyTag].include?(type)
      %w[address blockquote dl fieldset form h1 h2 h3 h4 h5
         h6 hr menu ol p pre table ul].include?(nexttok[:name])
    else
      parent_exhausted
    end
  when 'colgroup'
    # A colgroup element's end tag may be omitted if the colgroup
    # element is not immediately followed by a space character or
    # a comment.
    if [:Comment, :SpaceCharacters].include?(type)
      false
    elsif type == :StartTag
      # XXX: the end tag is also kept when another colgroup follows
      # immediately. See is_optional_start.
      nexttok[:name] != 'colgroup'
    else
      true
    end
  when 'thead', 'tbody'
    # A thead element's end tag may be omitted if the thead element
    # is immediately followed by a tbody or tfoot element.
    # A tbody element's end tag may be omitted if the tbody element
    # is immediately followed by a tbody or tfoot element, or if
    # there is no more content in the parent element.
    #
    # The end tag IS omitted before a tbody; is_optional_start
    # compensates by keeping a tbody start tag that directly follows
    # a thead, tbody or tfoot end tag.
    if type == :StartTag
      %w[tbody tfoot].include?(nexttok[:name])
    else
      tagname == 'tbody' && parent_exhausted
    end
  when 'tfoot'
    # A tfoot element's end tag may be omitted if the tfoot element
    # is immediately followed by a tbody element, or if there is no
    # more content in the parent element.
    type == :StartTag ? nexttok[:name] == 'tbody' : parent_exhausted
  when 'td', 'th'
    # A td or th element's end tag may be omitted if it is immediately
    # followed by a td or th element, or if there is no more content
    # in the parent element.
    type == :StartTag ? %w[td th].include?(nexttok[:name]) : parent_exhausted
  else
    false
  end
end
# File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 31
# Decides whether the start tag of +tagname+ may be left out of the output.
# The caller is responsible for checking that the tag has no attributes.
#
# tagname  - String name of the element whose start tag is being considered.
# previous - the token immediately before the start tag, or nil.
# nexttok  - the token immediately after the start tag (a Hash with at
#            least :type, and :name for tags), or nil at end of stream.
#
# Returns true when the start tag may be omitted, false otherwise.
def is_optional_start(tagname, previous, nexttok)
  type = nexttok ? nexttok[:type] : nil

  # Void elements (col, meta, link, ...) are expected to arrive as
  # :EmptyTag rather than :StartTag tokens (NOTE(review): confirm against
  # the tree walker), so both types mean "an element comes next".
  element_follows = [:StartTag, :EmptyTag].include?(type)

  case tagname
  when 'html'
    # An html element's start tag may be omitted if the first thing
    # inside the html element is not a space character or a comment.
    ![:Comment, :SpaceCharacters].include?(type)
  when 'head'
    # A head element's start tag may be omitted if the first thing
    # inside the head element is an element.
    element_follows
  when 'body'
    # A body element's start tag may be omitted if the first thing
    # inside the body element is not a space character or a comment,
    # except if the first thing inside the body element is a script
    # or style element and the node immediately preceding the body
    # element is a head element whose end tag has been omitted.
    if [:Comment, :SpaceCharacters].include?(type)
      false
    elsif type == :StartTag
      # XXX: we do not look at the preceding event, so we never omit
      # the body element's start tag if it's followed by a script or
      # a style element.
      !%w[script style].include?(nexttok[:name])
    else
      true
    end
  when 'colgroup'
    # A colgroup element's start tag may be omitted if the first thing
    # inside the colgroup element is a col element, and if the element
    # is not immediately preceded by another colgroup element whose
    # end tag has been omitted.
    #
    # XXX: we do not look at the preceding event, so instead we never
    # omit the colgroup element's end tag when it is immediately
    # followed by another colgroup element. See is_optional_end.
    element_follows && nexttok[:name] == 'col'
  when 'tbody'
    # A tbody element's start tag may be omitted if the first thing
    # inside the tbody element is a tr element, and if the element is
    # not immediately preceded by a tbody, thead, or tfoot element
    # whose end tag has been omitted.
    return false unless type == :StartTag

    # is_optional_end omits the thead/tbody/tfoot end tag when a tbody
    # follows, so the tbody start tag has to stay in that situation.
    if previous && previous[:type] == :EndTag &&
       %w[tbody thead tfoot].include?(previous[:name])
      return false
    end

    nexttok[:name] == 'tr'
  else
    false
  end
end
# File lib/feed_tools/vendor/html5/lib/html5/filters/optionaltags.rb, line 8
# Walks the wrapped token stream (__getobj__) with a three-token sliding
# window.
#
# Yields (previous, current, following) once for every token in the
# stream: +previous+ is nil for the first token and +following+ is nil
# for the last one. Nothing is yielded when the stream is empty.
def slider
  previous1 = previous2 = nil
  __getobj__.each do |token|
    # Emission lags one token behind so the successor is known.
    yield previous2, previous1, token unless previous1.nil?
    previous2 = previous1
    previous1 = token
  end
  # Flush the last token. Skipped for an empty stream, where it would
  # hand callers a nil "current" token.
  yield previous2, previous1, nil unless previous1.nil?
end