This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.
require 'rubygems'
require 'mechanize'

# Fetch a page with a fresh agent and inspect the class of the result.
agent = Mechanize.new
page  = agent.get('http://google.com/')
page.class #=> Mechanize::Page
External aliases: pretty_inspect -> inspect
Attributes: mech [RW] (readable and writable)
# File lib/mechanize/page.rb, line 26
# Builds a page from a fetched response.
#
# +uri+, +response+, +body+ and +code+ are forwarded unchanged to the
# superclass initializer; +mech+ is stored in @mech unless @mech is already
# set once the superclass initializer has run.
#
# The encoding is taken from the last response header whose value carries a
# usable +charset=+ parameter (a value of 'none' is ignored), falling back to
# Util.detect_charset(body). It is cleared again when the HTML body contains
# a <meta> tag that mentions a charset.
#
# Raises Mechanize::ContentTypeError unless the response content type starts
# with text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil

  # Use each_header when the response offers it, otherwise plain each.
  method = response.respond_to?(:each_header) ? :each_header : :each
  response.send(method) do |_header, v|
    next unless v =~ /charset/i
    # Match the parameter name case-insensitively, like the guard above, so a
    # value such as "Charset=UTF-8" still yields its charset.
    encoding = v[/charset=([^; ]+)/i, 1]
    # Skip unusable values so an encoding found in an earlier header is kept.
    @encoding = encoding if encoding && encoding != 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if body && body.respond_to?(:force_encoding)

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech ||= mech

  # Drop the detected encoding when the markup itself mentions a charset in a
  # <meta> tag (presumably so the parser can honour the document's own
  # declaration -- confirm against the parser's behaviour).
  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

  # Anchor BOTH alternatives at the start of the header value. Without the
  # group, the anchor applied only to text/html and application/xhtml+xml was
  # accepted anywhere inside the value.
  raise Mechanize::ContentTypeError.new(response['content-type']) unless
    response['content-type'] =~ /\A(?:text\/html|application\/xhtml\+xml)/i

  # Reset every lazily built cache.
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end
Return a list of all base tags
# File lib/mechanize/page.rb, line 243
# Lists the <base> elements of the document, each wrapped in a Base built
# from the node, @mech and this page. The list is built on first use and
# cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end
Get the content type
# File lib/mechanize/page.rb, line 92
# Returns the value stored under 'content-type' in the response.
def content_type
  header_name = 'content-type'
  response[header_name]
end
# File lib/mechanize/page.rb, line 72
# Returns the encoding reported by the parser, or nil when the parser does
# not respond to +encoding+.
def encoding
  document = parser
  return nil unless document.respond_to?(:encoding)

  document.encoding
end
# File lib/mechanize/page.rb, line 58
# Sets the encoding to use when the document is parsed.
#
# When a parser has already been built and reports a different encoding
# (compared case-insensitively), the parser is discarded so that the next
# call to #parser rebuilds it with the new encoding. A parser that does not
# respond to +encoding+ is left untouched, matching the respond_to? check
# made by the #encoding reader.
#
# Returns +encoding+.
def encoding=(encoding)
  @encoding = encoding

  if @parser && @parser.respond_to?(:encoding)
    current = @parser.encoding
    current = current.downcase if current
    wanted  = encoding && encoding.downcase
    # lazy reinitialize the parser with the new encoding
    @parser = nil if current != wanted
  end

  encoding
end
Return a list of all form tags
# File lib/mechanize/page.rb, line 217
# Lists the <form> elements of the document, each wrapped in a Form built
# from the node, @mech and this page. A form without an action gets the
# page's own URI (as a string) as its action. The list is cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    Form.new(form_node, @mech, self).tap do |built|
      built.action ||= @uri.to_s
    end
  end
end
Return a list of all frame tags
# File lib/mechanize/page.rb, line 250
# Lists the <frame> elements of the document, each wrapped in a Frame built
# from the node, @mech and this page. The list is cached in @frames.
def frames
  @frames ||= search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end
Return a list of all iframe tags
# File lib/mechanize/page.rb, line 257
# Lists the <iframe> elements of the document. Each one is wrapped in a Frame
# (the same wrapper used for <frame>) built from the node, @mech and this
# page. The list is cached in @iframes.
def iframes
  return @iframes if @iframes

  iframe_nodes = search('iframe')
  @iframes = iframe_nodes.map { |iframe_node| Frame.new(iframe_node, @mech, self) }
end
# File lib/mechanize/page.rb, line 269
# Returns the +url+ of every image on the page with duplicates removed.
# The list is cached in @image_urls.
def image_urls
  @image_urls ||= begin
    urls = images.map { |image| image.url }
    urls.uniq
  end
end
Return a list of all img tags
# File lib/mechanize/page.rb, line 264
# Lists the <img> elements of the document, each wrapped in an Image built
# from the node and this page. The list is cached in @images.
def images
  @images ||= begin
    img_nodes = search('img')
    img_nodes.map { |img_node| Image.new(img_node, self) }
  end
end
Return a list of all label tags
# File lib/mechanize/page.rb, line 275
# Lists the <label> elements of the document, each wrapped in a Label built
# from the node and this page. The list is cached in @labels.
def labels
  @labels ||= search('label').map do |label_node|
    Label.new(label_node, self)
  end
end
# File lib/mechanize/page.rb, line 280
# Returns a hash of labels keyed by the value of their node's 'for'
# attribute. Labels whose +for+ is nil or false are left out. The hash is
# built on first use and cached in @labels_hash.
def labels_hash
  @labels_hash ||= labels.inject({}) do |by_target, label|
    by_target[label.node['for']] = label if label.for
    by_target
  end
end
Return a list of all a (anchor) and area tags
# File lib/mechanize/page.rb, line 207
# Lists every <a> element followed by every <area> element, each wrapped in
# a Link built from the node, @mech and this page. The list is cached in
# @links.
def links
  return @links if @links

  grouped = %w{ a area }.map do |tag_name|
    search(tag_name).map { |node| Link.new(node, @mech, self) }
  end
  @links = grouped.flatten
end
Return a list of the meta tags in the document head that carry an http-equiv refresh directive
# File lib/mechanize/page.rb, line 227
# Collects the <meta> elements directly under <head> that have both an
# http-equiv and a content attribute and whose http-equiv is "refresh"
# (compared case-insensitively). For each of them Meta.parse is called with
# the content and the page uri; its block stores the yielded delay and href
# on the node and builds a Meta from the node, @mech and this page.
# nil results are dropped and the list is cached in @meta.
def meta
  return @meta if @meta

  candidates = search('head > meta').map do |node|
    equiv   = node['http-equiv']
    content = node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    Meta.parse(content, uri) do |delay, href|
      node['delay'] = delay
      node['href']  = href
      Meta.new(node, @mech, self)
    end
  end
  @meta = candidates.compact
end
# File lib/mechanize/page.rb, line 76
# Returns the parsed document, building and caching it in @parser on first
# use. Nothing is parsed unless both body and response are present.
#
# When the agent's html_parser is Nokogiri::HTML the stored encoding is
# passed along to parse; any other parser only receives the HTML body.
def parser
  return @parser if @parser
  return @parser unless body && response

  html_parser = mech.html_parser
  @parser =
    if html_parser == Nokogiri::HTML
      html_parser.parse(html_body, nil, @encoding)
    else
      html_parser.parse(html_body)
    end
end
# File lib/mechanize/page.rb, line 52
# Returns the inner text of the document's <title>, or nil when there is no
# parser or the text is empty. A non-empty title is cached in @title.
def title
  @title ||= begin
    text = parser && search('title').inner_text
    text if text && text.length > 0
  end
end