| Class | ::Utils::HttpUtil |
| In: |
lib/rbot/core/utils/httputil.rb
|
| Parent: | Object |
Class for making HTTP requests easier (mainly for plugins to use). This class can check the bot proxy configuration to determine if a proxy needs to be used, which includes support for per-URL proxy configuration.
Create the HttpUtil instance, associating it with Bot bot
# File lib/rbot/core/utils/httputil.rb, line 291
# Build the HttpUtil helper for +bot+.
#
# Sets up an empty response cache and the default request headers, then
# registers a timer on the bot (added with an argument of 300) whose block
# calls remove_stale_cache unless the 'http.no_expire_cache' config value
# is set.
#
# bot:: the bot this helper reads its configuration and timer from
def initialize(bot)
  @bot = bot
  @cache = {}

  user_agent = "rbot http util #{$version} (#{Irc::Bot::SOURCE_URL})"
  @headers = {
    'Accept-Charset' => 'utf-8;q=1.0, *;q=0.8',
    'Accept-Encoding' => 'gzip;q=1, deflate;q=1, identity;q=0.8, *;q=0.2',
    'User-Agent' => user_agent
  }

  debug "starting http cache cleanup timer"
  @timer = @bot.timer.add(300) do
    self.remove_stale_cache unless @bot.config['http.no_expire_cache']
  end
end
| uri: | uri to query (URI object or String) |
Simple GET request, returns (if possible) response body following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 635
# Simple GET request.
#
# uri::     uri to query (URI object or String)
# options:: options forwarded unchanged to get_response
#
# Returns the response body when get_response produces a Net::HTTPOK or
# Net::HTTPPartialContent response. Any other response, and any ordinary
# error raised along the way, is logged through +error+ and results in nil.
# The optional block is forwarded to get_response.
def get(uri, options = {}, &block) # :yields: resp
  resp = get_response(uri, options, &block)
  unless Net::HTTPOK === resp || Net::HTTPPartialContent === resp
    raise "http error: #{resp}"
  end
  resp.body
rescue StandardError, Timeout::Error => e
  # Deliberately narrower than `rescue Exception`: Interrupt, SystemExit and
  # similar must keep propagating instead of being turned into a nil result.
  # Timeout::Error is listed explicitly because older Rubies did not derive
  # it from StandardError.
  error e
  nil
end
| uri: | uri to query (URI object or String) |
| nbytes: | number of bytes to get |
Partial GET request, returns (if possible) the first nbytes bytes of the response body, following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 693
# Partial GET request: asks for the first +nbytes+ bytes of the body.
#
# uri::     uri to query (URI object or String)
# nbytes::  number of bytes to get (defaults to the 'http.info_bytes'
#           config value)
# options:: options forwarded to get; an explicit :range here overrides the
#           one computed from +nbytes+
#
# Returns whatever get returns for the ranged request.
def get_partial(uri, nbytes = @bot.config['http.info_bytes'], options = {}, &block) # :yields: resp
  # HTTP byte ranges are inclusive on both ends, so the first N bytes are
  # "bytes=0-(N-1)". The end is clamped at 0 so the header stays well formed
  # for nbytes <= 0; a nil nbytes keeps the open-ended "bytes=0-" form.
  last_byte = nbytes.nil? ? nil : [nbytes.to_i - 1, 0].max
  opts = { :range => "bytes=0-#{last_byte}" }.merge(options)
  get(uri, opts, &block)
end
| uri: | URI to create a proxy for |
Return a net/http Proxy object, configured for proxying based on the bot's proxy configuration. See proxy_required for more details on this.
# File lib/rbot/core/utils/httputil.rb, line 362
# Build a Net::HTTP object for +uri+, going through a proxy when the bot
# configuration asks for one.
#
# uri::     URI object to connect to (its host, port and scheme are read)
# options:: may carry :read_timeout and :open_timeout, overriding the
#           'http.read_timeout' / 'http.open_timeout' config values
#
# When 'http.use_proxy' is set, the proxy address comes from
# 'http.proxy_uri' if present, otherwise from the http_proxy environment
# variable. The proxy is only applied when proxy_required(uri) is true.
#
# Returns the configured (not yet started) Net::HTTP object.
def get_proxy(uri, options = {})
  opts = {
    :read_timeout => @bot.config["http.read_timeout"],
    :open_timeout => @bot.config["http.open_timeout"]
  }.merge(options)

  proxy_host = proxy_port = proxy_user = proxy_pass = nil

  if @bot.config["http.use_proxy"]
    proxy = nil
    # The configured URI takes precedence over the environment. A value
    # that cannot be parsed is skipped (and reported) instead of wiping out
    # an otherwise usable fallback.
    [@bot.config["http.proxy_uri"], ENV['http_proxy']].each do |candidate|
      next unless candidate
      begin
        proxy = URI.parse(candidate)
        break
      rescue StandardError => e
        warning "ignoring unusable proxy URI #{candidate.inspect}: #{e.message}"
      end
    end

    if proxy
      debug "proxy is set to #{proxy.host} port #{proxy.port}"
      if proxy_required(uri)
        proxy_host = proxy.host
        proxy_port = proxy.port
        proxy_user = @bot.config["http.proxy_user"]
        proxy_pass = @bot.config["http.proxy_pass"]
      end
    end
  end

  # Passing nil proxy settings explicitly yields a direct connection.
  h = Net::HTTP.new(uri.host, uri.port, proxy_host, proxy_port, proxy_user, proxy_pass)
  h.use_ssl = true if uri.scheme == "https"

  h.read_timeout = opts[:read_timeout]
  h.open_timeout = opts[:open_timeout]
  h
end
| uri: | uri to query (URI object or String) |
Generic http transaction method. It will return a Net::HTTPResponse object or raise an exception
If a block is given, it will yield the response (see :yield option)
Currently supported options:
| method: | request method [:get (default), :post or :head] |
| open_timeout: | open timeout for the proxy |
| read_timeout: | read timeout for the proxy |
| cache: | should we cache results? |
| yield: | if :final [default], calls the block for the response object; if :all, call the block for all intermediate redirects, too |
| max_redir: | how many redirects to follow before raising an exception; if -1, don't follow redirects, just return them |
| range: | make a ranged request (usually GET). accepts a string for HTTP/1.1 "Range:" header (i.e. "bytes=0-1000") |
| body: | request body (usually for POST requests) |
| headers: | additional headers to be set for the request. Its value must be a Hash in the form { 'Header' => 'value' } |
# File lib/rbot/core/utils/httputil.rb, line 517
# Generic http transaction method.
#
# uri_or_s:: uri to query (URI object, or anything whose to_s parses as one)
# options::  see below
#
# Returns the value of handle_response for the (possibly cached) response,
# yielding as described by the :yield option. Non-HTTP URIs raise
# immediately; any ordinary error raised while performing the request is
# logged through +error+ and re-raised as a RuntimeError carrying the
# original message.
#
# Supported options:
# method::       request method; :get (default), :post or :head, or the
#                matching Net::HTTP::Get / Post / Head class
# open_timeout:: open timeout, passed on to get_proxy
# read_timeout:: read timeout, passed on to get_proxy
# cache::        should we cache results? (forced off for POST)
# yield::        :final (default) or :all, interpreted by handle_response
# max_redir::    redirects to follow; forced to -1 for HEAD
# range::        value for the "Range" header (GET only)
# body::         request body (required for POST)
# headers::      Hash of additional request headers
# auth_head::    value for the "Authorization" header
def get_response(uri_or_s, options = {}, &block) # :yields: resp
  uri = uri_or_s.kind_of?(URI) ? uri_or_s : URI.parse(uri_or_s.to_s)
  unless URI::HTTP === uri
    if uri.scheme
      raise "#{uri.scheme.inspect} URI scheme is not supported"
    else
      raise "don't know what to do with #{uri.to_s.inspect}"
    end
  end

  opts = {
    :max_redir => @bot.config['http.max_redir'],
    :yield => :final,
    :cache => true,
    :method => :GET
  }.merge(options)

  # The method is normalised through to_s.downcase, so both :head and the
  # Net::HTTP::Head class (=> :"net::http::head") select the same branch.
  req_class = case opts[:method].to_s.downcase.intern
              when :head, :"net::http::head"
                opts[:max_redir] = -1
                Net::HTTP::Head
              when :get, :"net::http::get"
                Net::HTTP::Get
              when :post, :"net::http::post"
                opts[:cache] = false
                raise 'post request w/o a body?' unless opts[:body]
                warning "refusing to cache POST request" if options[:cache]
                Net::HTTP::Post
              else
                warning "unsupported method #{opts[:method]}, doing GET"
                Net::HTTP::Get
              end

  if req_class != Net::HTTP::Get && opts[:range]
    warning "can't request ranges for #{req_class}"
    opts.delete(:range)
  end

  cache_key = "#{opts[:range]}|#{req_class}|#{uri.to_s}"

  if req_class != Net::HTTP::Get && req_class != Net::HTTP::Head
    if opts[:cache]
      warning "can't cache #{req_class.inspect} requests, working w/o cache"
      opts[:cache] = false
    end
  end

  debug "get_response(#{uri}, #{opts.inspect})"

  cached = @cache[cache_key]

  if opts[:cache] && cached
    debug "got cached"
    if !cached.expired?
      debug "using cached"
      cached.use
      return handle_response(uri, cached.response, opts, &block)
    end
  end

  headers = @headers.dup.merge(opts[:headers] || {})
  headers['Range'] = opts[:range] if opts[:range]
  headers['Authorization'] = opts[:auth_head] if opts[:auth_head]

  # An expired cache entry gets to add its own headers to the GET request.
  if opts[:cache] && cached && (req_class == Net::HTTP::Get)
    cached.setup_headers headers
  end

  req = req_class.new(uri.request_uri, headers)
  if uri.user && uri.password
    req.basic_auth(uri.user, uri.password)
    # Remembered in opts so it is also sent when a redirect is followed.
    opts[:auth_head] = req['Authorization']
  end
  req.body = opts[:body] if req_class == Net::HTTP::Post
  debug "prepared request: #{req.to_hash.inspect}"

  begin
    get_proxy(uri, opts).start do |http|
      http.request(req) do |resp|
        resp['x-rbot-location'] = uri.to_s
        if Net::HTTPNotModified === resp && cached
          debug "not modified"
          begin
            cached.revalidate(resp)
          rescue StandardError, Timeout::Error => e
            error e
          end
          debug "reusing cached"
          resp = cached.response
        elsif Net::HTTPServerError === resp || Net::HTTPClientError === resp
          debug "http error, deleting cached obj" if cached
          @cache.delete(cache_key)
        end
        # A 304 without a cached object (e.g. the caller supplied its own
        # conditional headers) is handed to handle_response as it is.

        begin
          return handle_response(uri, resp, opts, &block)
        ensure
          # Runs on the way out of the return above, once the response has
          # been handled.
          fresh = begin
            CachedObject.maybe_new(resp)
          rescue StandardError
            nil
          end
          if fresh
            debug "storing to cache"
            @cache[cache_key] = fresh
          end
        end
      end
    end
  rescue StandardError, Timeout::Error => e
    # Narrower than `rescue Exception` on purpose: Interrupt, SystemExit and
    # similar must not be converted into a plain RuntimeError.
    error e
    raise e.message
  end
end
Internal method used to handle response resp received when making a request for URI uri.
It follows redirects, optionally yielding them if option :yield is :all.
Also yields and returns the final resp.
# File lib/rbot/core/utils/httputil.rb, line 407
# Internal method used to handle response +resp+ received when making a
# request for URI +uri+.
#
# uri::  URI the request was made for
# resp:: the Net::HTTPResponse received
# opts:: the options of the originating get_response call; :max_redir,
#        :yield, :method and :headers are read here
#
# Redirections carrying a Location header are followed through
# get_response (raising 'Too many redirections' once :max_redir reaches 0),
# yielding the intermediate response when :yield is :all. For any other
# response the +body+ method is rebound to +cooked_body+, a missing
# content type is guessed from the request location, and the response is
# yielded to the block (or its raw body is read) and returned.
def handle_response(uri, resp, opts, &block) # :yields: resp
  if Net::HTTPRedirection === resp && opts[:max_redir] >= 0
    if resp.key?('location')
      raise 'Too many redirections' if opts[:max_redir] <= 0
      yield resp if opts[:yield] == :all && block_given?
      # some servers actually provide unescaped location, e.g.
      # http://ulysses.soup.io/post/60734021/Image%20curve%20ball
      # redirects to something like
      # http://ulysses.soup.io/post/60734021/Image curve ball?sessid=8457b2a3752085cca3fb1d79b9965446
      # causing the URI parser to (obviously) complain. We cannot just
      # escape blindly, as this would make a mess of already-escaped
      # locations, so we only do it if the URI.parse fails
      loc = resp['location']
      escaped = false
      debug "redirect location: #{loc.inspect}"
      begin
        new_loc = URI.join(uri.to_s, loc) rescue URI.parse(loc)
      rescue
        # Only one escaping attempt: a second failure is re-raised as is.
        raise if escaped
        # URI.escape is gone from recent Rubies; the default parser still
        # provides the same escaping where it is available.
        loc = if defined?(URI::DEFAULT_PARSER)
                URI::DEFAULT_PARSER.escape(loc)
              else
                URI.escape(loc)
              end
        escaped = true
        debug "escaped redirect location: #{loc.inspect}"
        retry
      end
      new_opts = opts.dup
      new_opts[:max_redir] -= 1
      # A redirected POST is followed with a GET.
      case opts[:method].to_s.downcase.intern
      when :post, :"net::http::post"
        new_opts[:method] = :get
      end
      if resp['set-cookie']
        debug "set cookie request for #{resp['set-cookie']}"
        cookie, cookie_flags = (resp['set-cookie']+'; ').split('; ', 2)
        domain = uri.host
        cookie_flags.scan(/(\S+)=(\S+);/) { |key, val|
          # Attribute names are matched case-insensitively ("Domain=...").
          if key.downcase.intern == :domain
            domain = val
            break
          end
        }
        debug "cookie domain #{domain} / #{new_loc.host}"
        if new_loc.host.rindex(domain) == new_loc.host.length - domain.length
          debug "setting cookie"
          # opts.dup is shallow: build a new headers hash so the caller's
          # own :headers hash is not modified.
          new_opts[:headers] = (new_opts[:headers] || {}).merge('Cookie' => cookie)
        else
          debug "cookie is for another domain, ignoring"
        end
      end
      debug "following the redirect to #{new_loc}"
      return get_response(new_loc, new_opts, &block)
    else
      warning ":| redirect w/o location?"
    end
  end
  # From here on resp.body means the cooked body.
  class << resp
    undef_method :body
    alias :body :cooked_body
  end
  unless resp['content-type']
    debug "No content type, guessing"
    resp['content-type'] =
      case resp['x-rbot-location']
      when /\.xhtml$/i
        'application/xhtml+xml'
      when /\.html?$/i
        'text/html'
      when /\.xml$/i
        'application/xml'
      when /\.(gif|png|jpe?g|jp2|tiff?)$/i
        ext = Regexp.last_match(1).downcase
        "image/#{ext.sub(/^jpg$/,'jpeg').sub(/^tif$/,'tiff')}"
      else
        'application/octet-stream'
      end
  end
  if block_given?
    yield(resp)
  else
    # Net::HTTP wants us to read the whole body here
    resp.raw_body
  end
  return resp
end
| uri: | uri to query (URI object or String) |
Simple HEAD request, returns (if possible) response head following redirs and caching if requested, yielding the actual response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 653
# Simple HEAD request.
#
# uri::     uri to query (URI object or String)
# options:: options forwarded to get_response; :method defaults to :head
#
# Returns the response produced by get_response, whatever its status. If
# get_response raises an ordinary error, it is logged through +error+ and
# nil is returned. The optional block is forwarded to get_response.
def head(uri, options = {}, &block) # :yields: resp
  opts = { :method => :head }.merge(options)
  get_response(uri, opts, &block)
rescue StandardError, Timeout::Error => e
  # Narrower than `rescue Exception` on purpose: Interrupt, SystemExit and
  # similar must keep propagating. Timeout::Error is named explicitly
  # because older Rubies did not derive it from StandardError.
  error e
  nil
end
| uri: | uri to query (URI object or String) |
| data: | body of the POST |
Simple POST request, returns (if possible) response following redirs and caching if requested, yielding the response(s) to the optional block. See get_response for details on the supported options
# File lib/rbot/core/utils/httputil.rb, line 673
# Simple POST request.
#
# uri::     uri to query (URI object or String)
# data::    body of the POST
# options:: options forwarded to get_response; :method, :body and :cache
#           default to :post, +data+ and false
#
# Returns the response when it is a Net::HTTPOK or Net::HTTPCreated. Any
# other response, and any ordinary error raised along the way, is logged
# through +error+ and results in nil. The optional block is forwarded to
# get_response.
def post(uri, data, options = {}, &block) # :yields: resp
  opts = { :method => :post, :body => data, :cache => false }.merge(options)
  resp = get_response(uri, opts, &block)
  raise 'http error' unless Net::HTTPOK === resp || Net::HTTPCreated === resp
  resp
rescue StandardError, Timeout::Error => e
  # Narrower than `rescue Exception` on purpose: Interrupt, SystemExit and
  # similar must keep propagating. Timeout::Error is named explicitly
  # because older Rubies did not derive it from StandardError.
  error e
  nil
end
This method checks if a proxy is required to access uri, by looking at the values of config values +http.proxy_include+ and +http.proxy_exclude+.
Each of these config values, if set, should be a Regexp the server name and IP address should be checked against.
# File lib/rbot/core/utils/httputil.rb, line 318
# Decide whether a proxy should be used to reach +uri+.
#
# uri:: URI object whose host is checked
#
# The 'http.proxy_exclude' and 'http.proxy_include' config values are lists
# of regular-expression sources. They are matched against the host name and
# every address Resolv returns for it. With both lists empty the answer is
# always true. Otherwise a match in the exclude list turns the proxy off,
# and a match in the include list turns it (back) on, so include wins when
# both match.
#
# Returns true or false.
def proxy_required(uri)
  excludes = @bot.config["http.proxy_exclude"]
  includes = @bot.config["http.proxy_include"]
  return true if excludes.empty? && includes.empty?

  candidates = [uri.host]
  begin
    candidates.concat Resolv.getaddresses(uri.host)
  rescue StandardError => err
    # Resolution failure is not fatal: the host name alone is still checked.
    warning "couldn't resolve host #{uri.host} (#{err.message})"
  end

  # All patterns are compiled before matching, so an invalid pattern is
  # reported regardless of where it sits in the list.
  matches_any = lambda do |patterns|
    patterns.map { |pat| Regexp.new(pat) }.any? do |re|
      candidates.any? { |item| re.match(item) }
    end
  end

  use_proxy = true
  use_proxy = false if matches_any.call(excludes)
  use_proxy = true if matches_any.call(includes)

  debug "using proxy for uri #{uri}?: #{use_proxy}"
  use_proxy
end
# File lib/rbot/core/utils/httputil.rb, line 698
# Drop cache entries that have gone stale.
#
# An entry is removed when the time since its +last_used+ exceeds the
# 'http.expire_time' config value, or the time since its +first_used+
# exceeds 'http.max_cache_time'. Both config values are multiplied by 60
# before being compared with the elapsed time.
#
# Failures while scanning the cache are logged through +error+ and do not
# propagate.
def remove_stale_cache
  debug "Removing stale cache"
  now = Time.new
  max_last = @bot.config['http.expire_time'] * 60
  max_first = @bot.config['http.max_cache_time'] * 60
  debug "#{@cache.size} pages before"
  begin
    @cache.reject! do |_key, val|
      (now - val.last_used > max_last) || (now - val.first_used > max_first)
    end
  rescue => e
    # pretty_inspect only exists once the pp library has been loaded; fall
    # back to inspect so the handler itself cannot raise NoMethodError.
    detail = e.respond_to?(:pretty_inspect) ? e.pretty_inspect : e.inspect
    error "Failed to remove stale cache: #{detail}"
  end
  debug "#{@cache.size} pages after"
end