| Module | FeedTools::RetrievalHelper |
| In: |
lib/feed_tools/helpers/retrieval_helper.rb
|
Methods for pulling remote data
| ACCEPT_HEADER | = | "application/atom+xml,application/rdf+xml," + "application/rss+xml,application/x-netcdf,application/xml;" + "q=0.9,text/xml;q=0.2,*/*;q=0.1" | Stolen from the Universal Feed Parser |
Makes an HTTP GET request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the redirect location and the HTTP redirect response as arguments.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 250
# Performs an HTTP GET by delegating to http_request with the :get
# operation. The url, the options hash and the optional block are
# forwarded unchanged, and whatever http_request returns is returned.
def self.http_get(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:get, url, options, &block)
end
Makes an HTTP POST request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the redirect location and the HTTP redirect response as arguments.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 258
# Performs an HTTP POST by delegating to http_request with the :post
# operation. The url, the options hash and the optional block are
# forwarded unchanged, and whatever http_request returns is returned.
def self.http_post(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:post, url, options, &block)
end
Makes an HTTP request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the redirect location and the HTTP redirect response as arguments.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 43
# Makes an HTTP request and returns the HTTP response.
#
# http_operation:: name of the Net::HTTP method to invoke (for example
#                  :get, :head or :post).
# url::            the location to request. If it cannot be parsed as-is it
#                  is passed through FeedTools::UriHelper.normalize_url and
#                  parsed again.
# options::        a Hash that may contain:
#                  :feed_object::     object whose http_headers and
#                                     configurations supply conditional-GET
#                                     headers, user agent, proxy, auth and
#                                     timeout settings.
#                  :form_data::       Hash of fields sent as the body of a
#                                     :post request.
#                  :request_headers:: Hash of headers to send.
#                  :redirect_limit::  remaining number of redirects that may
#                                     be followed (default 10).
#                  :response_chain::  Array of [url, response] pairs for the
#                                     redirects followed so far.
#                  :follow_redirects:: given a default here but never read
#                                      by this method.
#                                      NOTE(review): confirm whether it was
#                                      meant to disable redirect following.
#
# If a block is given it is called with the resolved redirect location and
# the redirect response; the redirect is only followed when the block
# returns a true value, otherwise the redirect response itself is returned.
#
# The returned response answers to +response_chain+.
#
# Raises FeedAccessError when the redirect limit is exhausted, when a
# redirect carries no Location header, when a redirect loop is detected, or
# when a socket error, timeout, unreachable network or connection reset
# prevents retrieval.
def self.http_request(http_operation, url, options={}, &block)
  response = nil

  options = {
    :feed_object => nil,
    :form_data => nil,
    :request_headers => {},
    :follow_redirects => true,
    :redirect_limit => 10,
    :response_chain => []
  }.merge(options)

  if options[:redirect_limit] == 0
    raise FeedAccessError, 'Redirect too deep'
  end

  # Anything that is not a non-empty Array is replaced by a fresh chain.
  chain = options[:response_chain]
  if !chain.kind_of?(Array) || chain.empty?
    options[:response_chain] = []
  end

  options[:request_headers] = {} unless options[:request_headers].kind_of?(Hash)
  options[:form_data] = nil unless options[:form_data].kind_of?(Hash)

  feed = options[:feed_object]

  # Only derive headers from the feed when the caller supplied none.
  if options[:request_headers].empty? && feed != nil
    options[:request_headers] = {}
    unless feed.http_headers.nil?
      unless feed.http_headers['etag'].nil?
        options[:request_headers]["If-None-Match"] =
          feed.http_headers['etag']
      end
      unless feed.http_headers['last-modified'].nil?
        options[:request_headers]["If-Modified-Since"] =
          feed.http_headers['last-modified']
      end
    end
    unless feed.configurations[:user_agent].nil?
      options[:request_headers]["User-Agent"] =
        feed.configurations[:user_agent]
    end
  end
  if options[:request_headers]["Accept"].nil?
    options[:request_headers]["Accept"] =
      FeedTools::RetrievalHelper::ACCEPT_HEADER
  end
  if options[:request_headers]["User-Agent"].nil?
    options[:request_headers]["User-Agent"] =
      FeedTools.configurations[:user_agent]
  end

  uri = nil
  begin
    uri = URI.parse(url)
  rescue URI::InvalidURIError
    # Retry once with a normalized form of the url.
    uri = URI.parse(FeedTools::UriHelper.normalize_url(url))
  end

  begin
    proxy_address = nil
    proxy_port = nil
    proxy_user = nil
    proxy_password = nil

    auth_user = nil
    auth_password = nil
    auth_scheme = nil

    if feed != nil
      proxy_address = feed.configurations[:proxy_address] || nil
      # nil.to_i is 0, which would override the default proxy port, so the
      # conversion is only applied when a port was actually configured.
      configured_port = feed.configurations[:proxy_port]
      proxy_port = configured_port.nil? ? nil : configured_port.to_i
      proxy_user = feed.configurations[:proxy_user] || nil
      proxy_password = feed.configurations[:proxy_password] || nil

      auth_user = feed.configurations[:auth_user] || nil
      auth_password = feed.configurations[:auth_password] || nil
      auth_scheme = feed.configurations[:auth_scheme] || nil
    end

    if auth_user && (auth_scheme.nil? || auth_scheme.to_s == 'basic')
      # Base64 via pack('m') inserts line breaks; strip them for the header.
      options[:request_headers]["Authorization"] =
        "Basic " + [
          "#{auth_user}:#{auth_password}"
        ].pack('m').delete("\r\n")
    end

    # A nil proxy address means no proxy is used.
    http = Net::HTTP::Proxy(
      proxy_address, proxy_port, proxy_user, proxy_password).new(
        uri.host, (uri.port || 80))

    # https locations must be spoken to over TLS; without this the request
    # is sent in clear text to the TLS port.
    if uri.scheme.to_s.downcase == 'https' && http.respond_to?(:use_ssl=)
      http.use_ssl = true
    end

    if feed != nil && feed.configurations[:http_timeout] != nil
      http.open_timeout = feed.configurations[:http_timeout].to_f
    elsif FeedTools.configurations[:http_timeout] != nil
      http.open_timeout = FeedTools.configurations[:http_timeout].to_f
    end
    # A timeout of zero means "no timeout".
    if http.open_timeout != nil && http.open_timeout == 0
      http.open_timeout = nil
    end

    # An empty request path is rejected by Net::HTTP; request the root.
    path = uri.path
    path = '/' if path.nil? || path.empty?
    path += ('?' + uri.query) if uri.query

    if http_operation == :post
      if options[:form_data].nil? || options[:form_data].empty?
        options[:form_data] = {}
      end
      # Net::HTTP#post expects (path, data, headers): the body comes before
      # the headers and must be an encoded string, not a Hash.
      post_headers = {
        "Content-Type" => "application/x-www-form-urlencoded"
      }.merge(options[:request_headers])
      request_params = [
        path, URI.encode_www_form(options[:form_data]), post_headers
      ]
    else
      request_params = [path, options[:request_headers]]
    end
    Thread.pass
    response = http.send(http_operation, *request_params)
    Thread.pass

    case response
    when Net::HTTPSuccess
      if feed != nil
        # We've reached the final destination, process all previous
        # redirections, and see if we need to update the url.
        options[:response_chain].each do |redirected_response|
          # Stop as soon as we hit anything that isn't a permanently
          # moved redirection.
          break unless redirected_response.last.code.to_i == 301

          # Reset the cache object or we may get duplicate entries
          # TODO: verify this line is necessary!
          feed.cache_object = nil

          feed.href = redirected_response.last['location']
        end
      end
    when Net::HTTPNotModified
      # Do nothing, we just don't want it processed as a redirection
    when Net::HTTPRedirection
      if response['location'].nil?
        raise FeedAccessError,
          "No location to redirect to supplied for " + response.code
      end
      options[:response_chain] << [url, response]

      redirected_location = response['location']
      redirected_location = FeedTools::UriHelper.resolve_relative_uri(
        redirected_location, [uri.to_s])

      if options[:response_chain].assoc(redirected_location) != nil
        raise FeedAccessError,
          "Redirection loop detected: #{redirected_location}"
      end

      # Let the block handle redirects
      follow_redirect = true
      if block != nil
        follow_redirect = block.call(redirected_location, response)
      end

      if follow_redirect
        response = FeedTools::RetrievalHelper.http_request(
          http_operation,
          redirected_location,
          options.merge(
            {:redirect_limit => (options[:redirect_limit] - 1)}),
          &block)
      end
    end
  rescue SocketError
    raise FeedAccessError, 'Socket error prevented feed retrieval'
  rescue Timeout::Error, Errno::ETIMEDOUT
    raise FeedAccessError, 'Timeout while attempting to retrieve feed'
  rescue Errno::ENETUNREACH
    raise FeedAccessError, 'Network was unreachable'
  rescue Errno::ECONNRESET
    raise FeedAccessError, 'Connection was reset by peer'
  end

  if response != nil
    # Expose the redirect history on the response object itself.
    class << response
      def response_chain
        return @response_chain
      end
    end
    response.instance_variable_set("@response_chain",
      options[:response_chain])
  end

  return response
end
Makes an HTTP HEAD request and returns the HTTP response. Optionally takes a block that determines whether or not to follow a redirect. The block will be passed the redirect location and the HTTP redirect response as arguments.
# File lib/feed_tools/helpers/retrieval_helper.rb, line 266
# Performs an HTTP HEAD by delegating to http_request with the :head
# operation. The url, the options hash and the optional block are
# forwarded unchanged, and whatever http_request returns is returned.
#
# Defined at module level so it can be called the same way as http_get
# and http_post.
def self.http_head(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:head, url, options, &block)
end

# Instance-level form of http_head, kept so that code which mixes this
# module in and calls http_head on an instance continues to work.
def http_head(url, options={}, &block)
  FeedTools::RetrievalHelper.http_request(:head, url, options, &block)
end