| Module | FeedTools::UriHelper |
| In: |
lib/feed_tools/helpers/uri_helper.rb
|
Generic url processing methods needed in numerous places throughout FeedTools
Converts a url into a tag uri
# File lib/feed_tools/helpers/uri_helper.rb, line 176
# Converts a url into a tag: uri of the form
# "tag:<host>,<YYYY-MM-DD>:<rest of the url after the host>".
#
# url::  String holding the url to convert; it is passed through
#        normalize_url before being used.
# date:: Time whose year-month-day becomes the date part of the tag.
#
# Returns the tag uri as a String.
#
# Raises ArgumentError if url is not a String, if date is not a Time,
# if the normalized url is not recognized as a uri, or if the url has
# no host to act as the tag authority.
def self.build_tag_uri(url, date)
  unless url.kind_of? String
    raise ArgumentError, "Expected String, got #{url.class.name}"
  end
  unless date.kind_of? Time
    raise ArgumentError, "Expected Time, got #{date.class.name}"
  end
  tag_uri = normalize_url(url)
  unless FeedTools::UriHelper.is_uri?(tag_uri)
    raise ArgumentError, "Must supply a valid URL."
  end
  host = URI.parse(tag_uri).host
  # Host-less uris (mailto:, urn:, ...) used to fall through to
  # String#index(nil) and surface as a TypeError. Report them the same way
  # as every other unusable url instead.
  if host.nil?
    raise ArgumentError, "Must supply a URL with a host."
  end
  tag_uri.gsub!(/^(http|ftp|file):\/*/, "")
  # The fragment separator is folded into the path of the tag.
  tag_uri.gsub!(/#/, "/")
  # Everything that follows the host becomes the "specific" part.
  specific = tag_uri[(tag_uri.index(host) + host.size)..-1]
  return "tag:#{host},#{date.strftime('%Y-%m-%d')}:#{specific}"
end
Converts a url into a urn:uuid: uri
# File lib/feed_tools/helpers/uri_helper.rb, line 196
# Converts a url into a urn:uuid: uri.
#
# The url is normalized first, then hashed into a SHA-1 based UUID under
# UUID_URL_NAMESPACE; the string form of that UUID's uri is returned.
#
# url:: String holding the url to convert.
#
# Raises ArgumentError if url is not a String.
def self.build_urn_uri(url)
  raise ArgumentError, "Expected String, got #{url.class.name}" unless url.kind_of?(String)

  canonical_url = normalize_url(url)
  # uuidtools is loaded lazily, only when a urn is actually requested.
  require 'uuidtools'
  UUID.sha1_create(UUID_URL_NAMESPACE, canonical_url).to_uri.to_s
end
Returns true if the idn module can be used.
# File lib/feed_tools/helpers/uri_helper.rb, line 33
# Returns true if the idn module can be used.
#
# Setting FeedTools.configurations[:idn_enabled] to false acts as an
# override that keeps idn from being used even when it is available.
# A positive probe result is remembered in @idn_enabled; a negative one
# is probed again on the next call.
def self.idn_enabled?
  return false if FeedTools.configurations[:idn_enabled] == false
  return @idn_enabled if @idn_enabled

  @idn_enabled = false
  begin
    require 'idn'
    # Sanity check: the library must convert a known international
    # domain name to its expected ASCII form.
    ascii_form = IDN::Idna.toASCII('http://www.詹姆斯.com/')
    @idn_enabled = true if ascii_form == "http://www.xn--8ws00zhy3a.com/"
  rescue LoadError
    # idn is not installed, disable features that rely on it.
    @idn_enabled = false
  end
  @idn_enabled
end
Returns true if the parameter appears to be a valid uri
# File lib/feed_tools/helpers/uri_helper.rb, line 206
# Returns true if the parameter appears to be a valid uri.
#
# A value counts as a uri when it is not nil, URI.parse accepts it, and
# the parsed result has a non-blank scheme.
def self.is_uri?(url)
  return false if url.nil?

  parsed = URI.parse(url)
  !parsed.scheme.blank?
rescue URI::InvalidURIError
  false
end
Attempts to ensure that the passed url is valid and sane. Accepts very, very ugly urls and makes every effort to figure out what they were supposed to be. Also translates from the feed: and rss: pseudo-protocols to the http: protocol.
# File lib/feed_tools/helpers/uri_helper.rb, line 61
# Attempts to ensure that the passed url is valid and sane. Accepts very
# ugly urls and makes every effort to figure out what they were supposed
# to be. Also translates the feed: and rss: pseudo-protocols (and a few
# common typos) to the http: protocol.
#
# url:: The url to clean up. Non-String values are converted with to_s.
#
# Returns nil when url is nil, "" when url is blank, "#" when the url
# contains "javascript:", and otherwise the normalized url String.
#
# The final FeedTools::URI.parse call is not rescued, so whatever it
# raises for an unparseable result propagates to the caller.
def self.normalize_url(url)
  return nil if url.nil?
  url = url.to_s unless url.kind_of?(String)
  return "" if url.blank?
  normalized_url = url.strip

  # Best-effort first pass; on failure the stripped input is kept.
  # Only StandardError is rescued so that interrupts and exit requests
  # are not swallowed.
  begin
    normalized_url =
      FeedTools::URI.convert_path(normalized_url.strip).normalize.to_s
  rescue StandardError
  end

  # Second pass: plain parse and normalize. If that fails, fall back to
  # the unescaped original, and if even that fails, to the stripped
  # original.
  begin
    normalized_url =
      FeedTools::URI.parse(normalized_url.strip).normalize.to_s
  rescue StandardError
    begin
      normalized_url = CGI.unescape(url.strip)
    rescue StandardError
      normalized_url = url.strip
    end
  end

  # if a url begins with the '/' character, it only makes sense that they
  # meant to be using a file:// url. Fix it for them.
  if normalized_url.start_with?("/")
    normalized_url = "file://" + normalized_url
  end

  # if a url begins with a drive letter followed by a colon, we're looking
  # at a file:// url. Fix it for them.
  if normalized_url =~ /^[a-zA-Z]:[\\\/]/
    normalized_url = "file:///" + normalized_url
  end

  # A javascript: url is quite possibly an attempt at doing something
  # malicious. The check deliberately matches anywhere in the url, not
  # only at the beginning.
  return "#" if normalized_url.downcase.include?("javascript:")

  # deal with all of the many ugly possibilities involved in the rss:
  # and feed: pseudo-protocols
  normalized_url.gsub!(/^htp:\/*/i, "http://")
  normalized_url.gsub!(/^http:\/*(feed:\/*)?/i, "http://")
  normalized_url.gsub!(/^http:\/*(rss:\/*)?/i, "http://")
  normalized_url.gsub!(/^feed:\/*(http:\/*)?/i, "http://")
  normalized_url.gsub!(/^rss:\/*(http:\/*)?/i, "http://")
  normalized_url.gsub!(/^file:\/*/i, "file:///")
  normalized_url.gsub!(/^https:\/*/i, "https://")
  normalized_url.gsub!(/^mms:\/*/i, "http://")
  # fix (very) bad urls (usually of the user-entered sort)
  normalized_url.gsub!(/^http:\/*(http:\/*)*/i, "http://")
  # A bare "http://" carries no information; reduce it to nothing.
  normalized_url.gsub!(/^http:\/*$/i, "")

  if (normalized_url =~ /^file:/i) == 0
    # Adjust windows-style urls
    normalized_url.gsub!(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
    normalized_url.gsub!(/\\/, '/')
  elsif FeedTools::URI.parse(normalized_url).scheme.nil? &&
      normalized_url =~ /\./
    # No scheme but something that looks like a host name: assume http.
    normalized_url = "http://" + normalized_url
  end

  # An http(s) prefix in front of a lone fragment or query is noise.
  normalized_url.gsub!(/^https?:\/\/#/i, "#")
  normalized_url.gsub!(/^https?:\/\/\?/i, "?")

  FeedTools::URI.parse(normalized_url.strip).normalize.to_s
end
Resolves a relative uri
# File lib/feed_tools/helpers/uri_helper.rb, line 151
# Resolves a relative uri against the first usable base uri.
#
# relative_uri::     The uri to resolve.
# base_uri_sources:: Candidate base uris. nil entries and entries with
#                    the file scheme are ignored. The list passed in is
#                    left untouched.
#
# Returns relative_uri unchanged when base_uri_sources is blank or when
# anything goes wrong while resolving, nil when relative_uri is nil, and
# otherwise the resolved uri passed through normalize_url.
def self.resolve_relative_uri(relative_uri, base_uri_sources=[])
  return relative_uri if base_uri_sources.blank?
  return nil if relative_uri.nil?
  begin
    # Massive HACK to get around file protocol URIs being used to
    # resolve relative URIs on feeds in the local file system.
    # Better to leave these URIs unresolved and hope some other
    # tool resolves them correctly.
    #
    # A filtered copy is used so the caller's list is not modified.
    usable_sources = base_uri_sources.reject do |base_uri|
      base_uri.nil? ||
        FeedTools::URI.parse(base_uri).scheme == "file"
    end
    base_uri = FeedTools::URI.parse(
      FeedTools::XmlHelper.select_not_blank(usable_sources))
    resolved_uri = base_uri
    # An empty relative uri resolves to the base uri itself.
    if relative_uri.to_s != ''
      resolved_uri = base_uri + relative_uri.to_s
    end
    return FeedTools::UriHelper.normalize_url(resolved_uri.to_s)
  rescue
    # Resolution is best effort: hand back what we were given.
    return relative_uri
  end
end