| Module | ::Utils |
| In: |
lib/rbot/core/utils/httputil.rb
lib/rbot/core/utils/utils.rb lib/rbot/core/utils/parse_time.rb |
Miscellaneous useful functions
| UNESCAPE_TABLE | = | { 'laquo' => '«', 'raquo' => '»', 'quot' => '"', 'apos' => '\'', 'micro' => 'µ', 'copy' => '©', 'trade' => '™', 'reg' => '®', 'amp' => '&', 'lt' => '<', 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', 'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', 'Atilde' => 'Ã', 'Auml' => 'Ä', 'Aring' => 'Å', 'AElig' => 'Æ', 'OElig' => 'Œ', 'Ccedil' => 'Ç', 'Egrave' => 'È', 'Eacute' => 'É', 'Ecirc' => 'Ê', 'Euml' => 'Ë', 'Igrave' => 'Ì', 'Iacute' => 'Í', 'Icirc' => 'Î', 'Iuml' => 'Ï', 'ETH' => 'Ð', 'Ntilde' => 'Ñ', 'Ograve' => 'Ò', 'Oacute' => 'Ó', 'Ocirc' => 'Ô', 'Otilde' => 'Õ', 'Ouml' => 'Ö', 'Oslash' => 'Ø', 'Ugrave' => 'Ù', 'Uacute' => 'Ú', 'Ucirc' => 'Û', 'Uuml' => 'Ü', 'Yacute' => 'Ý', 'THORN' => 'Þ', 'szlig' => 'ß', 'agrave' => 'à', 'aacute' => 'á', 'acirc' => 'â', 'atilde' => 'ã', 'auml' => 'ä', 'aring' => 'å', 'aelig' => 'æ', 'oelig' => 'œ', 'ccedil' => 'ç', 'egrave' => 'è', 'eacute' => 'é', 'ecirc' => 'ê', 'euml' => 'ë', 'igrave' => 'ì', 'iacute' => 'í', 'icirc' => 'î', 'iuml' => 'ï', 'eth' => 'ð', 'ntilde' => 'ñ', 'ograve' => 'ò', 'oacute' => 'ó', 'ocirc' => 'ô', 'otilde' => 'õ', 'ouml' => 'ö', 'oslash' => 'ø', 'ugrave' => 'ù', 'uacute' => 'ú', 'ucirc' => 'û', 'uuml' => 'ü', 'yacute' => 'ý', 'thorn' => 'þ', 'yuml' => 'ÿ' | ||
| AFTER_PAR_PATH | = | /^(?:div|span)$/ | ||
| AFTER_PAR_EX | = | /^(?:td|tr|tbody|table)$/ | ||
| AFTER_PAR_CLASS | = | /body|message|text/i | ||
| TITLE_REGEX | = | /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im | Title | |
| HX_REGEX | = | /<h(\d)(?:\s+[^>]*)?>(.*?)<\/h\1>/im | H1, H2, etc | |
| PAR_REGEX | = | /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im | A paragraph | |
| AFTER_PAR1_REGEX | = | /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im | Some blogging and forum platforms use spans or divs with a ‘body’ or ‘message’ or ‘text’ in their class to mark actual text | |
| AFTER_PAR2_REGEX | = | /<br(?:\s+[^>]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im | At worst, we can try stuff which is comprised between two <br> | |
| SEC_PER_MIN | = | 60 | Seconds per minute | |
| SEC_PER_HR | = | SEC_PER_MIN * 60 | Seconds per hour | |
| SEC_PER_DAY | = | SEC_PER_HR * 24 | Seconds per day | |
| SEC_PER_WK | = | SEC_PER_DAY * 7 | Seconds per week | |
| SEC_PER_MNTH | = | SEC_PER_DAY * 30 | Seconds per (30-day) month | |
| SEC_PER_YR | = | SEC_PER_DAY * 365 | Seconds per (non-leap) year |
Converts an age in seconds to "nn units". Inspired by previous attempts, and also by gitweb's age_string() sub.
# File lib/rbot/core/utils/utils.rb, line 245
# Describe an age given in seconds as a short, translated phrase such as
# "3 days", "half an hour" or "one second".
#
# secs:: age in seconds; a negative value is described by its magnitude
#
# The largest unit of which the age exceeds two whole units is used. Ages
# around 30, 60 and 90 minutes get a dedicated wording. Anything not above
# one second is reported as "one second".
def Utils.age_string(secs)
  return Utils.age_string(-secs) if secs < 0

  return _("%{m} years") % { :m => secs/SEC_PER_YR } if secs > 2*SEC_PER_YR
  return _("%{m} months") % { :m => secs/SEC_PER_MNTH } if secs > 2*SEC_PER_MNTH
  return _("%{m} weeks") % { :m => secs/SEC_PER_WK } if secs > 2*SEC_PER_WK
  return _("%{m} days") % { :m => secs/SEC_PER_DAY } if secs > 2*SEC_PER_DAY
  return _("%{m} hours") % { :m => secs/SEC_PER_HR } if secs > 2*SEC_PER_HR

  # Round-ish values below two hours are spelled out in words
  return _("half an hour") if (20*SEC_PER_MIN..40*SEC_PER_MIN).include?(secs)
  # (an alternative wording would be "about one hour")
  return _("an hour") if (50*SEC_PER_MIN..70*SEC_PER_MIN).include?(secs)
  return _("an hour and a half") if (80*SEC_PER_MIN..100*SEC_PER_MIN).include?(secs)

  return _("%{m} minutes") % { :m => secs/SEC_PER_MIN } if secs > 2*SEC_PER_MIN
  return _("%{m} seconds") % { :m => secs } if secs > 1

  _("one second")
end
HTML info filters often need to check if the webpage location of a passed DataStream ds matches a given Regexp.
# File lib/rbot/core/utils/utils.rb, line 653
# Check whether the location of the page held by DataStream +ds+ matches
# the Regexp +rx+.
#
# ds:: DataStream whose :headers entry (if any) is inspected
# rx:: Regexp tested against the 'x-rbot-location' and 'location' headers
#
# Returns the Array of matching header values, or nil when nothing matches
# or when +ds+ carries no headers.
def Utils.check_location(ds, rx)
  headers = ds[:headers]
  debug headers
  matches = if headers
              [headers['x-rbot-location'], headers['location']].flatten.grep(rx)
            else
              []
            end
  debug matches
  matches.empty? ? nil : matches
end
Returns a comma separated list except for the last element which is joined in with specified conjunction
# File lib/rbot/core/utils/utils.rb, line 728
# Join +words+ into a list in which the final element is attached with a
# conjunction, e.g. "a, b and c".
#
# words::   Array of elements to join
# options:: :join_with (default ", ") separates all elements but the last;
#           :join_last_with (default: translated " and ") attaches the last
#
# With fewer than two elements the last element is returned as is (nil for
# an empty list).
def Utils.comma_list(words, options={})
  opts = { :join_with => ", ", :join_last_with => _(" and ") }.merge(options)

  return words.last if words.size < 2

  leading = words[0..-2].join(opts[:join_with])
  [leading, words.last].join(opts[:join_last_with])
end
Decode HTML entities in the String str, using HTMLEntities if the package was found, or UNESCAPE_TABLE otherwise.
# File lib/rbot/core/utils/utils.rb, line 335
# Decode the HTML entities in the String +str+.
#
# When the HTMLEntities package is loaded it does the work. Otherwise a
# built-in fallback is used:
# * named entities are looked up in UNESCAPE_TABLE;
# * decimal (&#233;) and hexadecimal (&#xE9;) character references are
#   converted to the corresponding UTF-8 character;
# * well-formed but unknown entities, and references to code points that
#   cannot be encoded, are replaced by '*'.
#
# Only text shaped like an entity is touched: a bare ampersand followed
# later by a semicolon (as in "AT&T rocks; yes") is left alone.
#
# Returns the decoded String.
def Utils.decode_html_entities(str)
  return HTMLEntities.decode_entities(str) if defined? ::HTMLEntities

  str.gsub(/&(#\d+|#[xX][0-9a-fA-F]+|[A-Za-z][A-Za-z0-9]*);/) {
    symbol = $1

    # Numeric references: to_i also drops any zero padding
    codepoint = case symbol
                when /\A#(\d+)\z/
                  $1.to_i
                when /\A#[xX]([0-9a-fA-F]+)\z/
                  $1.hex
                end

    if codepoint
      begin
        [codepoint].pack("U")
      rescue RangeError
        # code point too large to be packed as UTF-8
        '*'
      end
    else
      # output the symbol's irc-translated character, or a * if it's unknown
      UNESCAPE_TABLE[symbol] || '*'
    end
  }
end
Get the first pars of the first count urls. The pages are downloaded using the bot httputil service. Returns an array of the first paragraphs fetched. If (optional) opts :message is specified, those paragraphs are echoed as replies to the IRC message passed as opts :message
# File lib/rbot/core/utils/utils.rb, line 699
# Fetch the first paragraph of the pages at +urls+ until +count+ paragraphs
# have been obtained or the list is exhausted.
#
# urls::  Array of URL strings; entries are consumed (shifted off) as they
#         are processed
# count:: number of non-empty paragraphs wanted
# opts::  passed on to Utils.get_html_info; if opts[:message] is set, each
#         paragraph found is also sent as a reply to that message, prefixed
#         by the 1-based position of its URL
#
# Returns an Array with one entry per page retrieved without error (nil
# for pages that yielded no content). URLs whose retrieval raises are
# logged and skipped.
def Utils.get_first_pars(urls, count, opts={})
  msg = opts[:message]
  pars = Array.new
  position = 0

  until count <= 0 || urls.empty?
    url = urls.shift
    position += 1

    begin
      par = Utils.get_html_info(URI.parse(url), opts)[:content]
      pars << par
      next unless par

      msg.reply "[#{position}] #{par}", :overlong => :truncate if msg
      count -= 1
    rescue => err
      debug "Unable to retrieve #{url}: #{err}"
    end
  end

  pars
end
This method extracts title, content (first par) and extra information from the given document doc.
doc can be an URI, a Net::HTTPResponse or a String.
If doc is a String, only title and content information are retrieved (if possible), using standard methods.
If doc is an URI or a Net::HTTPResponse, additional information is retrieved, and special title/summary extraction routines are used if possible.
# File lib/rbot/core/utils/utils.rb, line 568
# Extract title, content (first par) and extra information from +doc+.
#
# doc::  a String, a Net::HTTPResponse or a URI
# opts:: options passed on to the extraction routine used
#
# * String: delegates to Utils.get_string_html_info
# * Net::HTTPResponse: delegates to Utils.get_resp_html_info
# * URI: the document is fetched through the bot httputil service and the
#   response is run through Utils.get_resp_html_info; a DataStream is
#   returned (left as created if the block is never invoked)
#
# Raises a RuntimeError naming the offending class for any other kind of
# +doc+. A bare +raise+ was used here before: it carried no message and
# would re-raise an unrelated pending exception if one was set.
def Utils.get_html_info(doc, opts={})
  case doc
  when String
    Utils.get_string_html_info(doc, opts)
  when Net::HTTPResponse
    Utils.get_resp_html_info(doc, opts)
  when URI
    ret = DataStream.new
    @@bot.httputil.get_response(doc) { |resp|
      ret.replace Utils.get_resp_html_info(resp, opts)
    }
    return ret
  else
    raise "cannot get HTML info from a #{doc.class}: expected a String, Net::HTTPResponse or URI"
  end
end
This method extracts title, content (first par) and extra information from the given Net::HTTPResponse resp.
Currently, the only accepted options (in opts) are
| uri_fragment: | the URI fragment of the original request |
| full_body: | get the whole body instead of @@bot.config bytes only |
Returns a DataStream with the following keys:
| text: | the (partial) body |
| title: | the title of the document (if any) |
| content: | the first paragraph of the document (if any) |
| headers: | the headers of the Net::HTTPResponse. The value is a Hash whose keys are lowercase forms of the HTTP header fields, and whose values are Arrays. |
# File lib/rbot/core/utils/utils.rb, line 605
# Extract title, content (first par) and extra information from the
# Net::HTTPResponse +resp+.
#
# Accepted options (in +opts+):
# uri_fragment:: the URI fragment of the original request; when absent it
#                is taken from the response's 'x-rbot-location' or
#                'location' header, if that carries a non-empty fragment
# full_body::    use the whole body instead of only the first
#                @@bot.config['http.info_bytes'] bytes
#
# The caller's +opts+ Hash is never modified: a fragment discovered here is
# stored in a private copy, so it cannot leak into later calls that reuse
# the same options (as Utils.get_first_pars does for every URL).
#
# Returns a DataStream seeded with the options, plus :headers (the response
# headers as a Hash) and :text (the possibly partial body). If an :htmlinfo
# filter handles the page, its merged result is returned instead. Otherwise,
# for text/HTML/XML content types, :title and :content are merged in.
#
# Raises UrlLinkError if +resp+ is not a Net::HTTPSuccess.
def Utils.get_resp_html_info(resp, opts={})
  case resp
  when Net::HTTPSuccess
    # work on a copy so that the fragment handling below stays local
    opts = opts.dup

    loc = begin
      URI.parse(resp['x-rbot-location'] || resp['location'])
    rescue StandardError
      # missing or unparsable location: just go without a fragment
      nil
    end
    if loc and loc.fragment and not loc.fragment.empty?
      opts[:uri_fragment] ||= loc.fragment
    end

    ret = DataStream.new(opts.dup)
    ret[:headers] = resp.to_hash
    ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes'])

    filtered = Utils.try_htmlinfo_filters(ret)
    return filtered if filtered

    if resp['content-type'] =~ /^text\/|(?:x|ht)ml/
      ret.merge!(Utils.get_string_html_info(partial, opts))
    end
    return ret
  else
    raise UrlLinkError, "getting link (#{resp.code} - #{resp.message})"
  end
end
This method extracts title and content (first par) from the given HTML or XML document text, using standard methods (String#ircify_html_title, Utils.ircify_first_html_par)
Currently, the only accepted option (in opts) is
| uri_fragment: | the URI fragment of the original request |
# File lib/rbot/core/utils/utils.rb, line 671
# Extract title and content (first par) from the HTML or XML document
# +text+, using String#ircify_html_title and Utils.ircify_first_html_par.
#
# Accepted option (in +opts+):
# uri_fragment:: the URI fragment of the original request. If the document
#                contains an <a> tag whose name or id equals the fragment,
#                the paragraph search starts right after that anchor.
#
# The fragment is matched literally: it is escaped before being embedded in
# the search pattern, so characters such as '.', '(' or '[' in a fragment
# can neither match unintended text nor produce an invalid pattern.
#
# Unless opts[:strip] is given, the title is stripped from the beginning of
# the content.
#
# Returns a Hash with keys :title and :content; :content is nil when no
# text could be extracted.
def Utils.get_string_html_info(text, opts={})
  debug "getting string html info"
  txt = text.dup
  title = txt.ircify_html_title
  debug opts

  frag = opts[:uri_fragment]
  if frag and not frag.empty?
    fragreg = /<a\s+(?:[^>]+\s+)?(?:name|id)=["']?#{Regexp.escape(frag)}["']?[^>]*>/im
    debug fragreg
    debug txt
    anchor = txt.match(fragreg)
    # continue with what follows the anchor, if it was found
    txt = anchor.post_match if anchor
    debug txt
  end

  c_opts = opts.dup
  c_opts[:strip] ||= title
  content = Utils.ircify_first_html_par(txt, c_opts)
  content = nil if content.nil? or content.empty?
  return {:title => title, :content => content}
end
Try to grab and IRCify the first HTML par (<p> tag) in the given string. If possible, grab the one after the first heading
It is possible to pass some options to determine how the stripping occurs. Currently supported options are
| strip: | Regex or String to strip at the beginning of the obtained text |
| min_spaces: | minimum number of spaces a paragraph should have |
# File lib/rbot/core/utils/utils.rb, line 361
# Grab and IRCify the first HTML paragraph of +xml_org+, dispatching to the
# Hpricot-based implementation when Hpricot is loaded and to the
# regexp-based one otherwise.
#
# Supported options (in +opts+), passed through unchanged:
# strip::      Regex or String to strip at the beginning of the text
# min_spaces:: minimum number of spaces a paragraph should have
def Utils.ircify_first_html_par(xml_org, opts={})
  return Utils.ircify_first_html_par_woh(xml_org, opts) unless defined? ::Hpricot

  Utils.ircify_first_html_par_wh(xml_org, opts)
end
HTML first par grabber using hpricot
# File lib/rbot/core/utils/utils.rb, line 370
# HTML first par grabber using Hpricot.
#
# xml_org:: the HTML/XML text to parse
# opts::    :strip      - Regexp, or String (turned into a Regexp anchored
#                         at line start), removed from the beginning of
#                         each candidate text
#           :min_spaces - minimum number of spaces a candidate must contain
#                         to be accepted (default 8, negative values are
#                         treated as 0)
#
# Candidates are tried in this order: <p> elements following a heading, any
# <p> element, then div/span (or, failing those, table-related) elements
# whose class or id matches AFTER_PAR_CLASS. If no candidate is long
# enough, min_spaces is halved and every attempt is repeated.
#
# Returns the IRCified text of the first accepted candidate. Note that txt
# is shared by all attempts, so it always holds the last candidate
# examined, accepted or not.
370: def Utils.ircify_first_html_par_wh(xml_org, opts={})
371: doc = Hpricot(xml_org)
372:
373: # Strip styles and scripts
374: (doc/"style|script").remove
375:
376: debug doc
377:
378: strip = opts[:strip]
379: strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)
380:
381: min_spaces = opts[:min_spaces] || 8
382: min_spaces = 0 if min_spaces < 0
383:
384: txt = String.new
385:
# Element lists are collected lazily on the first pass and then reused
# on every later pass of the loop (nil means "not collected yet")
386: pre_h = pars = by_span = nil
387:
388: while true
389: debug "Minimum number of spaces: #{min_spaces}"
390:
391: # Initial attempt: <p> that follows <h\d>
392: if pre_h.nil?
393: pre_h = Hpricot::Elements[]
394: found_h = false
395: doc.search("*") { |e|
396: next if e.bogusetag?
397: case e.pathname
398: when /^h\d/
399: found_h = true
400: when 'p'
401: pre_h << e if found_h
402: end
403: }
404: debug "Hx: found: #{pre_h.pretty_inspect}"
405: end
406:
# Stop at the first candidate that is non-empty and has enough spaces
407: pre_h.each { |p|
408: debug p
409: txt = p.to_html.ircify_html
410: txt.sub!(strip, '') if strip
411: debug "(Hx attempt) #{txt.inspect} has #{txt.count(" ")} spaces"
412: break unless txt.empty? or txt.count(" ") < min_spaces
413: }
414:
415: return txt unless txt.empty? or txt.count(" ") < min_spaces
416:
417: # Second natural attempt: just get any <p>
418: pars = doc/"p" if pars.nil?
419: debug "par: found: #{pars.pretty_inspect}"
420: pars.each { |p|
421: debug p
422: txt = p.to_html.ircify_html
423: txt.sub!(strip, '') if strip
424: debug "(par attempt) #{txt.inspect} has #{txt.count(" ")} spaces"
425: break unless txt.empty? or txt.count(" ") < min_spaces
426: }
427:
428: return txt unless txt.empty? or txt.count(" ") < min_spaces
429:
430: # Nothing yet ... let's get drastic: we look for non-par elements too,
431: # but only for those that match something that we know is likely to
432: # contain text
433:
434: # Some blogging and forum platforms use spans or divs with a 'body' or
435: # 'message' or 'text' in their class to mark actual text. Since we want
436: # the class match to be partial and case insensitive, we collect
437: # the common elements that may have this class and then filter out those
438: # we don't need. If no divs or spans are found, we'll accept additional
439: # elements too (td, tr, tbody, table).
440: if by_span.nil?
441: by_span = Hpricot::Elements[]
442: extra = Hpricot::Elements[]
443: doc.search("*") { |el|
444: next if el.bogusetag?
445: case el.pathname
446: when AFTER_PAR_PATH
447: by_span.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
448: when AFTER_PAR_EX
449: extra.push el if el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS
450: end
451: }
452: if by_span.empty? and not extra.empty?
453: by_span.concat extra
454: end
455: debug "other \#1: found: #{by_span.pretty_inspect}"
456: end
457:
458: by_span.each { |p|
459: debug p
460: txt = p.to_html.ircify_html
461: txt.sub!(strip, '') if strip
462: debug "(other attempt \#1) #{txt.inspect} has #{txt.count(" ")} spaces"
463: break unless txt.empty? or txt.count(" ") < min_spaces
464: }
465:
466: return txt unless txt.empty? or txt.count(" ") < min_spaces
467:
468: # At worst, we can try stuff which is comprised between two <br>
469: # TODO
470:
471: debug "Last candidate #{txt.inspect} has #{txt.count(" ")} spaces"
# Unlike the checks above, an empty txt is accepted here; once min_spaces
# has dropped to 0 this return always fires, so the break below is only a
# safety net
472: return txt unless txt.count(" ") < min_spaces
473: break if min_spaces == 0
# Relax the requirement and retry all the attempts
474: min_spaces /= 2
475: end
476: end
HTML first par grabber without hpricot
# File lib/rbot/core/utils/utils.rb, line 479
# HTML first par grabber without Hpricot, based on regular expressions.
#
# xml_org:: the HTML/XML text to search
# opts::    :strip      - Regexp, or String (turned into a Regexp anchored
#                         at line start), removed from the beginning of
#                         each candidate text
#           :min_spaces - minimum number of spaces a candidate must contain
#                         to be accepted (default 8, negative values are
#                         treated as 0)
#
# Comments, scripts and styles are removed first. Candidates are then tried
# in this order: paragraphs after the first heading, paragraphs anywhere,
# elements matching AFTER_PAR1_REGEX, and finally text matching
# AFTER_PAR2_REGEX. If nothing is long enough, min_spaces is halved and all
# the attempts are repeated.
#
# Returns the IRCified text of the first accepted candidate; txt is shared
# by all attempts, so it always holds the last candidate examined.
def Utils.ircify_first_html_par_woh(xml_org, opts={})
  xml = xml_org.gsub(/<!--.*?-->/m, '')
  xml = xml.gsub(/<script(?:\s+[^>]*)?>.*?<\/script>/im, "")
  xml = xml.gsub(/<style(?:\s+[^>]*)?>.*?<\/style>/im, "")

  strip = opts[:strip]
  strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)

  min_spaces = opts[:min_spaces] || 8
  min_spaces = 0 if min_spaces < 0

  txt = String.new

  # true while the current candidate is empty or has too few spaces
  unsatisfied = lambda { txt.empty? || txt.count(" ") < min_spaces }

  # Walk through the successive matches of rx in source, storing each one
  # (IRCified and stripped) in txt, until one is good enough or no match
  # is left
  scan = lambda { |source, rx, label|
    remaining = source
    while unsatisfied.call
      found = rx.match(remaining)
      break unless found
      txt = found[0].ircify_html
      remaining = found.post_match
      txt.sub!(strip, '') if strip
      debug "(#{label}) #{txt.inspect} has #{txt.count(" ")} spaces"
    end
  }

  loop do
    debug "Minimum number of spaces: #{min_spaces}"

    # First attempt: paragraphs following the first heading
    heading = xml.match(HX_REGEX)
    scan.call(heading.post_match, PAR_REGEX, 'Hx attempt') if heading
    return txt unless unsatisfied.call

    # If we haven't found a first par yet, try to get it from the whole
    # document
    scan.call(xml, PAR_REGEX, 'par attempt')
    return txt unless unsatisfied.call

    # Nothing yet ... let's get drastic: we look for non-par elements too,
    # but only for those that match something that we know is likely to
    # contain text
    scan.call(xml, AFTER_PAR1_REGEX, 'other attempt #1')
    return txt unless unsatisfied.call

    scan.call(xml, AFTER_PAR2_REGEX, 'other attempt #2')

    debug "Last candidate #{txt.inspect} has #{txt.count(" ")} spaces"
    # here an empty candidate is acceptable as long as min_spaces allows it
    return txt unless txt.count(" ") < min_spaces
    break if min_spaces == 0
    min_spaces /= 2
  end
end
# File lib/rbot/core/utils/parse_time.rb, line 160
# Turn a time specification into an offset in seconds from now.
#
# str:: one of
#       * "H:M" or "H:M:S" - a 24-hour wall-clock time
#       * "H:Mam" / "H:Mpm" - a 12-hour wall-clock time (12am is midnight,
#         12pm is noon); the seconds of the current time are kept, so the
#         offset is a whole number of minutes
#       * anything else is handed to ParseTime.parse_period
#
# A wall-clock time that has already passed today is taken to mean that
# time tomorrow, so the offset for the two wall-clock forms is never
# negative. The comparison is made on the full timestamp, seconds
# included.
#
# Returns the offset in seconds for the wall-clock forms, or whatever
# ParseTime.parse_period returns otherwise. Out-of-range fields make
# Time.mktime raise ArgumentError.
def Utils.parse_time_offset(str)
  one_day = 60*60*24

  case str
  when /^(\d+):(\d+)(?:\:(\d+))?$/
    hour = $1.to_i
    min = $2.to_i
    sec = $3.to_i
    now = Time.now
    later = Time.mktime(now.year, now.month, now.day, hour, min, sec)

    # if the given time is earlier than the current time, it must have
    # been meant to be in the future
    later += one_day if later < now

    return later - now
  when /^(\d+):(\d+)(am|pm)$/
    hour = $1.to_i
    min = $2.to_i
    ampm = $3

    # 12am is 00:xx and 12pm is 12:xx on the 24-hour clock
    hour = 0 if hour == 12
    hour += 12 if ampm == "pm"

    now = Time.now
    later = Time.mktime(now.year, now.month, now.day, hour, min, now.sec)
    later += one_day if later < now

    return later - now
  else
    ParseTime.parse_period(str)
  end
end
Execute an external program, returning a String obtained by redirecting the program's standard error and standard output
# File lib/rbot/core/utils/utils.rb, line 278
# Run the external program +command+ with arguments +args+ in a forked
# child, without going through a shell, and return what it wrote.
#
# The child redirects its standard error onto its standard output before
# exec'ing the program, so both streams end up in the returned String. If
# the exec fails, a diagnostic line is printed (and thus returned) and the
# child exits with status 1.
#
# Returns the lines read from the child, joined with "\n".
# NOTE(review): readlines keeps each line terminator, so joining with "\n"
# looks like it doubles the newlines -- confirm whether callers rely on it.
def Utils.safe_exec(command, *args)
  IO.popen("-") do |pipe|
    # parent process: collect the child's output
    return pipe.readlines.join("\n") if pipe

    # child process (pipe is nil): become the requested program
    begin
      $stderr.reopen($stdout)
      exec(command, *args)
    rescue Exception => err
      puts "exception #{err.pretty_inspect} trying to run #{command}"
      Kernel::exit! 1
    end
    puts "exec of #{command} failed"
    Kernel::exit! 1
  end
end
Safely (atomically) save to file, by passing a tempfile to the block and then moving the tempfile to its final location when done.
# File lib/rbot/core/utils/utils.rb, line 321
# Safely (atomically) save to +file+: a binary-mode tempfile created in the
# safe save directory is passed to the block, then closed and renamed to
# +file+ once the block is done.
#
# file:: path of the destination file; its basename is used as the prefix
#        of the tempfile name
#
# If the block, the close or the rename raises, the tempfile is closed and
# removed before the exception propagates, and +file+ is left untouched by
# this method.
#
# Raises a RuntimeError if no safe save directory is defined.
def Utils.safe_save(file)
  raise 'No safe save directory defined!' if @@safe_save_dir.nil?
  basename = File.basename(file)
  temp = Tempfile.new(basename, @@safe_save_dir)
  saved = false
  begin
    temp.binmode
    yield temp if block_given?
    temp.close
    File.rename(temp.path, file)
    saved = true
  ensure
    # on failure do not leave an open handle or a stray tempfile behind
    temp.close! unless saved
  end
end
Turn a number of seconds into a short hours:minutes:seconds form, e.g. 3:18:10 or 5'12" or 7s
# File lib/rbot/core/utils/utils.rb, line 208
# Turn a number of seconds into a short string: "H:MM:SS" when at least an
# hour long, "M'SS\"" when at least a minute long, "Ss" otherwise
# (e.g. 3:18:10, 5'12", 7s).
#
# seconds:: the duration; converted with to_i
#
# Minutes and seconds that follow a larger unit are zero-padded to two
# digits (3:05:07 rather than 3:5:7). A negative duration is rendered as
# the corresponding positive one prefixed by "-".
def Utils.secs_to_short(seconds)
  total = seconds.to_i # make sure it's an integer
  sign = total < 0 ? "-" : ""
  mins, secs = total.abs.divmod 60
  hours, mins = mins.divmod 60
  if hours > 0
    "%s%d:%02d:%02d" % [sign, hours, mins, secs]
  elsif mins > 0
    "%s%d'%02d\"" % [sign, mins, secs]
  else
    "%s%ds" % [sign, secs]
  end
end
Turn a number of seconds into a human readable string, e.g 2 days, 3 hours, 18 minutes and 10 seconds
# File lib/rbot/core/utils/utils.rb, line 181
# Turn a number of seconds into a human readable string, e.g.
# "2 days, 3 hours, 18 minutes and 10 seconds".
#
# secs:: the duration in seconds
#
# The duration is broken down into years (365 days), months (30 days),
# days, hours, minutes and seconds; units whose count is zero are left out,
# except that a zero duration is reported in seconds.
#
# Returns the translated description. A single component is returned as a
# plain String (Array#to_s was used for this before, which gives the
# inspect form, brackets and quotes included, on Ruby 1.9 and later).
# NOTE(review): negative durations are not handled specially -- confirm
# callers never pass them.
def Utils.secs_to_string(secs)
  ret = []
  years, secs = secs.divmod SEC_PER_YR
  secs_to_string_case(ret, years, _("year"), _("years")) if years > 0
  months, secs = secs.divmod SEC_PER_MNTH
  secs_to_string_case(ret, months, _("month"), _("months")) if months > 0
  days, secs = secs.divmod SEC_PER_DAY
  secs_to_string_case(ret, days, _("day"), _("days")) if days > 0
  hours, secs = secs.divmod SEC_PER_HR
  secs_to_string_case(ret, hours, _("hour"), _("hours")) if hours > 0
  mins, secs = secs.divmod SEC_PER_MIN
  secs_to_string_case(ret, mins, _("minute"), _("minutes")) if mins > 0
  secs = secs.to_i
  # the seconds are always shown when nothing else is, so ret is never empty
  secs_to_string_case(ret, secs, _("second"), _("seconds")) if secs > 0 or ret.empty?

  case ret.length
  when 0
    raise "Empty ret array!"
  when 1
    return ret.first
  else
    return [ret[0, ret.length-1].join(", "), ret[-1]].join(_(" and "))
  end
end
Auxiliary method needed by Utils.secs_to_string
# File lib/rbot/core/utils/utils.rb, line 170
# Auxiliary method needed by Utils.secs_to_string: append "<var> <unit>" to
# +array+, choosing between the singular and the plural unit name.
#
# array::  Array the description is appended to
# var::    the amount
# string:: unit name used when the amount is 1
# plural:: unit name used for any other amount
#
# Returns +array+.
def Utils.secs_to_string_case(array, var, string, plural)
  array << (1 == var ? "1 #{string}" : "#{var} #{plural}")
end
Returns human readable time. Like: 5 days ago
about one hour ago
Options: <tt>:start_date</tt> sets the time to measure against (defaults to now); <tt>:date_format</tt> is accepted (the code defaults it to "%x") but is not used in the result.
# File lib/rbot/core/utils/utils.rb, line 227
# Return a human readable, translated description of how far +time+ lies
# from a reference time, e.g. "5 days ago" or "an hour from now".
#
# time::    the Time to describe
# options:: :start_date  - the time to measure against (defaults to now)
#           :date_format - accepted for compatibility, currently ignored
#
# Differences of less than two seconds give "right now". The +options+ Hash
# is only read, never modified.
def Utils.timeago(time, options = {})
  start_date = options[:start_date] || Time.new
  delta = (start_date - time).round
  return _("right now") if delta.abs < 2

  distance = Utils.age_string(delta)
  if delta < 0
    # time is later than the reference time
    _("%{d} from now") % {:d => distance}
  else
    _("%{d} ago") % {:d => distance}
  end
end
Try executing an external program, returning true if the run was successful and false otherwise
# File lib/rbot/core/utils/utils.rb, line 298
# Try running the external program +command+ with arguments +args+ in a
# forked child, without going through a shell.
#
# The child redirects its standard error onto its standard output and
# exec's the program, exiting with status 1 if that fails; the parent reads
# the child's output and only logs it.
#
# Returns true if the child exited successfully, false otherwise.
def Utils.try_exec(command, *args)
  IO.popen("-") do |pipe|
    if pipe
      # parent process: drain (and log) whatever the child prints
      debug pipe.readlines
    else
      # child process: become the requested program
      begin
        $stderr.reopen($stdout)
        exec(command, *args)
      rescue Exception
        Kernel::exit! 1
      end
      Kernel::exit! 1
    end
  end
  debug $?
  $?.success?
end
This method runs an appropriately-crafted DataStream ds through the filters in the :htmlinfo filter group, in order. If one of the filters returns non-nil, its results are merged in ds and returned. Otherwise nil is returned.
The input DataStream should have the downloaded HTML as primary key (:text) and possibly a :headers key holding the response headers.
# File lib/rbot/core/utils/utils.rb, line 637
# Run the DataStream +ds+ through the filters of the :htmlinfo group, in
# the order given by @@bot.filter_names, stopping at the first filter that
# returns a non-nil (true) result.
#
# ds:: DataStream passed to each filter
#
# Returns +ds+ merged with the result of that filter, or nil if the group
# is empty or no filter produced a result.
def Utils.try_htmlinfo_filters(ds)
  names = @@bot.filter_names(:htmlinfo)
  return nil if names.empty?

  outcome = nil
  # TODO filter priority
  names.each do |name|
    debug "testing htmlinfo filter #{name}"
    outcome = @@bot.filter(@@bot.global_filter_name(name, :htmlinfo), ds)
    debug "returned #{outcome.pretty_inspect}"
    break if outcome
  end

  outcome ? ds.merge(outcome) : nil
end