| Class | Mechanize::Util |
| In: |
lib/mechanize/util.rb
|
| Parent: | Object |
| CODE_DIC | = | { :JIS => "ISO-2022-JP", :EUC => "EUC-JP", :SJIS => "SHIFT_JIS", :UTF8 => "UTF-8", :UTF16 => "UTF-16", :UTF32 => "UTF-32"} |
# File lib/mechanize/util.rb, line 12
# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# +parameters+ is any enumerable yielding key/value pairs (a Hash or an
# Array of two-element Arrays). Keys and values are converted with +to_s+
# and escaped with CGI.escape. Pairs whose key is nil or false are skipped.
#
# +enc+ is accepted for interface compatibility and is not used here.
#
# Returns the joined String; an empty collection yields "".
def build_query_string(parameters, enc = nil)
  pairs = []
  parameters.each do |name, value|
    next unless name

    # CGI.escape is used on purpose: WEBrick::HTTP.escape* has some
    # problems with m17n on ruby-1.9.*.
    escaped_name = CGI.escape(name.to_s)
    escaped_value = CGI.escape(value.to_s)
    pairs << [escaped_name, escaped_value].join("=")
  end
  pairs.join('&')
end
# File lib/mechanize/util.rb, line 54
# Guesses the character encoding of +src+ using NKF.guess and returns its
# name as a String.
#
# When +src+ is nil, the literal "<html></html>" is guessed instead.
#
# On Ruby >= 1.9.0 the guess result is stringified and upcased. On older
# Rubies the guess result is matched against the NKF constants and the
# constant's name is translated through CODE_DIC.
#
# Falls back to "ISO-8859-1" whenever no usable name is obtained.
def detect_charset(src)
  guessed = NKF.guess(src || "<html></html>")

  enc =
    if RUBY_VERSION >= "1.9.0"
      name = guessed.to_s.upcase
      # An empty name is truthy in Ruby and would otherwise slip past the
      # fallback below, so treat it as "nothing detected".
      name.empty? ? nil : name
    else
      const_name = NKF.constants.find { |c| NKF.const_get(c) == guessed }
      # +find+ returns nil when no constant matches; guard so that the
      # fallback is used instead of raising NoMethodError on nil.intern.
      const_name && CODE_DIC[const_name.intern]
    end

  enc || "ISO-8859-1"
end
# File lib/mechanize/util.rb, line 29
# Re-encodes +s+ with Iconv, using +code+ as the target encoding and
# "UTF-8" as the source encoding.
#
# +s+ is returned untouched when:
# * +s+ or +code+ is nil/false,
# * Mechanize.html_parser is not Nokogiri::HTML, or
# * Iconv raises Iconv::InvalidEncoding (the requested encoding is not
#   known to Iconv).
#
# Otherwise returns the converted String.
def from_native_charset(s, code)
  if s && code && Mechanize.html_parser == Nokogiri::HTML
    begin
      # Iconv.iconv returns an Array of converted chunks; glue them back
      # into a single String.
      converted = Iconv.iconv(code.to_s, "UTF-8", s)
      converted.join("")
    rescue Iconv::InvalidEncoding
      s
    end
  else
    s
  end
end
# File lib/mechanize/util.rb, line 40
# Replaces HTML character references in +s+ with the characters they
# denote and returns the resulting String (nil/false is returned as is).
#
# Three forms are recognised:
# * named references (&name;), looked up in the NamedCharacters table of
#   Mechanize.html_parser;
# * decimal numeric references (&#65;);
# * hexadecimal numeric references (&#x41; or &#X41;).
#
# A reference is left unchanged when its name is not in the table or when
# its code point cannot be packed as a UTF-8 character.
def html_unescape(s)
  return s unless s

  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    number = case match
             when /\A&#([0-9]+);\z/
               $1.to_i
             when /\A&#[xX]([0-9a-fA-F]+);\z/
               $1.hex
             when /\A&(\w+);\z/
               Mechanize.html_parser::NamedCharacters[$1]
             end

    if number
      begin
        [number].pack('U')
      rescue StandardError
        # Out-of-range or otherwise unpackable code point: keep the
        # original reference text rather than failing the whole string.
        match
      end
    else
      match
    end
  }
end