| Class | ::HTTPResponse |
| In: |
lib/rbot/core/utils/httputil.rb
|
| Parent: | Object |
| body | -> | raw_body |
| no_cache | [RW] |
# File lib/rbot/core/utils/httputil.rb, line 32
# Collect the candidate character sets for a textual response body.
#
# Candidates are gathered in this order: a hard-coded 'latin1' default, the
# charset parameter of the Content-Type header, and then either the encoding
# named in an XML processing instruction or the charset of an HTML
# <meta http-equiv="Content-Type"> tag found in +str+ (the XML declaration
# is checked first and wins when both are present).
#
# str:: the text to scan for in-document charset declarations; defaults to
#       the raw body
#
# Returns nil when the Content-Type (taken to be 'text/html' if the header
# is missing) neither starts with "text" nor contains "xml"/"xhtml";
# otherwise returns the candidates, duplicates removed, in the order found.
def body_charset(str=self.raw_body)
  ctype = self['content-type'] || 'text/html'
  return nil unless ctype =~ /^text/i || ctype =~ /x(ht)?ml/i

  charsets = ['latin1'] # should be in config

  if ctype =~ /charset=["']?([^\s"']+)["']?/i
    charsets << $1
    debug "charset #{charsets.last} added from header"
  end

  case str
  when /<\?xml\s[^>]*encoding=['"]([^\s"'>]+)["'][^>]*\?>/i
    charsets << $1
    debug "xml charset #{charsets.last} added from xml pi"
  when /<(meta\s[^>]*http-equiv=["']?Content-Type["']?[^>]*)>/i
    meta = $1
    # Match case-insensitively, like the header and the tag itself, so that
    # an upper-case "CHARSET=" inside the meta tag is not silently ignored.
    if meta =~ /charset=['"]?([^\s'";]+)['"]?/i
      charsets << $1
      debug "html charset #{charsets.last} added from meta"
    end
  end

  return charsets.uniq
end
# File lib/rbot/core/utils/httputil.rb, line 57
# Convert +str+ to UTF-8 using the charsets suggested by body_charset.
#
# The candidates are tried from the last one found to the first. For each
# candidate the conversion is attempted on the whole string and then with
# one to five trailing bytes dropped (presumably so that a multibyte
# character cut off at the end of a partial body does not defeat the
# conversion -- the original author flagged this loop as an ugly workaround).
#
# str:: the (already decompressed) body text
#
# Returns the first successful conversion, or +str+ unchanged when
# body_charset returns nil or every attempt fails.
def body_to_utf(str)
  candidates = body_charset(str)
  return str unless candidates

  candidates.reverse_each do |charset|
    (0..5).each do |off|
      begin
        debug "trying #{charset} / offset #{off}"
        trimmed = str.slice(0 .. (-1 - off))
        return Iconv.iconv('utf-8//ignore', charset, trimmed).first
      rescue
        # Any failure just moves on to the next offset / charset.
        debug "conversion failed for #{charset} / offset #{off}"
      end
    end
  end

  str
end
# File lib/rbot/core/utils/httputil.rb, line 126
126: def cooked_body
127: return self.body_to_utf(self.decompress_body(self.raw_body))
128: end
# File lib/rbot/core/utils/httputil.rb, line 78
# Undo the compression named by the Content-Encoding header.
#
# str:: the raw (possibly compressed) body
#
# Returns:
# * +str+ unchanged when there is no Content-Encoding header;
# * the gunzipped data for any encoding containing "gzip" -- if the stream
#   cannot be unpacked completely, as much as could be recovered;
# * the inflated data for "deflate", accepting both raw deflate streams and
#   zlib-wrapped ones;
# * +str+ unchanged when the header actually holds a charset name, in which
#   case the charset is appended to the Content-Type header instead.
#
# Raises Zlib::Error when a deflate body cannot be inflated, and
# RuntimeError for any other content encoding.
def decompress_body(str)
  method = self['content-encoding']
  case method
  when nil
    return str
  when /gzip/ # Matches gzip, x-gzip, and the non-rfc-compliant gzip;q=\d sent by some servers
    debug "gunzipping body"
    begin
      return Zlib::GzipReader.new(StringIO.new(str)).read
    rescue Zlib::Error => e
      # We could not unpack the whole stream (e.g. because we are doing a
      # partial read), so salvage whatever decompresses cleanly.
      debug "full gunzipping failed (#{e}), trying to recover as much as possible"
      # String.new gives a binary string, so appending an Integer adds that
      # exact byte; a "" literal would store bytes >= 0x80 as multi-byte
      # code points and corrupt the recovered data.
      ret = String.new
      begin
        Zlib::GzipReader.new(StringIO.new(str)).each_byte { |byte|
          ret << byte
        }
      rescue
        # Deliberate best effort: keep what was read before the failure.
      end
      return ret
    end
  when 'deflate'
    debug "inflating body"
    # -MAX_WBITS stops zlib from looking for a zlib header, which is what
    # servers sending raw deflate streams need.
    inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
    begin
      return inflater.inflate(str)
    rescue Zlib::DataError => e
      # Not a raw stream: standards-compliant servers wrap deflate data in
      # a zlib header, so retry with header detection enabled. If that
      # fails too, report the original error.
      begin
        return Zlib::Inflate.inflate(str)
      rescue Zlib::Error
        raise e
      end
      # TODO
      # debug "full inflation failed (#{e}), trying to recover as much as possible"
    ensure
      # Release the zlib stream instead of waiting for garbage collection.
      inflater.close
    end
  when /^(?:iso-8859-\d+|windows-\d+|utf-8|utf8)$/i
    # B0rked servers (Freshmeat being one of them) sometimes return the charset
    # in the content-encoding; in this case we assume that the document has
    # a standard content-encoding
    old_hsh = self.to_hash
    # A missing Content-Type is treated as text/html, as body_charset does,
    # rather than failing on nil.
    self['content-type'] = (self['content-type'] || 'text/html') + "; charset=" + method.downcase
    warning "Charset vs content-encoding confusion, trying to recover: from\n#{old_hsh.pretty_inspect}to\n#{self.to_hash.pretty_inspect}"
    return str
  else
    debug self.to_hash
    raise "Unhandled content encoding #{method}"
  end
end
Read chunks from the body until we have at least size bytes, yielding the partial text at each chunk. Return the partial body.
# File lib/rbot/core/utils/httputil.rb, line 132
# Read chunks from the body until at least +size+ bytes have been collected,
# yielding the decoded partial text after each chunk, and return the decoded
# partial body.
#
# size::  stop reading once this many bytes are accumulated; with the
#         default of 0 (or nil) reading is never cut short
# block:: optional; receives the decompressed, UTF-8 converted text
#         gathered so far
#
# When @read is already set the existing body is used (and yielded once)
# instead of reading chunks; otherwise no_cache is switched on before the
# chunks are read.
def partial_body(size=0, &block)
  # Decoding always restarts from the beginning of the accumulated data.
  decode = lambda { |raw| body_to_utf(decompress_body(raw)) }

  partial = String.new

  if @read
    debug "using body() as partial"
    partial = body
    yield decode.call(partial) if block_given?
  else
    debug "disabling cache"
    self.no_cache = true
    read_body do |chunk|
      partial << chunk
      yield decode.call(partial) if block_given?
      break if size && size > 0 && partial.length >= size
    end
  end

  decode.call(partial)
end