| Class | FeedTools::FeedItem |
| In: | lib/feed_tools/feed_item.rb |
| Parent: | Object |
The FeedTools::FeedItem class represents the structure of a single item within a web feed.
Initializes the feed item object.
# File lib/feed_tools/feed_item.rb, line 31
# Creates an empty feed item.
#
# No feed data, XML document or root node is attached yet, and the cached
# title and id start out unset.  The data type defaults to :xml, the
# item's time is stamped with the current time in GMT, and the item
# records the FeedTools version it was created with.
def initialize
  super
  # Nothing has been loaded or parsed at construction time.
  @feed_data = @xml_document = @root_node = nil
  @title = @id = nil
  @feed_data_type = :xml
  @time = Time.now.gmtime
  @version = FeedTools::FEED_TOOLS_VERSION::STRING
end
# File lib/feed_tools/feed_item.rb, line 1420
# Returns the feed item's author as a FeedTools::Author.
#
# The author is parsed from the item's XML on first access and cached in
# @author.  Parsing proceeds in stages:
#
# 1. The text of the author element is split into a name and an email
#    address when it looks like "Name (email)" or "email (Name)".  A bare
#    email address anywhere in the text is used as the email.  Text
#    without an "@" is taken to be the name.
# 2. Anything still blank (name, email, url) is looked up in the author
#    element's child elements and attributes.
# 3. If the name is still missing but an email is known, the name is
#    extracted from forms such as 'Name <email>' or 'email ("Name")'.
# 4. If no name was found at all, the itunes author is used.
# 5. If name, email and href are all blank and the item has a parent
#    feed, a copy of the parent feed's author is used instead.
def author
  if @author.nil?
    @author = FeedTools::Author.new
    author_node = FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "atom10:author",
      "atom03:author",
      "atom:author",
      "author",
      "managingEditor",
      "dc:author",
      "dc:creator",
      "creator"
    ])
    unless author_node.nil?
      @author.raw = FeedTools::XmlHelper.try_xpaths(
        author_node, ["text()"], :select_result_value => true)
      @author.raw = FeedTools::HtmlHelper.unescape_entities(@author.raw)
      unless @author.raw.nil?
        # The "-" must come last in the character class.  Written as
        # "%-\+" it forms a range from "%" to "+", which rejects hyphens
        # in the local part and accepts characters such as "(" and "'".
        email_pattern = '\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b'

        author_raw_pair = nil
        # "Name (email)"
        raw_scan = @author.raw.scan(/(.*)\((#{email_pattern})\)/i)
        if raw_scan.empty?
          # "email (Name)" -- reversed so the name always comes first.
          raw_scan = @author.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
          author_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.empty?
          # Neither paired form matched; look for a bare email address.
          email_scan = @author.raw.scan(/#{email_pattern}/i)
          @author.email = email_scan.first.strip unless email_scan.empty?
        end
        if author_raw_pair.nil? || author_raw_pair.empty?
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address
            # if it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end
      if @author.name.blank?
        @author.name = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:name/text()",
            "atom03:name/text()",
            "atom:name/text()",
            "name/text()",
            "@name"
          ], :select_result_value => true)
        )
      end
      if @author.email.blank?
        @author.email = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:email/text()",
            "atom03:email/text()",
            "atom:email/text()",
            "email/text()",
            "@email"
          ], :select_result_value => true)
        )
      end
      if @author.url.blank?
        @author.url = FeedTools::HtmlHelper.unescape_entities(
          FeedTools::XmlHelper.try_xpaths(author_node, [
            "atom10:url/text()",
            "atom03:url/text()",
            "atom:url/text()",
            "url/text()",
            "atom10:uri/text()",
            "atom03:uri/text()",
            "atom:uri/text()",
            "uri/text()",
            "@url",
            "@uri",
            "@href"
          ], :select_result_value => true)
        )
      end
      if @author.name.blank? && !@author.raw.blank? &&
          !@author.email.blank?
        # The email is data, not a pattern: escape it so characters such
        # as "+" and "." are matched literally.
        escaped_email = Regexp.escape(@author.email)
        name_scan = @author.raw.scan(
          /"?([^"]*)"? ?[\(<].*#{escaped_email}.*[\)>].*/)
        if name_scan.flatten.size == 1
          @author.name = name_scan.flatten[0].strip
        end
        if @author.name.blank?
          name_scan = @author.raw.scan(
            /.*#{escaped_email} ?[\(<]"?([^"]*)"?[\)>].*/)
          if name_scan.flatten.size == 1
            @author.name = name_scan.flatten[0].strip
          end
        end
      end
      # Normalize blank values to nil.
      @author.name = nil if @author.name.blank?
      @author.raw = nil if @author.raw.blank?
      @author.email = nil if @author.email.blank?
      @author.url = nil if @author.url.blank?
      if @author.url != nil
        begin
          if !(@author.url =~ /^file:/) &&
              !FeedTools::UriHelper.is_uri?(@author.url)
            @author.url = FeedTools::UriHelper.resolve_relative_uri(
              @author.url, [author_node.base_uri, self.base_uri])
          end
        rescue
          # Best effort: keep the url as found if it cannot be resolved.
        end
      end
      if FeedTools::XmlHelper.try_xpaths(author_node,
          ["@gr:unknown-author"], :select_result_value => true) == "true"
        # The feed flags the author as unknown; drop the placeholder name.
        if @author.name == "(author unknown)"
          @author.name = nil
        end
      end
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
    if @author.name.blank? && @author.email.blank? &&
        @author.href.blank?
      parent_feed = self.feed
      if parent_feed != nil
        @author = parent_feed.author.dup
      end
    end
  end
  return @author
end
# File lib/feed_tools/feed_item.rb, line 1562
# Sets the feed item's author.
#
# An object that responds to name, email and url is treated as a complete
# author object and stored as-is.  Anything else (most likely a string) is
# used as the name of the current author, which is created first if the
# item does not have one yet.
def author=(new_author)
  complete_author = [:name, :email, :url].all? do |attribute|
    new_author.respond_to?(attribute)
  end
  if complete_author
    @author = new_author
  else
    @author = FeedTools::Author.new if @author.nil?
    @author.name = new_author
  end
end
Generates xml based on the content of the feed item
# File lib/feed_tools/feed_item.rb, line 2034
# Generates xml based on the content of the feed item.
#
# feed_type::   "rss" or "atom".  Defaults to the parent feed's type, or
#               to "atom" when the item has no parent feed or the parent
#               has no type.
# version::     Numeric format version.  nil or 0.0 selects 1.0, which
#               for "rss" means the RDF-based format.
# xml_builder:: Builder-style object that the markup is written to.
#
# Returns the result of the builder call for the outermost element.
#
# Raises StandardError if the parent feed or this item denies
# redistribution, and RuntimeError if the format/version is unsupported or
# required data is missing (a nil link for RDF-based rss, no usable
# unique id for atom).
#
# An item without a parent feed is supported: the feed-level access
# checks and the podcast-only output are simply skipped.
def build_xml(feed_type=((self.feed && self.feed.feed_type) || "atom"),
    version=nil,
    xml_builder=Builder::XmlMarkup.new(
      :indent => 2, :escape_attrs => false))

  parent_feed = self.feed
  unless parent_feed.nil?
    if parent_feed.find_node(
        "access:restriction/@relationship").to_s == "deny"
      raise StandardError,
        "Operation not permitted. This feed denies redistribution."
    elsif parent_feed.find_node("@indexing:index").to_s == "no"
      raise StandardError,
        "Operation not permitted. This feed denies redistribution."
    end
  end
  if self.find_node(
      "access:restriction/@relationship").to_s == "deny"
    raise StandardError,
      "Operation not permitted. This feed item denies redistribution."
  end
  # itunes:keywords are only emitted for items that belong to a podcast.
  in_podcast = !parent_feed.nil? && parent_feed.podcast?

  self.full_parse()

  if (feed_type == "rss" || feed_type == "atom") &&
      (version == nil || version == 0.0)
    version = 1.0
  end
  if feed_type == "rss" &&
      (version == 0.9 || version == 1.0 || version == 1.1)
    # RDF-based rss format
    if link.nil?
      raise "Cannot generate an rdf-based feed item with a " +
        "nil link field."
    end
    return xml_builder.item("rdf:about" =>
        FeedTools::HtmlHelper.escape_entities(link)) do
      # title, link and description are always written in this format,
      # as empty elements if need be.
      if self.title.blank?
        xml_builder.title
      else
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title))
      end
      if self.link.blank?
        xml_builder.link
      else
        xml_builder.link(self.link)
      end
      unless self.author.nil? || self.author.name.nil?
        xml_builder.tag!("dc:creator", self.author.name)
      end
      if self.summary.blank?
        xml_builder.description
      else
        xml_builder.description(self.summary)
      end
      unless self.content.blank?
        xml_builder.tag!("content:encoded") do
          xml_builder.cdata!(self.content)
        end
      end
      unless time.nil?
        xml_builder.tag!("dc:date", time.iso8601)
      end
      unless self.rights.blank?
        xml_builder.tag!("dc:rights", self.rights)
      end
      unless tags.nil? || tags.size == 0
        tags.each do |tag|
          xml_builder.tag!("dc:subject", tag)
        end
        if in_podcast
          xml_builder.tag!("itunes:keywords", tags.join(", "))
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.item do
      unless self.title.blank?
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title))
      end
      unless self.link.blank?
        xml_builder.link(self.link)
      end
      unless self.author.nil? || self.author.name.nil?
        xml_builder.tag!("dc:creator", self.author.name)
      end
      unless self.author.nil? || self.author.email.nil? ||
          self.author.name.nil?
        xml_builder.author("#{self.author.email} (#{self.author.name})")
      end
      unless self.summary.blank?
        xml_builder.description(self.summary)
      end
      unless self.content.blank?
        xml_builder.tag!("content:encoded") do
          xml_builder.cdata!(self.content)
        end
      end
      if !self.published.nil?
        xml_builder.pubDate(self.published.rfc822)
      elsif !self.time.nil?
        xml_builder.pubDate(self.time.rfc822)
      end
      unless self.rights.blank?
        xml_builder.tag!("dc:rights", self.rights)
      end
      if self.guid.blank?
        # No guid of its own: the link doubles as a permalink guid.
        unless self.link.blank?
          xml_builder.guid(self.link, "isPermaLink" => "true")
        end
      elsif FeedTools::UriHelper.is_uri?(self.guid) &&
          (self.guid =~ /^http/)
        xml_builder.guid(self.guid, "isPermaLink" => "true")
      else
        xml_builder.guid(self.guid, "isPermaLink" => "false")
      end
      unless tags.nil? || tags.size == 0
        tags.each do |tag|
          xml_builder.tag!("category", tag)
        end
        if in_podcast
          xml_builder.tag!("itunes:keywords", tags.join(", "))
        end
      end
      unless self.enclosures.blank? || self.enclosures.size == 0
        self.enclosures.each do |enclosure|
          attribute_hash = {}
          next if enclosure.url.blank?
          begin
            if enclosure.file_size.blank? || enclosure.file_size.to_i == 0
              # We can't use this enclosure because it's missing the
              # required file size.  Check alternate versions for
              # file_size.
              if !enclosure.versions.blank? && enclosure.versions.size > 0
                sized_alternate = enclosure.versions.find do |alternate|
                  alternate.file_size != nil &&
                    alternate.file_size.to_i > 0
                end
                enclosure = sized_alternate unless sized_alternate.nil?
              end
            end
          rescue
            # Best effort: if the alternates cannot be inspected, carry
            # on with the enclosure we already have.
          end
          attribute_hash["url"] =
            FeedTools::UriHelper.normalize_url(enclosure.url)
          if enclosure.type != nil
            attribute_hash["type"] = enclosure.type
          end
          if enclosure.file_size != nil && enclosure.file_size.to_i > 0
            attribute_hash["length"] = enclosure.file_size.to_s
          else
            # We couldn't find an alternate and the problem is still
            # there.  Give up and go on.
            xml_builder.comment!(
              "*** Enclosure failed to include file size. Ignoring. ***")
            next
          end
          xml_builder.enclosure(attribute_hash)
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  elsif feed_type == "atom" && version == 0.3
    raise "Atom 0.3 is obsolete."
  elsif feed_type == "atom" && version == 1.0
    # normal atom format
    return xml_builder.entry("xmlns" =>
        FEED_TOOLS_NAMESPACES['atom10']) do
      unless title.nil? || title == ""
        xml_builder.title(
          FeedTools::HtmlHelper.strip_html_tags(self.title),
          "type" => "html")
      end
      # An author element is always written; "n/a" stands in for a
      # missing name.
      xml_builder.author do
        if self.author.nil? || self.author.name.nil?
          xml_builder.name("n/a")
        else
          xml_builder.name(self.author.name)
        end
        unless self.author.nil? || self.author.email.nil?
          xml_builder.email(self.author.email)
        end
        unless self.author.nil? || self.author.url.nil?
          xml_builder.uri(self.author.url)
        end
      end
      unless link.nil? || link == ""
        xml_builder.link(
          "href" =>
            FeedTools::HtmlHelper.escape_entities(self.link),
          "rel" => "alternate")
      end
      if !self.content.blank?
        xml_builder.content(self.content,
          "type" => "html")
      end
      if !self.summary.blank?
        xml_builder.summary(self.summary,
          "type" => "html")
      end
      if self.updated != nil
        xml_builder.updated(self.updated.iso8601)
      elsif self.time != nil
        # Not technically correct, but a heck of a lot better
        # than the Time.now fall-back.
        xml_builder.updated(self.time.iso8601)
      else
        xml_builder.updated(Time.now.gmtime.iso8601)
      end
      unless self.published.nil?
        xml_builder.published(self.published.iso8601)
      end
      unless self.rights.blank?
        xml_builder.rights(self.rights)
      end
      if self.id != nil
        if FeedTools::UriHelper.is_uri? self.id
          xml_builder.id(self.id)
        elsif self.time != nil && self.link != nil
          # The id is not a URI; derive one from the link and time.
          xml_builder.id(FeedTools::UriHelper.build_tag_uri(
            self.link, self.time))
        elsif self.link != nil
          xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
        else
          raise "The unique id must be a URI. " +
            "(Attempted to generate id, but failed.)"
        end
      elsif self.time != nil && self.link != nil
        xml_builder.id(FeedTools::UriHelper.build_tag_uri(
          self.link, self.time))
      else
        raise "Cannot build feed, missing feed unique id."
      end
      unless self.tags.nil? || self.tags.size == 0
        self.tags.each do |tag|
          xml_builder.category("term" => tag)
        end
      end
      unless self.enclosures.blank? || self.enclosures.size == 0
        self.enclosures.each do |enclosure|
          attribute_hash = {}
          next if enclosure.url.blank?
          attribute_hash["rel"] = "enclosure"
          attribute_hash["href"] =
            FeedTools::UriHelper.normalize_url(enclosure.url)
          if enclosure.type != nil
            attribute_hash["type"] = enclosure.type
          end
          if enclosure.file_size != nil && enclosure.file_size.to_i > 0
            attribute_hash["length"] = enclosure.file_size.to_s
          end
          xml_builder.link(attribute_hash)
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
  else
    raise "Unsupported feed format/version."
  end
end
Returns a list of the feed item's categories.
# File lib/feed_tools/feed_item.rb, line 743
# Returns a list of the feed item's categories.
#
# The list is built from the item's "category" and "dc:subject" nodes on
# first access and cached in @categories.  Each entry is a
# FeedTools::Category whose term, label and scheme are read from the node
# and stripped of surrounding whitespace when present.
def categories
  return @categories unless @categories.nil?

  @categories = []
  nodes = FeedTools::XmlHelper.try_xpaths_all(
    self.root_node, ["category", "dc:subject"])
  nodes.each do |node|
    entry = FeedTools::Category.new

    # The term comes from the @term attribute, or failing that the text.
    entry.term = FeedTools::XmlHelper.try_xpaths(
      node, ["@term", "text()"], :select_result_value => true)
    entry.term.strip! unless entry.term.nil?

    entry.label = FeedTools::XmlHelper.try_xpaths(
      node, ["@label"], :select_result_value => true)
    entry.label.strip! unless entry.label.nil?

    # The scheme comes from @scheme, or failing that @domain.
    entry.scheme = FeedTools::XmlHelper.try_xpaths(
      node, ["@scheme", "@domain"], :select_result_value => true)
    entry.scheme.strip! unless entry.scheme.nil?

    @categories << entry
  end
  return @categories
end
Returns the url for posting comments
# File lib/feed_tools/feed_item.rb, line 643
# Returns the url for posting comments.
#
# The value is read from the item's "comments" element on first access
# and cached in @comments.  Unless it is a "file:" url or already a URI,
# it is resolved against the root node's base URI and the item's base
# URI; failures during resolution are ignored.  The result is normalized
# when the :url_normalization_enabled configuration option is set.
def comments
  return @comments unless @comments.nil?

  @comments = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["comments/text()"],
    :select_result_value => true)
  begin
    unless (@comments =~ /^file:/) ||
        FeedTools::UriHelper.is_uri?(@comments)
      root_base_uri = self.root_node.nil? ? nil : self.root_node.base_uri
      @comments = FeedTools::UriHelper.resolve_relative_uri(
        @comments, [root_base_uri, self.base_uri])
    end
  rescue
  end
  if self.configurations[:url_normalization_enabled]
    @comments = FeedTools::UriHelper.normalize_url(@comments)
  end
  return @comments
end
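Returns the load options for this feed item. They are copied from the parent feed when there is one, and from the global FeedTools configuration otherwise.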
# File lib/feed_tools/feed_item.rb, line 139
# Returns the load options for this feed item.
#
# While @configurations is blank, it is filled with a copy of the parent
# feed's configurations, or of the FeedTools module's configurations when
# the item has no parent feed.
def configurations
  if @configurations.blank?
    # Both the parent feed and the FeedTools module answer to
    # #configurations, so pick the source first and copy from it.
    source = self.feed
    source = FeedTools if source.nil?
    @configurations = source.configurations.dup
  end
  return @configurations
end
# File lib/feed_tools/feed_item.rb, line 328
# Returns the feed item's content.
#
# The content is computed on first access and cached in @content.  The
# first matching node from a list of known content-bearing elements is
# processed as a text construct.  For atom feeds, or when the
# :always_strip_wrapper_elements configuration option is set, the wrapper
# element is stripped.  If that yields nothing, the media text, the itunes
# summary and the itunes subtitle are tried, in that order.
def content
  return @content unless @content.nil?

  # Candidate elements, in order of preference.
  content_xpaths = [
    "atom10:content",
    "atom03:content",
    "atom:content",
    "body/datacontent",
    "xhtml:body",
    "body",
    "xhtml:div",
    "div",
    "p:payload",
    "payload",
    "content:encoded",
    "content",
    "fullitem",
    "encoded",
    "description",
    "tagline",
    "subtitle",
    "atom10:summary",
    "atom03:summary",
    "atom:summary",
    "summary",
    "abstract",
    "blurb",
    "info"
  ]
  content_node =
    FeedTools::XmlHelper.try_xpaths(self.root_node, content_xpaths)
  @content = FeedTools::HtmlHelper.process_text_construct(content_node,
    self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @content = FeedTools::HtmlHelper.strip_wrapper_element(@content)
  end

  # Fall back on other modules, stopping at the first one with a value.
  @content = self.media_text if @content.nil?
  @content = self.itunes_summary if @content.nil?
  @content = self.itunes_subtitle if @content.nil?
  return @content
end
Breaks any references that the feed entry may be keeping around, thus making the job of the garbage collector much, much easier. Call this method prior to feed entries going out of scope to prevent memory leaks.
# File lib/feed_tools/feed_item.rb, line 46
# Releases the raw feed data, the parsed XML document and root node, and
# the cached title, id and time by setting them all to nil.  Call this
# before a feed item goes out of scope so the garbage collector has less
# to untangle.
def dispose()
  @feed_data = @feed_data_type = nil
  @xml_document = @root_node = nil
  @title = @id = @time = nil
end
Returns all feed item enclosures
# File lib/feed_tools/feed_item.rb, line 937
937: def enclosures
938: if @enclosures.nil?
939: @enclosures = []
940:
941: # First, load up all the different possible sources of enclosures
942: rss_enclosures =
943: FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["enclosure"])
944: atom_enclosures =
945: FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
946: "atom10:link[@rel='enclosure']",
947: "atom03:link[@rel='enclosure']",
948: "atom:link[@rel='enclosure']",
949: "link[@rel='enclosure']"
950: ])
951: media_content_enclosures =
952: FeedTools::XmlHelper.try_xpaths_all(self.root_node,
953: ["media:content"])
954: media_group_enclosures =
955: FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["media:group"])
956:
957: bogus_enclosures =
958: FeedTools::XmlHelper.try_xpaths_all(self.root_node, ["video"])
959:
960: # TODO: Implement this
961: bittorrent_enclosures =
962: FeedTools::XmlHelper.try_xpaths_all(self.root_node,
963: ["bitTorrent:torrent"])
964:
965:
966: # Parse RSS-type enclosures. Thanks to a few buggy enclosures
967: # implementations, sometimes these also manage to show up in atom
968: # files.
969: for enclosure_node in rss_enclosures
970: enclosure = FeedTools::Enclosure.new
971: enclosure.url = FeedTools::HtmlHelper.unescape_entities(
972: enclosure_node.attributes["url"].to_s)
973: enclosure.type = enclosure_node.attributes["type"].to_s
974: enclosure.file_size = enclosure_node.attributes["length"].to_i
975: enclosure.credits = []
976: enclosure.explicit = false
977: @enclosures << enclosure
978: end
979:
980: # Parse atom-type enclosures. If there are repeats of the same
981: # enclosure object, we merge the two together.
982: for enclosure_node in atom_enclosures
983: enclosure_url = FeedTools::HtmlHelper.unescape_entities(
984: enclosure_node.attributes["href"].to_s)
985: enclosure = nil
986: new_enclosure = false
987: for existing_enclosure in @enclosures
988: if existing_enclosure.url == enclosure_url
989: enclosure = existing_enclosure
990: break
991: end
992: end
993: if enclosure.nil?
994: new_enclosure = true
995: enclosure = FeedTools::Enclosure.new
996: end
997: enclosure.url = enclosure_url
998: enclosure.type = enclosure_node.attributes["type"].to_s
999: enclosure.file_size = enclosure_node.attributes["length"].to_i
1000: enclosure.credits = []
1001: enclosure.explicit = false
1002: if new_enclosure
1003: @enclosures << enclosure
1004: end
1005: end
1006:
1007: # Parse atom-type enclosures. If there are repeats of the same
1008: # enclosure object, we merge the two together.
1009: for enclosure_node in bogus_enclosures
1010: enclosure_url = FeedTools::HtmlHelper.unescape_entities(
1011: enclosure_node.attributes["url"].to_s)
1012: enclosure = nil
1013: new_enclosure = false
1014: for existing_enclosure in @enclosures
1015: if existing_enclosure.url == enclosure_url
1016: enclosure = existing_enclosure
1017: break
1018: end
1019: end
1020: if enclosure.nil?
1021: new_enclosure = true
1022: enclosure = FeedTools::Enclosure.new
1023: end
1024: enclosure.url = enclosure_url
1025: if File.extname(enclosure_url) == ".wmv"
1026: enclosure.type = "video/x-ms-wmv"
1027: end
1028: enclosure.explicit = false
1029: if new_enclosure
1030: @enclosures << enclosure
1031: end
1032: end
1033:
1034: # Creates an anonymous method to parse content objects from the media
1035: # module. We do this to avoid excessive duplication of code since we
1036: # have to do identical processing for content objects within group
1037: # objects.
1038: parse_media_content = lambda do |media_content_nodes|
1039: affected_enclosures = []
1040: for enclosure_node in media_content_nodes
1041: enclosure_url = FeedTools::HtmlHelper.unescape_entities(
1042: enclosure_node.attributes["url"].to_s)
1043: enclosure = nil
1044: new_enclosure = false
1045: for existing_enclosure in @enclosures
1046: if existing_enclosure.url == enclosure_url
1047: enclosure = existing_enclosure
1048: break
1049: end
1050: end
1051: if enclosure.nil?
1052: new_enclosure = true
1053: enclosure = FeedTools::Enclosure.new
1054: end
1055: enclosure.url = enclosure_url
1056: enclosure.type = enclosure_node.attributes["type"].to_s
1057: enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
1058: enclosure.duration = enclosure_node.attributes["duration"].to_s
1059: enclosure.height = enclosure_node.attributes["height"].to_i
1060: enclosure.width = enclosure_node.attributes["width"].to_i
1061: enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
1062: enclosure.framerate = enclosure_node.attributes["framerate"].to_i
1063: enclosure.expression =
1064: enclosure_node.attributes["expression"].to_s
1065: enclosure.is_default =
1066: (enclosure_node.attributes["isDefault"].to_s.downcase == "true")
1067: enclosure_thumbnail_url =
1068: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1069: ["media:thumbnail/@url"], :select_result_value => true)
1070: if !enclosure_thumbnail_url.blank?
1071: enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
1072: FeedTools::HtmlHelper.unescape_entities(
1073: enclosure_thumbnail_url),
1074: FeedTools::HtmlHelper.unescape_entities(
1075: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1076: ["media:thumbnail/@height"],
1077: :select_result_value => true)),
1078: FeedTools::HtmlHelper.unescape_entities(
1079: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1080: ["media:thumbnail/@width"],
1081: :select_result_value => true))
1082: )
1083: end
1084: enclosure.categories = []
1085: for category in FeedTools::XmlHelper.try_xpaths_all(
1086: enclosure_node, ["media:category"])
1087: enclosure.categories << FeedTools::Category.new
1088: enclosure.categories.last.term =
1089: FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
1090: enclosure.categories.last.scheme =
1091: FeedTools::HtmlHelper.unescape_entities(
1092: category.attributes["scheme"].to_s)
1093: enclosure.categories.last.label =
1094: FeedTools::HtmlHelper.unescape_entities(
1095: category.attributes["label"].to_s)
1096: if enclosure.categories.last.scheme.blank?
1097: enclosure.categories.last.scheme = nil
1098: end
1099: if enclosure.categories.last.label.blank?
1100: enclosure.categories.last.label = nil
1101: end
1102: end
1103: enclosure_media_hash =
1104: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1105: ["media:hash/text()"], :select_result_value => true)
1106: if !enclosure_media_hash.nil?
1107: enclosure.hash = FeedTools::EnclosureHash.new(
1108: FeedTools::HtmlHelper.sanitize_html(
1109: FeedTools::HtmlHelper.unescape_entities(
1110: enclosure_media_hash), :strip),
1111: "md5"
1112: )
1113: end
1114: enclosure_media_player_url =
1115: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1116: ["media:player/@url"], :select_result_value => true)
1117: if !enclosure_media_player_url.blank?
1118: enclosure.player = FeedTools::EnclosurePlayer.new(
1119: FeedTools::HtmlHelper.unescape_entities(
1120: enclosure_media_player_url),
1121: FeedTools::HtmlHelper.unescape_entities(
1122: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1123: ["media:player/@height"], :select_result_value => true)),
1124: FeedTools::HtmlHelper.unescape_entities(
1125: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1126: ["media:player/@width"], :select_result_value => true))
1127: )
1128: end
1129: enclosure.credits = []
1130: for credit in FeedTools::XmlHelper.try_xpaths_all(
1131: enclosure_node, ["media:credit"])
1132: enclosure.credits << FeedTools::EnclosureCredit.new(
1133: FeedTools::HtmlHelper.unescape_entities(
1134: credit.inner_xml.to_s.strip),
1135: FeedTools::HtmlHelper.unescape_entities(
1136: credit.attributes["role"].to_s.downcase)
1137: )
1138: if enclosure.credits.last.name.blank?
1139: enclosure.credits.last.name = nil
1140: end
1141: if enclosure.credits.last.role.blank?
1142: enclosure.credits.last.role = nil
1143: end
1144: end
1145: enclosure.explicit =
1146: (FeedTools::XmlHelper.try_xpaths(enclosure_node,
1147: ["media:adult/text()"]).to_s.downcase == "true")
1148: enclosure_media_text =
1149: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1150: ["media:text/text()"])
1151: if !enclosure_media_text.blank?
1152: enclosure.text = FeedTools::HtmlHelper.unescape_entities(
1153: enclosure_media_text)
1154: end
1155: affected_enclosures << enclosure
1156: if new_enclosure
1157: @enclosures << enclosure
1158: end
1159: end
1160: affected_enclosures
1161: end
1162:
1163: # Parse the independant content objects.
1164: parse_media_content.call(media_content_enclosures)
1165:
1166: media_groups = []
1167:
1168: # Parse the group objects.
1169: for media_group in media_group_enclosures
1170: group_media_content_enclosures =
1171: FeedTools::XmlHelper.try_xpaths_all(media_group,
1172: ["media:content"])
1173:
1174: # Parse the content objects within the group objects.
1175: affected_enclosures =
1176: parse_media_content.call(group_media_content_enclosures)
1177:
1178: # Now make sure that content objects inherit certain properties from
1179: # the group objects.
1180: for enclosure in affected_enclosures
1181: media_group_thumbnail =
1182: FeedTools::XmlHelper.try_xpaths(media_group,
1183: ["media:thumbnail/@url"], :select_result_value => true)
1184: if enclosure.thumbnail.nil? && !media_group_thumbnail.blank?
1185: enclosure.thumbnail = FeedTools::EnclosureThumbnail.new(
1186: FeedTools::HtmlHelper.unescape_entities(
1187: media_group_thumbnail),
1188: FeedTools::HtmlHelper.unescape_entities(
1189: FeedTools::XmlHelper.try_xpaths(media_group,
1190: ["media:thumbnail/@height"],
1191: :select_result_value => true)),
1192: FeedTools::HtmlHelper.unescape_entities(
1193: FeedTools::XmlHelper.try_xpaths(media_group,
1194: ["media:thumbnail/@width"],
1195: :select_result_value => true))
1196: )
1197: end
1198: if (enclosure.categories.blank?)
1199: enclosure.categories = []
1200: for category in FeedTools::XmlHelper.try_xpaths_all(
1201: media_group, ["media:category"])
1202: enclosure.categories << FeedTools::Category.new
1203: enclosure.categories.last.term =
1204: FeedTools::HtmlHelper.unescape_entities(category.inner_xml)
1205: enclosure.categories.last.scheme =
1206: FeedTools::HtmlHelper.unescape_entities(
1207: category.attributes["scheme"].to_s)
1208: enclosure.categories.last.label =
1209: FeedTools::HtmlHelper.unescape_entities(
1210: category.attributes["label"].to_s)
1211: if enclosure.categories.last.scheme.blank?
1212: enclosure.categories.last.scheme = nil
1213: end
1214: if enclosure.categories.last.label.blank?
1215: enclosure.categories.last.label = nil
1216: end
1217: end
1218: end
1219: enclosure_media_group_hash =
1220: FeedTools::XmlHelper.try_xpaths(enclosure_node,
1221: ["media:hash/text()"], :select_result_value => true)
1222: if enclosure.hash.nil? && !enclosure_media_group_hash.blank?
1223: enclosure.hash = FeedTools::EnclosureHash.new(
1224: FeedTools::HtmlHelper.sanitize_html(
1225: FeedTools::HtmlHelper.unescape_entities(
1226: enclosure_media_group_hash), :strip),
1227: "md5"
1228: )
1229: end
1230: enclosure_media_group_url = FeedTools::XmlHelper.try_xpaths(
1231: media_group,
1232: "media:player/@url",
1233: :select_result_value => true
1234: )
1235: if enclosure.player.nil? && !enclosure_media_group_url.blank?
1236: enclosure.player = FeedTools::EnclosurePlayer.new(
1237: FeedTools::HtmlHelper.unescape_entities(
1238: enclosure_media_group_url),
1239: FeedTools::HtmlHelper.unescape_entities(
1240: FeedTools::XmlHelper.try_xpaths(media_group,
1241: ["media:player/@height"],
1242: :select_result_value => true)),
1243: FeedTools::HtmlHelper.unescape_entities(
1244: FeedTools::XmlHelper.try_xpaths(media_group,
1245: ["media:player/@width"],
1246: :select_result_value => true
1247: )
1248: )
1249: )
1250: end
1251: if enclosure.credits.nil? || enclosure.credits.size == 0
1252: enclosure.credits = []
1253: for credit in FeedTools::XmlHelper.try_xpaths_all(
1254: media_group, ["media:credit"])
1255: enclosure.credits << FeedTools::EnclosureCredit.new(
1256: FeedTools::HtmlHelper.unescape_entities(credit.inner_xml),
1257: FeedTools::HtmlHelper.unescape_entities(
1258: credit.attributes["role"].to_s.downcase)
1259: )
1260: if enclosure.credits.last.role.blank?
1261: enclosure.credits.last.role = nil
1262: end
1263: end
1264: end
1265: if enclosure.explicit?.nil?
1266: enclosure.explicit =
1267: ((FeedTools::XmlHelper.try_xpaths(media_group, [
1268: "media:adult/text()"
1269: ], :select_result_value => true).downcase == "true") ?
1270: true : false)
1271: end
1272: enclosure_media_group_text =
1273: FeedTools::XmlHelper.try_xpaths(media_group,
1274: ["media:text/text()"], :select_result_value => true)
1275: if enclosure.text.nil? && !enclosure_media_group_text.blank?
1276: enclosure.text = FeedTools::HtmlHelper.sanitize_html(
1277: FeedTools::HtmlHelper.unescape_entities(
1278: enclosure_media_group_text), :strip)
1279: end
1280: end
1281:
1282: # Keep track of the media groups
1283: media_groups << affected_enclosures
1284: end
1285:
1286: # Now we need to inherit any relevant item level information.
1287: if self.explicit?
1288: for enclosure in @enclosures
1289: enclosure.explicit = true
1290: end
1291: end
1292:
1293: # Add all the itunes categories
1294: itunes_categories =
1295: FeedTools::XmlHelper.try_xpaths_all(self.root_node,
1296: ["itunes:category"])
1297: for itunes_category in itunes_categories
1298: genre = "Podcasts"
1299: category = itunes_category.attributes["text"].to_s
1300: subcategory =
1301: FeedTools::XmlHelper.try_xpaths(itunes_category,
1302: ["itunes:category/@text"],
1303: :select_result_value => true)
1304: category_path = genre
1305: if !category.blank?
1306: category_path << "/" + category
1307: end
1308: if !subcategory.blank?
1309: category_path << "/" + subcategory
1310: end
1311: for enclosure in @enclosures
1312: if enclosure.categories.nil?
1313: enclosure.categories = []
1314: end
1315: enclosure.categories << FeedTools::Category.new
1316: enclosure.categories.last.term =
1317: FeedTools::HtmlHelper.unescape_entities(category_path)
1318: enclosure.categories.last.scheme =
1319: "http://www.apple.com/itunes/store/"
1320: enclosure.categories.last.label =
1321: "iTunes Music Store Categories"
1322: end
1323: end
1324:
1325: for enclosure in @enclosures
1326: # Clean up any of those attributes that incorrectly have ""
1327: # or 0 as their values
1328: if enclosure.type.blank?
1329: enclosure.type = nil
1330: end
1331: if enclosure.file_size == 0
1332: enclosure.file_size = nil
1333: end
1334: if enclosure.duration == 0
1335: enclosure.duration = nil
1336: end
1337: if enclosure.height == 0
1338: enclosure.height = nil
1339: end
1340: if enclosure.width == 0
1341: enclosure.width = nil
1342: end
1343: if enclosure.bitrate == 0
1344: enclosure.bitrate = nil
1345: end
1346: if enclosure.framerate == 0
1347: enclosure.framerate = nil
1348: end
1349: if enclosure.expression.blank?
1350: enclosure.expression = "full"
1351: end
1352:
1353: # If an enclosure is missing the text field, fall back on the
1354: # itunes:summary field
1355: if enclosure.text.blank?
1356: enclosure.text = self.itunes_summary
1357: end
1358:
1359: # Make sure we don't have duplicate categories
1360: unless enclosure.categories.nil?
1361: enclosure.categories.uniq!
1362: end
1363:
1364: # Normalize enclosure URIs
1365: if !enclosure.href.blank?
1366: enclosure.href =
1367: FeedTools::UriHelper.normalize_url(enclosure.href)
1368: else
1369: enclosure.href = nil
1370: end
1371: end
1372:
1373: # And finally, now things get complicated. This is where we make
1374: # sure that the enclosures method only returns either default
1375: # enclosures or enclosures with only one version. Any enclosures
1376: # that are wrapped in a media:group will be placed in the appropriate
1377: # versions field.
1378: affected_enclosure_urls = []
1379: for media_group in media_groups
1380: affected_enclosure_urls =
1381: affected_enclosure_urls | (media_group.map do |enclosure|
1382: enclosure.url
1383: end)
1384: end
1385: @enclosures.delete_if do |enclosure|
1386: (affected_enclosure_urls.include? enclosure.url)
1387: end
1388: for media_group in media_groups
1389: default_enclosure = nil
1390: for enclosure in media_group
1391: if enclosure.is_default?
1392: default_enclosure = enclosure
1393: end
1394: end
1395: for enclosure in media_group
1396: enclosure.default_version = default_enclosure
1397: enclosure.versions = media_group.clone
1398: enclosure.versions.delete(enclosure)
1399: end
1400: @enclosures << default_enclosure
1401: end
1402: end
1403:
1404: # If we have a single enclosure, it's safe to inherit the
1405: # itunes:duration field if it's missing.
1406: if @enclosures.size == 1
1407: if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
1408: @enclosures.first.duration = self.itunes_duration
1409: end
1410: end
1411:
1412: return @enclosures
1413: end
# File lib/feed_tools/feed_item.rb, line 1415
# Replaces the cached enclosures collection with new_enclosures as given;
# the value is stored in @enclosures without any validation or copying.
1415: def enclosures=(new_enclosures)
1416: @enclosures = new_enclosures
1417: end
Returns true if this feed item contains explicit material. If the whole feed has been marked as explicit, this will return true even if the item isn't explicitly marked as explicit.
# File lib/feed_tools/feed_item.rb, line 2003
# Returns true if this feed item contains explicit material.
#
# The item's own media:adult / itunes:explicit value is checked first. It is
# compared case-insensitively and with surrounding whitespace ignored, so
# values such as "Yes" or " TRUE " count as explicit. When the item itself is
# not marked, the parent feed's explicit? flag is inherited. The parent feed
# is only looked up when it is actually needed.
#
# The result (always true or false) is memoized in @explicit.
def explicit?
  if @explicit.nil?
    explicit_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
      "media:adult/text()",
      "itunes:explicit/text()"
    ], :select_result_value => true).to_s.strip.downcase
    if explicit_string == "true" || explicit_string == "yes"
      @explicit = true
    else
      # Fall back on the feed-level flag.
      parent_feed = self.feed
      @explicit = (parent_feed != nil && parent_feed.explicit?) ? true : false
    end
  end
  return @explicit
end
Returns the parent feed of this feed item. Warning: this method may be slow if you have a large number of FeedTools::Feed objects. It can't use a direct reference to the parent because that plays havoc with the garbage collector. A WeakRef object could have been used, but really, if there are multiple parent feeds, something is going to go wrong, and the programmer needs to be notified. A WeakRef implementation can't detect this condition.
# File lib/feed_tools/feed_item.rb, line 65
# Returns the parent feed of this feed item, or nil if no live
# FeedTools::Feed object contains it.
#
# Every FeedTools::Feed instance known to ObjectSpace is scanned, so this may
# be slow when many feeds exist. A feed whose @entries have not been loaded
# yet is asked for its items first; if @entries is still nil afterwards, the
# feed is treated as empty instead of failing with NoMethodError.
#
# Raises a RuntimeError ("Multiple parent feeds found.") if this item is
# found in more than one feed.
def feed
  parent_feed = nil
  ObjectSpace.each_object(FeedTools::Feed) do |candidate|
    if candidate.instance_variable_get("@entries").nil?
      candidate.items
    end
    entries = candidate.instance_variable_get("@entries") || []
    # Identity comparison: the feed must hold this very object.
    next unless entries.any? { |entry| entry.equal?(self) }
    raise "Multiple parent feeds found." unless parent_feed.nil?
    parent_feed = candidate
  end
  return parent_feed
end
Returns all nodes within the root_node that match the xpath query.
# File lib/feed_tools/feed_item.rb, line 220
# Returns all nodes within the root_node that match the xpath query.
#
# select_result_value is forwarded to FeedTools::XmlHelper.try_xpaths_all as
# the :select_result_value option. Raises a RuntimeError when the feed data
# type is not :xml.
def find_all_nodes(xpath, select_result_value=false)
  raise "The feed data type is not xml." unless self.feed_data_type == :xml
  lookup_options = { :select_result_value => select_result_value }
  return FeedTools::XmlHelper.try_xpaths_all(
    self.root_node, [xpath], lookup_options)
end
Returns the first node within the root_node that matches the xpath query.
# File lib/feed_tools/feed_item.rb, line 211
# Returns the first node within the root_node that matches the xpath query.
#
# select_result_value is forwarded to FeedTools::XmlHelper.try_xpaths as the
# :select_result_value option. Raises a RuntimeError when the feed data type
# is not :xml.
def find_node(xpath, select_result_value=false)
  raise "The feed data type is not xml." unless self.feed_data_type == :xml
  lookup_options = { :select_result_value => select_result_value }
  return FeedTools::XmlHelper.try_xpaths(
    self.root_node, [xpath], lookup_options)
end
Does a full parse of the feed item.
# File lib/feed_tools/feed_item.rb, line 87
# Does a full parse of the feed item by invoking every attribute reader once,
# in a fixed order. Returns the result of explicit?, which is the last reader
# invoked.
def full_parse
  readers_in_order = [
    # Setup and document structure
    :configurations, :encoding, :xml_document, :root_node,
    :feed_type, :feed_version,
    # Core item attributes
    :id, :title, :content, :summary, :links, :link, :comments,
    :time, :updated, :published, :source, :categories, :tags,
    :images, :rights, :author, :publisher,
    # iTunes extensions
    :itunes_summary, :itunes_subtitle, :itunes_image_link,
    :itunes_author, :itunes_duration,
    # Media RSS extensions
    :media_text, :media_thumbnail_link
  ]
  readers_in_order.each { |reader| self.send(reader) }
  return self.explicit?
end
Returns the feed item's unique id
# File lib/feed_tools/feed_item.rb, line 267
# Returns the feed item's unique id, looked up on first access and cached in
# @id. The gr:original-id attribute queries are listed ahead of the id/guid
# text queries.
def id
  return @id unless @id.nil?
  id_queries = [
    "atom10:id/@gr:original-id",
    "atom03:id/@gr:original-id",
    "atom:id/@gr:original-id",
    "id/@gr:original-id",
    "atom10:id/text()",
    "atom03:id/text()",
    "atom:id/text()",
    "id/text()",
    "guid/text()"
  ]
  @id = FeedTools::XmlHelper.try_xpaths(
    self.root_node, id_queries, :select_result_value => true)
  return @id
end
Returns a list of the feed item's images
# File lib/feed_tools/feed_item.rb, line 773
# Returns the list of images for this feed item, built on first access and
# cached in @images. Images come from two places: image-like child nodes of
# root_node (image, logo, apple-wallpapers:image, imageUrl) and any entry of
# self.links whose type starts with "image".
773: def images
774: if @images.nil?
775: @images = []
776: image_nodes = FeedTools::XmlHelper.try_xpaths_all(self.root_node, [
777: "image",
778: "logo",
779: "apple-wallpapers:image",
780: "imageUrl"
781: ])
782: unless image_nodes.blank?
783: for image_node in image_nodes
784: image = FeedTools::Image.new
785: image.href = FeedTools::XmlHelper.try_xpaths(image_node, [
786: "url/text()",
787: "@rdf:resource",
788: "@href",
789: "@url",
790: "text()"
791: ], :select_result_value => true)
# No href was found but the node carries a base URI: use an empty href,
# which is then handed to resolve_relative_uri below.
792: if image.href.nil? && image_node.base_uri != nil
793: image.href = ""
794: end
# Best effort: any error raised while resolving a relative href is swallowed
# and the href is left as it was.
795: begin
796: if !(image.href =~ /^file:/) &&
797: !FeedTools::UriHelper.is_uri?(image.href)
798: stored_base_uri =
799: FeedTools::GenericHelper.recursion_trap(:feed_link) do
800: self.base_uri if self.feed != nil
801: end
802: image.href = FeedTools::UriHelper.resolve_relative_uri(
803: image.href, [image_node.base_uri, stored_base_uri])
804: end
805: rescue
806: end
807: if self.configurations[:url_normalization_enabled]
808: image.href = FeedTools::UriHelper.normalize_url(image.href)
809: end
810: image.href.strip! unless image.href.nil?
# Nodes without a usable href are skipped entirely.
811: next if image.href.blank?
812: image.title = FeedTools::XmlHelper.try_xpaths(image_node,
813: ["title/text()"], :select_result_value => true)
814: image.title.strip! unless image.title.nil?
815: image.description = FeedTools::XmlHelper.try_xpaths(image_node,
816: ["description/text()"], :select_result_value => true)
817: image.description.strip! unless image.description.nil?
818: image.link = FeedTools::XmlHelper.try_xpaths(image_node,
819: ["link/text()"], :select_result_value => true)
820: image.link.strip! unless image.link.nil?
# Missing or non-positive dimensions are stored as nil.
821: image.height = FeedTools::XmlHelper.try_xpaths(image_node,
822: ["height/text()"], :select_result_value => true).to_i
823: image.height = nil if image.height <= 0
824: image.width = FeedTools::XmlHelper.try_xpaths(image_node,
825: ["width/text()"], :select_result_value => true).to_i
826: image.width = nil if image.width <= 0
827: image.style = FeedTools::XmlHelper.try_xpaths(image_node, [
828: "style/text()",
829: "@style"
830: ], :select_result_value => true)
831: image.style.strip! unless image.style.nil?
832: image.style.downcase! unless image.style.nil?
# NOTE(review): this checks image.url while the rest of the loop uses
# image.href; presumably url is an alias of href on FeedTools::Image, confirm.
833: @images << image unless image.url.nil?
834: end
835: end
# Links whose type starts with "image" are also exposed as images; only href
# and title are carried over.
836: for link_object in self.links
837: if link_object.type != nil && link_object.type =~ /^image/
838: image = FeedTools::Image.new
839: image.href = link_object.href
840: image.title = link_object.title
841: @images << image unless image.href.nil?
842: end
843: end
844: end
845: return @images
846: end
Returns the contents of the itunes:author element
This inherits from any incorrectly placed channel-level itunes:author elements. They're actually amazingly common. People don't read specs.
# File lib/feed_tools/feed_item.rb, line 1672
# Returns the contents of the itunes:author element with entities unescaped,
# cached in @itunes_author. When the item has no (non-blank) value of its own
# and a parent feed exists, the parent feed's itunes_author is used instead.
def itunes_author
  return @itunes_author unless @itunes_author.nil?
  raw_author = FeedTools::XmlHelper.try_xpaths(self.root_node,
    ["itunes:author/text()"], :select_result_value => true)
  @itunes_author = FeedTools::HtmlHelper.unescape_entities(raw_author)
  if @itunes_author.blank?
    parent_feed = self.feed
    @itunes_author = parent_feed.itunes_author unless parent_feed.nil?
  end
  return @itunes_author
end
Returns the number of seconds that the associated media runs for
# File lib/feed_tools/feed_item.rb, line 1693
# Returns the number of seconds that the associated media runs for, or nil
# when no usable itunes:duration value is present.
#
# Accepts "SS", "MM:SS" and "HH:MM:SS". Values with no parts or with more than
# three colon-separated parts are ignored. The total is computed with plain
# integer arithmetic, so no Numeric#hours / Numeric#minutes extensions are
# required and the result is always an Integer.
#
# The result is memoized in @itunes_duration.
def itunes_duration
  if @itunes_duration.nil?
    raw_duration = FeedTools::HtmlHelper.unescape_entities(
      FeedTools::XmlHelper.try_xpaths(self.root_node,
        ["itunes:duration/text()"], :select_result_value => true))
    if !raw_duration.blank?
      hms = raw_duration.split(":").map { |x| x.to_i }
      if hms.size >= 1 && hms.size <= 3
        # Fold left to right: each step promotes the running total by one
        # unit (hours -> minutes -> seconds).
        @itunes_duration = hms.inject(0) { |total, part| total * 60 + part }
      end
    end
  end
  return @itunes_duration
end
Sets the number of seconds that the associated media runs for
# File lib/feed_tools/feed_item.rb, line 1713
# Stores new_itunes_duration in @itunes_duration as given; the reader returns
# this value instead of parsing itunes:duration once it is non-nil.
1713: def itunes_duration=(new_itunes_duration)
1714: @itunes_duration = new_itunes_duration
1715: end
Returns the feed item itunes image link
# File lib/feed_tools/feed_item.rb, line 849
# Returns the feed item itunes image link, cached in @itunes_image_link.
# The link is passed through FeedTools::UriHelper.normalize_url when the
# :url_normalization_enabled configuration is set.
def itunes_image_link
  return @itunes_image_link unless @itunes_image_link.nil?
  image_queries = [
    "itunes:image/@href",
    "itunes:link[@rel='image']/@href"
  ]
  @itunes_image_link = FeedTools::XmlHelper.try_xpaths(
    self.root_node, image_queries, :select_result_value => true)
  if self.configurations[:url_normalization_enabled]
    @itunes_image_link =
      FeedTools::UriHelper.normalize_url(@itunes_image_link)
  end
  return @itunes_image_link
end
Returns the contents of the itunes:subtitle element
# File lib/feed_tools/feed_item.rb, line 697
# Returns the contents of the itunes:subtitle element: entities unescaped,
# HTML sanitized and surrounding whitespace stripped. A blank value is
# reported as nil. A non-nil result is cached in @itunes_subtitle.
def itunes_subtitle
  return @itunes_subtitle unless @itunes_subtitle.nil?
  @itunes_subtitle = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["itunes:subtitle/text()"],
    :select_result_value => true)
  if @itunes_subtitle.blank?
    @itunes_subtitle = nil
  else
    @itunes_subtitle =
      FeedTools::HtmlHelper.unescape_entities(@itunes_subtitle)
    @itunes_subtitle =
      FeedTools::HtmlHelper.sanitize_html(@itunes_subtitle)
    @itunes_subtitle.strip!
  end
  return @itunes_subtitle
end
Sets the contents of the itunes:subtitle element
# File lib/feed_tools/feed_item.rb, line 716
# Stores new_itunes_subtitle in @itunes_subtitle as given; no unescaping or
# sanitizing is applied to values set this way.
716: def itunes_subtitle=(new_itunes_subtitle)
717: @itunes_subtitle = new_itunes_subtitle
718: end
Returns the contents of the itunes:summary element
# File lib/feed_tools/feed_item.rb, line 673
# Returns the contents of the itunes:summary element: entities unescaped,
# HTML sanitized and surrounding whitespace stripped. A blank value is
# reported as nil. A non-nil result is cached in @itunes_summary.
def itunes_summary
  return @itunes_summary unless @itunes_summary.nil?
  @itunes_summary = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["itunes:summary/text()"],
    :select_result_value => true)
  if @itunes_summary.blank?
    @itunes_summary = nil
  else
    @itunes_summary =
      FeedTools::HtmlHelper.unescape_entities(@itunes_summary)
    @itunes_summary =
      FeedTools::HtmlHelper.sanitize_html(@itunes_summary)
    @itunes_summary.strip!
  end
  return @itunes_summary
end
# File lib/feed_tools/feed_item.rb, line 557
# Returns the primary link (a URI string, or nil) for this feed item, cached
# in @link. The link is chosen by scoring every entry of self.links; if that
# yields nothing, the root node's href/about attributes, an http id and
# finally self.comments are tried in turn. The result is resolved against
# the available base URIs and optionally normalized.
557: def link
558: if @link.nil?
559: max_score = 0
# Links are walked in reverse and compared with >=, so among links sharing
# the best score the one that comes first in self.links wins. Links with a
# negative score are never selected because max_score starts at 0.
560: for link_object in self.links.reverse
561: score = 0
562: if FeedTools::HtmlHelper.html_type?(link_object.type)
563: score = score + 2
564: elsif link_object.type != nil
565: score = score - 1
566: end
567: if FeedTools::HtmlHelper.xml_type?(link_object.type)
568: score = score + 1
569: end
# A lone video/audio link is given a bonus point.
570: if link_object.type =~ /^video/ && self.links.size == 1
571: score = score + 1
572: elsif link_object.type =~ /^audio/ && self.links.size == 1
573: score = score + 1
574: end
575: if link_object.rel == "alternate"
576: score = score + 1
577: end
578: if link_object.rel == "self"
579: score = score - 1
580: end
581: if score >= max_score
582: max_score = score
583: @link = link_object.href
584: end
585: end
# Fallback 1: attributes on the root node itself.
586: if @link.blank?
587: @link = FeedTools::XmlHelper.try_xpaths(self.root_node, [
588: "@href",
589: "@rdf:about",
590: "@about"
591: ], :select_result_value => true)
592: end
# Fallback 2: the item id, when it is a URI starting with "http".
593: if @link.blank?
594: if FeedTools::UriHelper.is_uri?(self.id) &&
595: (self.id =~ /^http/)
596: @link = self.id
597: end
598: end
599: if !@link.blank?
600: @link = FeedTools::HtmlHelper.unescape_entities(@link)
601: end
# Fallback 3: the comments value.
602: @link = self.comments if @link.blank?
603: @link = nil if @link.blank?
# Best effort: any error raised while resolving a relative link is swallowed
# and @link is left as it was.
604: begin
605: if !(@link =~ /^file:/) &&
606: !FeedTools::UriHelper.is_uri?(@link)
607: stored_base_uri =
608: FeedTools::GenericHelper.recursion_trap(:feed_link) do
609: self.base_uri if self.feed != nil
610: end
611: root_base_uri = nil
612: unless self.root_node.nil?
613: root_base_uri = self.root_node.base_uri
614: end
615: @link = FeedTools::UriHelper.resolve_relative_uri(
616: @link, [root_base_uri,stored_base_uri])
617: end
618: rescue
619: end
620: if self.configurations[:url_normalization_enabled]
621: @link = FeedTools::UriHelper.normalize_url(@link)
622: end
623: end
624: return @link
625: end
Returns the links collection
# File lib/feed_tools/feed_item.rb, line 436
# Returns the links collection: an array of FeedTools::Link objects built on
# first access from the link-like child nodes of root_node and cached in
# @links. Each link gets href, hreflang, rel, type, title and length. When no
# link is found but enclosures exist, a link to the first enclosure is added.
436: def links
437: if @links.nil?
438: @links = []
439: link_nodes =
440: FeedTools::XmlHelper.combine_xpaths_all(self.root_node, [
441: "atom10:link",
442: "atom03:link",
443: "atom:link",
444: "link",
445: "a",
446: "url",
447: "href"
448: ])
449: for link_node in link_nodes
450: link_object = FeedTools::Link.new
451: link_object.href = FeedTools::XmlHelper.try_xpaths(link_node, [
452: "@atom10:href",
453: "@atom03:href",
454: "@atom:href",
455: "@href",
456: "@url",
457: "text()"
458: ], :select_result_value => true)
# No href was found but the node carries a base URI: use an empty href,
# which is then handed to resolve_relative_uri below.
459: if link_object.href.nil? && link_node.base_uri != nil
460: link_object.href = ""
461: end
# Best effort: any error raised while resolving a relative href is swallowed
# and the href is left as it was.
462: begin
463: if !(link_object.href =~ /^file:/) &&
464: !FeedTools::UriHelper.is_uri?(link_object.href)
465: stored_base_uri =
466: FeedTools::GenericHelper.recursion_trap(:feed_link) do
467: self.base_uri if self.feed != nil
468: end
469: link_object.href = FeedTools::UriHelper.resolve_relative_uri(
470: link_object.href,
471: [link_node.base_uri, stored_base_uri])
472: end
473: rescue
474: end
475: if self.configurations[:url_normalization_enabled]
476: link_object.href =
477: FeedTools::UriHelper.normalize_url(link_object.href)
478: end
479: link_object.href.strip! unless link_object.href.nil?
# Nodes without a usable href produce no link at all.
480: next if link_object.href.blank?
481: link_object.hreflang = FeedTools::XmlHelper.try_xpaths(link_node, [
482: "@atom10:hreflang",
483: "@atom03:hreflang",
484: "@atom:hreflang",
485: "@hreflang"
486: ], :select_result_value => true)
487: unless link_object.hreflang.nil?
488: link_object.hreflang = link_object.hreflang.downcase
489: end
490: link_object.rel = FeedTools::XmlHelper.try_xpaths(link_node, [
491: "@atom10:rel",
492: "@atom03:rel",
493: "@atom:rel",
494: "@rel"
495: ], :select_result_value => true)
496: unless link_object.rel.nil?
497: link_object.rel = link_object.rel.downcase
498: end
499: link_object.type = FeedTools::XmlHelper.try_xpaths(link_node, [
500: "@atom10:type",
501: "@atom03:type",
502: "@atom:type",
503: "@type"
504: ], :select_result_value => true)
505: unless link_object.type.nil?
506: link_object.type = link_object.type.downcase
507: end
508: link_object.title = FeedTools::XmlHelper.try_xpaths(link_node, [
509: "@atom10:title",
510: "@atom03:title",
511: "@atom:title",
512: "@title",
513: "text()"
514: ], :select_result_value => true)
515: # This catches the ambiguities between atom, rss, and cdf
516: if link_object.title == link_object.href
517: link_object.title = nil
518: end
519: link_object.length = FeedTools::XmlHelper.try_xpaths(link_node, [
520: "@atom10:length",
521: "@atom03:length",
522: "@atom:length",
523: "@length"
524: ], :select_result_value => true)
525: if !link_object.length.nil?
526: link_object.length = link_object.length.to_i
527: else
# NOTE: the first branch below contains only a comment; no HEAD request is
# made here, so length stays nil in both branches.
528: if !link_object.type.nil? && link_object.type[0..4] != "text" &&
529: link_object.type[-3..-1] != "xml" &&
530: link_object.href =~ /^http:\/\//
531: # Retrieve the length with an http HEAD request
532: else
533: link_object.length = nil
534: end
535: end
536: @links << link_object
537: end
538: if @links.empty? && self.enclosures.size > 0
539: # If there's seriously nothing to link to, but there's enclosures
540: # available, then add a link to the first one.
541: enclosure_link = self.enclosures[0]
542: link_object = FeedTools::Link.new
543: link_object.href = enclosure_link.url
544: link_object.type = enclosure_link.type
545: @links << link_object
546: end
547: end
548: return @links
549: end
Returns the contents of the media:text element
# File lib/feed_tools/feed_item.rb, line 721
# Returns the contents of the media:text element: entities unescaped, HTML
# sanitized and surrounding whitespace stripped. A blank value is reported
# as nil. A non-nil result is cached in @media_text.
def media_text
  return @media_text unless @media_text.nil?
  @media_text = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["media:text/text()"],
    :select_result_value => true)
  if @media_text.blank?
    @media_text = nil
  else
    @media_text = FeedTools::HtmlHelper.unescape_entities(@media_text)
    @media_text = FeedTools::HtmlHelper.sanitize_html(@media_text)
    @media_text.strip!
  end
  return @media_text
end
Sets the contents of the media:text element
# File lib/feed_tools/feed_item.rb, line 738
# Stores new_media_text in @media_text as given; no unescaping or sanitizing
# is applied to values set this way.
738: def media_text=(new_media_text)
739: @media_text = new_media_text
740: end
Returns the feed item media thumbnail link
# File lib/feed_tools/feed_item.rb, line 869
# Returns the feed item media thumbnail link (the url attribute of
# media:thumbnail), cached in @media_thumbnail_link. The link is passed
# through FeedTools::UriHelper.normalize_url when the
# :url_normalization_enabled configuration is set.
def media_thumbnail_link
  return @media_thumbnail_link unless @media_thumbnail_link.nil?
  @media_thumbnail_link = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["media:thumbnail/@url"],
    :select_result_value => true)
  if self.configurations[:url_normalization_enabled]
    @media_thumbnail_link =
      FeedTools::UriHelper.normalize_url(@media_thumbnail_link)
  end
  return @media_thumbnail_link
end
Returns the feed item published time
# File lib/feed_tools/feed_item.rb, line 1864
# Returns the feed item published time as a GMT Time, or nil when no
# published/issued/date value is present or it cannot be parsed. A non-nil
# result is cached in @published.
def published
  return @published unless @published.nil?
  published_queries = [
    "atom10:issued/text()",
    "atom03:issued/text()",
    "atom:issued/text()",
    "issued/text()",
    "atom10:published/text()",
    "atom03:published/text()",
    "atom:published/text()",
    "published/text()",
    "dc:date/text()",
    "pubDate/text()",
    "date/text()"
  ]
  published_string = FeedTools::XmlHelper.try_xpaths(
    self.root_node, published_queries, :select_result_value => true)
  if published_string.blank?
    @published = nil
  else
    begin
      @published = Time.parse(published_string).gmtime
    rescue
      # An unparseable timestamp is treated as missing.
      @published = nil
    end
  end
  return @published
end
# File lib/feed_tools/feed_item.rb, line 1579
# Returns the publisher of this feed item as a FeedTools::Author, built on
# first access and cached in @publisher. The raw text comes from dc:publisher
# or webMaster and is split into a name and an email address where possible.
# If no name, email or href can be determined and a parent feed exists, a
# copy of the parent feed's publisher is used instead.
1579: def publisher
1580: if @publisher.nil?
1581: @publisher = FeedTools::Author.new
1582:
1583: # Set the author name
1584: @publisher.raw = FeedTools::HtmlHelper.unescape_entities(
1585: FeedTools::XmlHelper.try_xpaths(self.root_node, [
1586: "dc:publisher/text()",
1587: "webMaster/text()"
1588: ], :select_result_value => true))
1589: unless @publisher.raw.blank?
# First try "Name (email@host)"; if that fails try "email@host (Name)" and
# reverse the captures so the pair is always [name, email].
1590: raw_scan = @publisher.raw.scan(
1591: /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1592: if raw_scan.nil? || raw_scan.size == 0
1593: raw_scan = @publisher.raw.scan(
1594: /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1595: unless raw_scan.size == 0
1596: publisher_raw_pair = raw_scan.first.reverse
1597: end
1598: else
1599: publisher_raw_pair = raw_scan.first
1600: end
# Neither paired form matched: look for a bare email address anywhere in the
# raw text.
1601: if raw_scan.nil? || raw_scan.size == 0
1602: email_scan = @publisher.raw.scan(
1603: /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1604: if email_scan != nil && email_scan.size > 0
1605: @publisher.email = email_scan.first.strip
1606: end
1607: end
1608: unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
1609: @publisher.name = publisher_raw_pair.first.strip
1610: @publisher.email = publisher_raw_pair.last.strip
1611: else
1612: unless @publisher.raw.include?("@")
1613: # We can be reasonably sure we are looking at something
1614: # that the creator didn't intend to contain an email address if
1615: # it got through the preceeding regexes and it doesn't
1616: # contain the tell-tale '@' symbol.
1617: @publisher.name = @publisher.raw
1618: end
1619: end
1620: end
1621:
# Blank fields are normalized to nil.
1622: @publisher.name = nil if @publisher.name.blank?
1623: @publisher.raw = nil if @publisher.raw.blank?
1624: @publisher.email = nil if @publisher.email.blank?
1625: @publisher.url = nil if @publisher.url.blank?
# NOTE(review): nothing in this method assigns @publisher.url from the feed
# data before this point, so this branch only runs if FeedTools::Author
# supplies a url itself; confirm.
1626: if @publisher.url != nil
1627: begin
1628: if !(@publisher.url =~ /^file:/) &&
1629: !FeedTools::UriHelper.is_uri?(@publisher.url)
1630: root_base_uri = nil
1631: unless self.root_node.nil?
1632: root_base_uri = self.root_node.base_uri
1633: end
1634: @publisher.url = FeedTools::UriHelper.resolve_relative_uri(
1635: @publisher.url, [root_base_uri, self.base_uri])
1636: end
1637: rescue
1638: end
1639: end
# NOTE(review): href is checked here while url is used above; presumably
# they are aliases on FeedTools::Author, confirm.
1640: if @publisher.name.blank? && @publisher.email.blank? &&
1641: @publisher.href.blank?
1642: parent_feed = self.feed
1643: if parent_feed != nil
1644: @publisher = parent_feed.publisher.dup
1645: end
1646: end
1647: end
1648: return @publisher
1649: end
# File lib/feed_tools/feed_item.rb, line 1652
# Sets the publisher of this feed item.
#
# An object responding to name, email and url is treated as a complete
# Author object and stored as-is. Anything else (probably a string) is used
# as the name of the current publisher, creating a new FeedTools::Author
# first when none has been set yet.
def publisher=(new_publisher)
  author_like = [:name, :email, :url].all? do |reader|
    new_publisher.respond_to?(reader)
  end
  if author_like
    @publisher = new_publisher
  else
    @publisher = FeedTools::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
Returns the feed item's rights information
# File lib/feed_tools/feed_item.rb, line 889
# Returns the feed item's rights information, processed as a text construct
# and cached in @rights. The wrapper element is stripped for atom feeds or
# when the :always_strip_wrapper_elements configuration is set.
def rights
  return @rights unless @rights.nil?
  rights_queries = [
    "atom10:copyright",
    "atom03:copyright",
    "atom:copyright",
    "copyright",
    "copyrights",
    "dc:rights",
    "rights"
  ]
  rights_node =
    FeedTools::XmlHelper.try_xpaths(self.root_node, rights_queries)
  @rights = FeedTools::HtmlHelper.process_text_construct(
    rights_node, self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @rights = FeedTools::HtmlHelper.strip_wrapper_element(@rights)
  end
  return @rights
end
Sets the root node of the feed item.
This allows namespace information to be inherited by the feed item from the feed itself. When creating individual nodes from scratch, the feed_data= method should be used instead.
# File lib/feed_tools/feed_item.rb, line 244
# Stores new_root_node in @root_node as given. No other cached attribute is
# reset by this assignment.
244: def root_node=(new_root_node)
245: @root_node = new_root_node
246: end
Returns a duplicate object suitable for serialization
# File lib/feed_tools/feed_item.rb, line 128
# Returns a duplicate object suitable for serialization.
#
# The item is fully parsed first, then shallow-copied with dup. author and
# publisher are invoked on the copy, and the copy's @xml_document and
# @root_node are cleared. The receiver itself keeps its document and root
# node.
def serializable
  self.full_parse()
  snapshot = self.dup
  snapshot.author
  snapshot.publisher
  ["@xml_document", "@root_node"].each do |ivar_name|
    snapshot.instance_variable_set(ivar_name, nil)
  end
  return snapshot
end
TODO: FIX ME! This code is completely wrong. Returns the source that this post was based on.
# File lib/feed_tools/feed_item.rb, line 1895
# Returns the source of this feed item as a FeedTools::Link, cached in
# @source. href is taken from the source element's url attribute and title
# from its text.
def source
  return @source unless @source.nil?
  @source = FeedTools::Link.new
  @source.href = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["source/@url"], :select_result_value => true)
  @source.title = FeedTools::XmlHelper.try_xpaths(
    self.root_node, ["source/text()"], :select_result_value => true)
  return @source
end
# File lib/feed_tools/feed_item.rb, line 382
# Returns the feed item summary, processed as a text construct and cached in
# @summary when non-nil. The wrapper element is stripped for atom feeds or
# when the :always_strip_wrapper_elements configuration is set. If the result
# is blank, media_text, itunes_summary and itunes_subtitle are tried in that
# order.
def summary
  return @summary unless @summary.nil?
  summary_queries = [
    "atom10:summary",
    "atom03:summary",
    "atom:summary",
    "summary",
    "abstract",
    "blurb",
    "description",
    "tagline",
    "subtitle",
    "xhtml:body",
    "body",
    "xhtml:div",
    "div",
    "p:payload",
    "payload",
    "fullitem",
    "content:encoded",
    "encoded",
    "atom10:content",
    "atom03:content",
    "atom:content",
    "content",
    "info",
    "body/datacontent"
  ]
  summary_node =
    FeedTools::XmlHelper.try_xpaths(self.root_node, summary_queries)
  @summary = FeedTools::HtmlHelper.process_text_construct(
    summary_node, self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @summary = FeedTools::HtmlHelper.strip_wrapper_element(@summary)
  end
  # Each fallback is only consulted while the summary is still blank.
  @summary = self.media_text if @summary.blank?
  @summary = self.itunes_summary if @summary.blank?
  @summary = self.itunes_subtitle if @summary.blank?
  return @summary
end
# File lib/feed_tools/feed_item.rb, line 1909
# Returns the list of tags for this feed item as an array of strings, built
# on first access and cached in @tags. Several sources are tried in turn and
# each one is only consulted while no tags have been found yet: a dc:subject
# rdf:Bag, taxo:topics resource URLs, category text, dc:subject text and
# finally itunes:keywords. Duplicates are removed.
1909: def tags
1910: # TODO: support the rel="tag" microformat
1911: # =======================================
1912: if @tags.nil?
1913: @tags = []
# Without a root node there is nothing to search; the empty list is returned.
1914: if root_node.nil?
1915: return @tags
1916: end
1917: if @tags.nil? || @tags.size == 0
1918: @tags = []
1919: tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
1920: ["dc:subject/rdf:Bag/rdf:li/text()"],
1921: :select_result_value => true)
1922: if tag_list != nil && tag_list.size > 0
1923: for tag in tag_list
1924: @tags << tag.downcase.strip
1925: end
1926: end
1927: end
1928: if @tags.nil? || @tags.size == 0
1929: # messy effort to find ourselves some tags, mainly for del.icio.us
1930: @tags = []
1931: rdf_bag = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
1932: ["taxo:topics/rdf:Bag/rdf:li"])
1933: if rdf_bag != nil && rdf_bag.size > 0
1934: for tag_node in rdf_bag
# The tag is the last path segment of a .../tag/NAME or .../tags/NAME
# resource URL. Any error for a single node (e.g. a missing resource
# attribute) is swallowed and that node is skipped.
1935: begin
1936: tag_url = FeedTools::XmlHelper.try_xpaths(tag_node,
1937: ["@resource"],
1938: :select_result_value => true)
1939: tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)$/)
1940: if tag_match.size > 0
1941: @tags << tag_match.first.last.downcase.strip
1942: end
1943: rescue
1944: end
1945: end
1946: end
1947: end
1948: if @tags.nil? || @tags.size == 0
1949: @tags = []
1950: tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
1951: ["category/text()"],
1952: :select_result_value => true)
1953: for tag in tag_list
1954: @tags << tag.to_s.downcase.strip
1955: end
1956: end
1957: if @tags.nil? || @tags.size == 0
1958: @tags = []
1959: tag_list = FeedTools::XmlHelper.try_xpaths_all(self.root_node,
1960: ["dc:subject/text()"],
1961: :select_result_value => true)
1962: for tag in tag_list
1963: @tags << tag.to_s.downcase.strip
1964: end
1965: end
1966: if @tags.blank?
1967: begin
1968: itunes_keywords_string =
1969: FeedTools::XmlHelper.try_xpaths(self.root_node, [
1970: "itunes:keywords/text()"
1971: ], :select_result_value => true)
1972: unless itunes_keywords_string.blank?
# Keywords are split on commas first. If that yields a single entry, a split
# on spaces is tried; if that also yields a single entry, the comma split is
# used after all.
1973: @tags = itunes_keywords_string.downcase.split(",")
1974: if @tags.size == 1
1975: @tags = itunes_keywords_string.downcase.split(" ")
1976: @tags = @tags.map { |tag| tag.chomp(",") }
1977: end
1978: if @tags.size == 1
1979: @tags = itunes_keywords_string.downcase.split(",")
1980: end
1981: @tags = @tags.map { |tag| tag.strip }
1982: end
1983: rescue
1984: @tags = []
1985: end
1986: end
1987: if @tags.nil?
1988: @tags = []
1989: end
1990: @tags.uniq!
1991: end
1992: return @tags
1993: end
# File lib/feed_tools/feed_item.rb, line 1718
# Returns the feed item's timestamp as a GMT Time, cached in @time.
#
# options may contain :estimate_timestamp (defaults to true). The option keys
# are passed to FeedTools::GenericHelper.validate_options together with the
# list of allowed keys (presumably this rejects unknown keys; confirm).
#
# When @time is nil, the timestamp is read from the first matching
# updated/modified/issued/published/date element. If that fails and
# timestamp estimation is enabled, the title, the neighbouring items
# (succ_time / prev_time) and finally the current time are used.
#
# NOTE(review): initialize, as shown at the top of this file, sets @time to
# Time.now.gmtime, so the lookup below only runs if @time has been reset to
# nil elsewhere; confirm.
1718: def time(options = {})
1719: FeedTools::GenericHelper.validate_options([ :estimate_timestamp ],
1720: options.keys)
1721: options = { :estimate_timestamp => true }.merge(options)
1722: if @time.nil?
1723: time_string = FeedTools::XmlHelper.try_xpaths(self.root_node, [
1724: "atom10:updated/text()",
1725: "atom03:updated/text()",
1726: "atom:updated/text()",
1727: "updated/text()",
1728: "atom10:modified/text()",
1729: "atom03:modified/text()",
1730: "atom:modified/text()",
1731: "modified/text()",
1732: "time/text()",
1733: "lastBuildDate/text()",
1734: "atom10:issued/text()",
1735: "atom03:issued/text()",
1736: "atom:issued/text()",
1737: "issued/text()",
1738: "atom10:published/text()",
1739: "atom03:published/text()",
1740: "atom:published/text()",
1741: "published/text()",
1742: "dc:date/text()",
1743: "pubDate/text()",
1744: "date/text()",
1745: "lastupdated/text()"
1746: ], :select_result_value => true)
# Parse errors are swallowed; @time simply stays nil in that case. A time
# parsed from the title is only accepted when it differs from the current
# time by more than 100 seconds.
1747: begin
1748: if !time_string.blank?
1749: @time = Time.parse(time_string).gmtime
1750: elsif self.configurations[:timestamp_estimation_enabled] &&
1751: !self.title.nil? &&
1752: (Time.parse(self.title) - Time.now).abs > 100
1753: @time = Time.parse(self.title).gmtime
1754: end
1755: rescue
1756: end
# Estimation needs both the configuration flag and the per-call option.
1757: if self.configurations[:timestamp_estimation_enabled]
1758: if options[:estimate_timestamp]
1759: if @time.nil?
1760: begin
1761: @time = succ_time
1762: if @time.nil?
1763: @time = prev_time
1764: end
1765: rescue
1766: end
# Last resort: the current time.
1767: if @time.nil?
1768: @time = Time.now.gmtime
1769: end
1770: end
1771: end
1772: end
1773: end
1774: return @time
1775: end
# File lib/feed_tools/feed_item.rb, line 290
# Returns the feed item title, processed as a text construct and cached in
# @title when non-nil. The wrapper element is stripped for atom feeds or when
# the :always_strip_wrapper_elements configuration is set. A blank title is
# reported as nil.
def title
  return @title unless @title.nil?
  title_queries = [
    "atom10:title",
    "atom03:title",
    "atom:title",
    "title",
    "dc:title",
    "headline"
  ]
  title_node =
    FeedTools::XmlHelper.try_xpaths(self.root_node, title_queries)
  @title = FeedTools::HtmlHelper.process_text_construct(
    title_node, self.feed_type, self.feed_version, [self.base_uri])
  if self.feed_type == "atom" ||
      self.configurations[:always_strip_wrapper_elements]
    @title = FeedTools::HtmlHelper.strip_wrapper_element(@title)
  end
  if !@title.blank? && self.configurations[:strip_comment_count]
    # Some blogging tools append the number of comments to the post title,
    # e.g. "My post [12]". That makes the title change over time and breaks
    # applications that expect it to be static, so a trailing "[digits]"
    # marker is removed.
    #
    # If the unstripped title is really needed, use
    # find_node("title/text()").to_s
    trimmed_title = @title.strip
    without_count = trimmed_title.gsub(/\[\d*\]$/, "")
    @title = without_count.strip
  end
  @title = nil if @title.blank?
  return @title
end
Returns the feed item updated time
# File lib/feed_tools/feed_item.rb, line 1835
# Returns the feed item updated time as a GMT Time, or nil when no
# updated/modified value is present or it cannot be parsed. A non-nil result
# is cached in @updated.
def updated
  return @updated unless @updated.nil?
  updated_queries = [
    "atom10:updated/text()",
    "atom03:updated/text()",
    "atom:updated/text()",
    "updated/text()",
    "atom10:modified/text()",
    "atom03:modified/text()",
    "atom:modified/text()",
    "modified/text()",
    "lastBuildDate/text()",
    "lastupdated/text()"
  ]
  updated_string = FeedTools::XmlHelper.try_xpaths(
    self.root_node, updated_queries, :select_result_value => true)
  if updated_string.blank?
    @updated = nil
  else
    begin
      @updated = Time.parse(updated_string).gmtime
    rescue
      # An unparseable timestamp is treated as missing.
      @updated = nil
    end
  end
  return @updated
end
Returns a REXML Document of the feed_data
# File lib/feed_tools/feed_item.rb, line 194
# Returns a REXML Document of the feed_data, cached in @xml_document.
# Returns nil when feed_data is blank or when the feed data type is not :xml.
def xml_document
  return @xml_document unless @xml_document.nil?
  return nil if self.feed_data.blank?
  if self.feed_data_type == :xml
    # TODO: :ignore_whitespace_nodes => :all
    # Add that?
    # ======================================
    @xml_document = REXML::Document.new(self.feed_data)
  else
    @xml_document = nil
  end
  return @xml_document
end