<?php
//
// RSS 0.91, 0.92, 1.0, 2.0 and Atom 1.0 Parser
// This is a bit of a fuzzy parser that doesn't follow the specs very closely
// but it gets close enough for my needs.
//
// Rasmus Lerdorf, August 2005.
//
// Warning: This doesn't attempt to sanitize your feed.  You'll need to
//          run any output through htmlspecialchars() if you don't trust
//          your feeds.
//          
/**
 * Fetch an RSS (0.91, 0.92, 1.0, 2.0) or Atom 1.0 feed and flatten it into
 * a plain PHP array.
 *
 * Two cache layers are used: the parsed array is kept in APCu/APC shared
 * memory (when either extension is loaded) and the raw XML is kept in
 * /tmp/rss_<md5(url)>.  Both expire after $timeout seconds.  If refreshing
 * the raw XML fails but an older copy is still on disk, the older copy is
 * parsed instead.
 *
 * Layout of the returned array:
 *   - root element attributes (usually 'version')  => string
 *   - '_type' => 'rss0.91' | 'rss0.92' | 'rss1.0' | 'rss2.0' | 'atom1.0'
 *                (absent for unrecognised RSS versions)
 *   - channel/feed level elements: $feed[name][0] holds the text and
 *     $feed[name][attr] holds each attribute; elements with children are
 *     stored as $feed[name][child] => text
 *   - items/entries: $feed[0], $feed[1], ... each shaped as
 *     $feed[i][name][0] => text, $feed[i][name][attr] => attribute value
 *     (for Atom the attribute values are collected in a list)
 *
 * Nothing is sanitized; escape values before output if the feed is untrusted.
 *
 * @param string $url     Feed location; anything fopen() can read.
 * @param int    $timeout Cache lifetime in seconds for both cache layers.
 * @return array|false    Parsed feed, or false when the feed could not be
 *                        downloaded or parsed and no cached copy exists.
 */
function rss_request($url, $timeout=1800) {
	// APC was replaced by APCu; use whichever is loaded and simply skip the
	// shared-memory cache when neither is available.
	$cache_fetch = function_exists('apcu_fetch') ? 'apcu_fetch'
		: (function_exists('apc_fetch') ? 'apc_fetch' : null);
	$cache_store = function_exists('apcu_store') ? 'apcu_store'
		: (function_exists('apc_store') ? 'apc_store' : null);

	if($cache_fetch) {
		$cached = $cache_fetch($url);
		if($cached) return $cached;
	}

	$feed = array();
	$dest_file = '/tmp/rss_'.md5($url);
	$have_copy = file_exists($dest_file);
	if(!$have_copy || filemtime($dest_file) < (time()-$timeout)) {
		$stream = fopen($url,'r');
		if($stream !== false) {
			// Download to a temp file and rename it into place so readers
			// never see a half-written cache file.
			$tmpf = tempnam('/tmp','YWS');
			$written = false;
			if($tmpf !== false) {
				// Note the direct write from the stream here
				$written = file_put_contents($tmpf, $stream);
			}
			fclose($stream);
			if($written > 0 && rename($tmpf, $dest_file)) {
				$have_copy = true;
			} else if($tmpf !== false && file_exists($tmpf)) {
				// Failed or empty download: do not clobber the old copy.
				unlink($tmpf);
			}
		}
		// With no fresh download and no stale copy there is nothing to parse.
		if(!$have_copy) return false;
	}

	// load() is an instance method; calling it statically is an error in PHP 8.
	$dom = new DOMDocument();
	if(!$dom->load($dest_file)) return false;

	// Pick out the namespaces that apply to this doc.
	// We need to do this from DOM because simplexml doesn't see the
	// special xmlns attributes because of the way libxml2 handles them.
	$xpath = new DOMXPath($dom);
	$ns = array(''=>NULL);
	foreach($xpath->query("namespace::*") as $v) {
		if($v->localName!='xml') $ns[$v->localName] = $v->nodeValue;
	}
	if(in_array("http://www.w3.org/2005/Atom",$ns,true)) {
		$atom10 = true;
		// Drop the default-namespace entry; the NULL entry is used for
		// un-prefixed elements instead.
		unset($ns['xmlns']);
	} else $atom10 = false;

	// Ok, now we can switch to simplexml
	$xml = simplexml_import_dom($dom);
	if(!($xml instanceof SimpleXMLElement)) return false;

	// Pull out the root attributes - usually just version
	foreach($xml->attributes() as $k=>$v) $feed[$k] = (string)$v;

	// We will deal with the items separately, so start by only looking
	// at the stuff leading up to the items checking each namespace.
	$rss1 = false;
	if($atom10) $top = $xml;
	else $top = $xml->channel;
	foreach($ns as $alias=>$uri) foreach($top->children($uri) as $key=>$val) {
		if($key=="item" || $key=="entry") continue;
		if($key=="items") {
			// An <items> element in the channel is treated as the RSS 1.0 marker.
			$rss1 = true; continue;
		}
		if(count($val->children()) == 0) {
			// Leaf element: text goes in slot 0, attributes alongside it.
			$feed[$key][0] = (string)$val;
			foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
				$feed[$key][$at] = (string)$atv;
			}
		} else {
			foreach($val->children() as $k=>$v) {
				$feed[$key][$k] = (string)$v;
				foreach($v->attributes() as $at=>$atv) {
					// NOTE(review): attributes of a nested child land on the
					// top-level $feed[$k], not under $feed[$key]. Kept as-is
					// to preserve the output shape - confirm this is intended.
					$feed[$k][$at] = (string)$atv;
				}
			}
		}
	}

	// Now we deal with the items
	// Atom and RSS1 have the feed items a level higher than RSS2
	$i = 0;
	if($rss1) { $feed['_type']='rss1.0'; $items = $xml->item; }
	else if($atom10) { $feed['_type']='atom1.0'; $items = $xml->entry; }
	else {
		// The root element may carry no version attribute at all.
		$version = isset($feed['version']) ? $feed['version'] : '';
		if($version=='2.0') $feed['_type']='rss2.0';
		else if($version=='0.92') $feed['_type']='rss0.92';
		else if($version=='0.91') $feed['_type']='rss0.91';
		$items = $xml->channel->item;
	}
	foreach($items as $key=>$val) {
		foreach($ns as $a=>$u) foreach($val->attributes($u) as $at=>$atv) {
			$feed[$i][$at] = (string)$atv;
		}
		foreach($ns as $alias=>$uri) {
			foreach($val->children($uri) as $k=>$v) {
				$feed[$i][$k][0] = (string)$v;
				foreach($v->attributes() as $at=>$atv) {
					$at_val = (string)$atv;
					if($atom10) {
						// Atom elements such as <link> repeat, so attribute
						// values are collected in a list.
						$feed[$i][$k][$at][] = $at_val;
						// Don't even try parsing this stuff, just pass it through.
						if($at_val=='xhtml' || $at_val=='html' || $at_val=='text') {
							$tags = $v->children();
							$feed[$i][$k]['text'] = $tags->asXML();
						}
					} else $feed[$i][$k][$at] = $at_val;
				}
			}
		}
		$i++;
	}

	// Cache it in the shared memory segment when a cache extension is loaded
	if($cache_store) $cache_store($url,$feed,$timeout);
	return $feed;
}
?>
