view goodreads/goodreadsReviews.php @ 6:077b0a0a3e6d

remaining originals according to dependency walk
author Robert Boland <robert@markup.co.uk>
date Thu, 16 Feb 2017 22:29:02 +0000
parents
children
line wrap: on
line source

<?php
include_once('simple_html_dom.php');

// ISBN to look up, taken from the "isbn" query-string parameter.
// Falls back to an empty string when the parameter is absent or is not a
// plain string (e.g. ?isbn[]=x), so later string concatenation never sees
// an undefined index or an array.
$isbn = (isset($_GET['isbn']) && is_string($_GET['isbn'])) ? $_GET['isbn'] : '';

/**
 * Load the Goodreads reviews widget page at $url and echo its reviews as XML.
 *
 * Output shape:
 *   <Reviews><Review><Rating/><Body/><URL/></Review>...</Reviews>
 *
 * - Rating: first character of the matching "gr_rating" span's text.
 * - Body:   review text with its last 7 characters removed (presumably a
 *           trailing "...more" link label -- confirm against the widget markup).
 * - URL:    the matching "gr_more_link" anchor element, stringified, in CDATA.
 *
 * When the page cannot be loaded, an empty <Reviews></Reviews> document is
 * echoed instead of failing on a non-object.
 *
 * @param string $url Address of the widget page to scrape.
 * @return void The XML is echoed; nothing is returned.
 */
function scraping_goodreads($url) {
	// create HTML DOM; the result is falsy when the page could not be loaded
	$html = file_get_html($url);

	$ret =  "<?xml version=\"1.0\"?>";
	$ret .= "<Reviews>";

	if ($html) {
		$loop = 0;
		foreach($html->find('div[class="gr_review_text"]') as $review) {
			// Ratings and links are paired with reviews purely by position in
			// the document (n-th review <-> n-th rating <-> n-th link).
			// Selectors are plain "tag[attr=value]"; the stray leading "<" the
			// original carried is not part of selector syntax.
			$rating = $html->find('span[class="gr_rating"]',$loop);
			$link = $html->find('a[class="gr_more_link"]',$loop);

			$ratingText = $rating ? (string) substr($rating->plaintext, 0, 1) : '';
			$bodyText = trim((string) substr(trim($review->plaintext),0,-7));

			// The scraped text may still contain HTML entities (&amp;, &nbsp;...)
			// or bare "&" / "<". Decode to plain text first, then escape once,
			// so the emitted XML is well-formed and nothing is double-encoded.
			$ratingText = htmlspecialchars(html_entity_decode($ratingText, ENT_QUOTES, 'UTF-8'), ENT_QUOTES, 'UTF-8');
			$bodyText = htmlspecialchars(html_entity_decode($bodyText, ENT_QUOTES, 'UTF-8'), ENT_QUOTES, 'UTF-8');

			// A literal "]]>" inside the markup would end the CDATA section
			// early; split it across two sections.
			$linkMarkup = str_replace(']]>', ']]]]><![CDATA[>', (string) $link);

			$ret .= "<Review>";
			$ret .= "<Rating>" . $ratingText . "</Rating>";
			$ret .= "<Body>" . $bodyText . "</Body>";
			$ret .= "<URL><![CDATA[" . $linkMarkup . "]]></URL>";
			$ret .= "</Review>";
			$loop++;
		}

		// release the DOM's internal references
		$html->clear();
		unset($html);
	}

	$ret .= "</Reviews>";

	// Drop every byte that is not POSIX alnum/punct/space (no "u" modifier, so
	// this works byte-wise; in the default C locale that removes all non-ASCII).
	$ret = preg_replace("/[^[:alnum:][:punct:][:space:]]/","",$ret);
	echo convert_utf8($ret);
}
		
/**
 * Return $str as UTF-8.
 *
 * Strings that already pass seems_utf8() are returned untouched. Anything
 * else is transcoded to UTF-8.
 *
 * @param string $str Byte string of unknown encoding.
 * @return string UTF-8 encoded string.
 */
function convert_utf8($str){
    if(!seems_utf8($str)) {
        // Name the source encoding explicitly: without it mb_convert_encoding()
        // uses the internal encoding (UTF-8 by default), which would "convert"
        // UTF-8 to UTF-8 and never transcode the bytes just judged non-UTF-8.
        // NOTE(review): ISO-8859-1 is an assumed fallback -- confirm the real
        // source encoding of the scraped pages.
        return mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');
    }
    return $str;
}

/**
 * Heuristic check that a byte string is laid out like UTF-8.
 *
 * Walks the string byte by byte: ASCII bytes pass, a lead byte must match one
 * of the multi-byte lead patterns (2- to 6-byte forms), and must be followed
 * by the right number of 10bbbbbb continuation bytes.
 *
 * @param string $Str Byte string to inspect.
 * @return bool True when every byte fits the pattern (also for ""), else false.
 */
function seems_utf8($Str) {
	// Each entry: mask applied to the lead byte, value expected after
	// masking, number of continuation bytes that must follow.
	$leadForms = array(
		array(0xE0, 0xC0, 1), # 110bbbbb
		array(0xF0, 0xE0, 2), # 1110bbbb
		array(0xF8, 0xF0, 3), # 11110bbb
		array(0xFC, 0xF8, 4), # 111110bb
		array(0xFE, 0xFC, 5), # 1111110b
	);

	$length = strlen($Str);
	$pos = 0;
	while ($pos < $length) {
		$lead = ord($Str[$pos]);
		$pos++;

		if ($lead < 0x80) {
			# 0bbbbbbb: plain ASCII, nothing follows
			continue;
		}

		$trailing = -1;
		foreach ($leadForms as $form) {
			if (($lead & $form[0]) == $form[1]) {
				$trailing = $form[2];
				break;
			}
		}
		if ($trailing < 0) {
			# Does not match any model
			return false;
		}

		# The next $trailing bytes must all exist and look like 10bbbbbb
		for ($k = 0; $k < $trailing; $k++, $pos++) {
			if ($pos == $length || (ord($Str[$pos]) & 0xC0) != 0x80) {
				return false;
			}
		}
	}
	return true;
}

// Scrape the reviews widget for the requested ISBN. The ISBN comes from the
// request, so it is percent-encoded before being placed in the query string;
// otherwise characters such as "&" or "#" could alter the URL being fetched.
// scraping_goodreads() echoes the XML itself and has no return statement, so
// $ret ends up null.
$ret = scraping_goodreads('http://www.goodreads.com/api/reviews_widget_iframe?did=8vxmKmAn8AVolobtvBbMIg&isbn=' . urlencode((string) $isbn));
?>