view xml/getAmazonInfo.php @ 23:d606320ec331

post-5.7-upgrade efforts to reduce dropped connections, db insertion fails
author Charlie Root
date Sun, 30 Dec 2018 07:00:09 -0500
parents 46382face560
children 828895488948
line wrap: on
line source

<?php

// Script-level variable initialised to 0. Nothing in the visible part of this
// file reads or updates it (getAmazonDet does not import it via `global`).
// NOTE(review): presumably left over from an earlier request-throttling
// scheme -- confirm no including script relies on it before removing.
$lastReqTime=0;
/**
 * Look up one item through the Amazon Product Advertising API, then either
 * return the response or cache it in the local database and echo it.
 *
 * Request options default to the arguments, but a scalar value in $_GET named
 * `locale`, `page`, `operation`, `searchparameter` or `searchparameterdata`
 * overrides the corresponding default.  An unknown locale falls back to
 * $localeIn (or 'us' if that is unknown too).
 *
 * Flow:
 *  1. Build and sign the request (aws_signed_request) and fetch it with cURL
 *     while holding the "Amazon" SyncSemaphore when it can be acquired.
 *  2. If the response has no <Error> element and advertises a customer review
 *     iframe, scrape up to three reviews from it and splice them into the
 *     XML as CustomerReviews/Review/Content elements.
 *  3. Unless Amazon answered AccountLimitExceeded:
 *       - $go == 1: return the XML;
 *       - otherwise: store browse nodes, the book and (when all three are
 *         non-empty) the reviews through the b_addBrowseNode, b_addNewBook
 *         and b_insertReviews procedures, then echo the XML.
 *     On AccountLimitExceeded a <Details> document built from b_getBookInfo
 *     is echoed instead, in either mode.
 *
 * Side effects: writes the global $output, appends diagnostics to
 * /var/ywww/debug/phpDebug, sleeps 0.5s after every request, and terminates
 * the script with the MySQL error text if a book/review/lookup query fails.
 *
 * @param string $isbn     Default item id to look up.
 * @param mixed  $go       1 to return the XML instead of caching/echoing it.
 * @param string $localeIn Default locale: ca, de, fr, jp, uk or us.
 * @return string|null The response XML when $go == 1 (and the account limit
 *                     was not hit); null otherwise.
 * @throws Exception From SimpleXMLElement when the response is not
 *                   well-formed XML (including a failed HTTP request).
 */
function getAmazonDet($isbn,$go,$localeIn)
{
    global $output;

    $debugLog   = '/var/ywww/debug/phpDebug';
    $requestURI = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    $requestIP  = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

    // Request defaults; each may be overridden by a same-named $_GET value.
    $Adefault = array(
        'locale'              => $localeIn,     // which server's products? available: ca,de,fr,jp,uk,us
        'page'                => 1,             // first page to show (counting from 1, not 0)
        'operation'           => 'ItemLookup',  // what to do?
        'searchparameter'     => 'ItemId',      // what kind of search?
        'searchparameterdata' => $isbn,         // what to search for?
    );

    // Associate tag per locale ('jp' has none).
    $Aassociates_id = array(
        'uk' => 'bookwhack-21',
        'us' => 'your02b-20',
        'ca' => 'book009-20',
        'de' => 'book04c-21',
        'fr' => 'book07f-21',
    );

    // Host suffix handed to aws_signed_request for each supported locale.
    $Aext = array(
        'ca' => 'ca',
        'de' => 'de',
        'fr' => 'fr',
        'jp' => 'jp',      // not co.jp!
        'uk' => 'co.uk',
        'us' => 'com',
    );

    // Numeric locale id passed to the database procedures ('jp' has none).
    $AlocId = array('us' => 0, 'uk' => 1, 'ca' => 2, 'de' => 3, 'fr' => 4);

    // NOTE(review): these credentials are committed to source control; they
    // should be rotated and loaded from private configuration instead.
    $public_key ="AKIAIHTNWC7L6LOUY4LQ";
    $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde";

    // For every option see if the caller overruled it, else use the default.
    // Non-scalar values (e.g. ?locale[]=x) are ignored so they cannot be used
    // as array offsets or silently stringified to "Array".
    $opts = array();
    foreach ($Adefault as $name => $default) {
        $opts[$name] = (isset($_GET[$name]) && is_scalar($_GET[$name])) ? $_GET[$name] : $default;
    }
    $locale              = $opts['locale'];
    $page                = $opts['page'];
    $operation           = $opts['operation'];
    $searchparameter     = (string) $opts['searchparameter'];
    $searchparameterdata = (string) $opts['searchparameterdata'];

    if (!is_scalar($locale) || !isset($Aext[$locale])) {
        file_put_contents($debugLog,
                          "Unknown locale, using default: $requestIP, $requestURI\n",
                          FILE_APPEND);
        $locale = (is_scalar($localeIn) && isset($Aext[$localeIn])) ? $localeIn : 'us';
    }
    $ext = $Aext[$locale];
    $loc = isset($AlocId[$locale]) ? $AlocId[$locale] : null;

    // This is the data used to form the request for AWS.
    $parameters = array(
        'Operation'      => $operation,
        $searchparameter => $searchparameterdata,
        'ItemPage'       => $page,
        'AssociateTag'   => isset($Aassociates_id[$locale]) ? $Aassociates_id[$locale] : '',
        'ResponseGroup'  => 'ItemAttributes,Reviews,EditorialReview,OfferSummary,Offers,Images,AlternateVersions,SalesRank,BrowseNodes',
        'ReviewSort'     => '-HelpfulVotes',
    );
    ksort($parameters);

    // History: a per-IP delay for clients repeating the same request was
    // tried at this point and disabled; such repeats have been observed to
    // end in RequestThrottled responses.

    $url = aws_signed_request($ext, $parameters, $public_key, $private_key);

    $crl = curl_init();
    curl_setopt($crl, CURLOPT_URL, $url);
    curl_setopt($crl, CURLOPT_ENCODING, "gzip");
    curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($crl, CURLOPT_CONNECTTIMEOUT, 5);

    // Serialise requests across processes on a best-effort basis: two
    // attempts of up to one second each, then go ahead without the lock.
    $semaphore = new SyncSemaphore("Amazon");
    $gotit = $semaphore->lock(1000);
    if (!$gotit) {
        file_put_contents($debugLog,
                          "Got it: ".(int)$gotit." $requestIP 1\n", FILE_APPEND);
        $gotit = $semaphore->lock(1000);
        file_put_contents($debugLog,
                          "Got it: ".(int)$gotit." $requestIP 1a\n", FILE_APPEND);
    }
    $output = curl_exec($crl);
    if ($output === false) {
        // Record why the transfer failed; the XML parse below will throw.
        file_put_contents($debugLog,
                          "curl failed: ".curl_error($crl).", $requestIP, $requestURI\n",
                          FILE_APPEND);
    }
    curl_close($crl);
    usleep(500000);   // spacing between requests, applied while the lock is still held
    if ($gotit) {
        $semaphore->unlock();
    }
    else {
        file_put_contents($debugLog,
                          "W/o lock for $requestIP 1b\n", FILE_APPEND);
    }

    $review  = "";
    $review1 = "";
    $review2 = "";
    $review3 = "";

    // Throws when the response is not well-formed XML.
    $xml = new SimpleXMLElement($output);

    // Look for a literal <Error> tag.  (A preg_match with "<Error>" as the
    // pattern treats < and > as delimiters and matches the bare word "Error",
    // so ordinary responses mentioning that word were misclassified.)
    if (strpos($output, '<Error>') !== false) {
        $resName = $xml->getName();
        $code = (string) $xml->Error->Code;
        if ($code === '' && isset($xml->Items->Request->Errors->Error->Code)) {
            $code = (string) $xml->Items->Request->Errors->Error->Code;
        }
        file_put_contents($debugLog,
                          "Losing: Error, $resName, $code, $requestIP, $requestURI\n",
                          FILE_APPEND);
        if ($code == 'RequestThrottled') {
            usleep(200000); // Try to reduce throttling until we get a
                            // principled solution in place
        }
        else {
            file_put_contents($debugLog,
                              "parms: $go\nurl: $url\n".print_r($parameters, TRUE)."\n",
                              FILE_APPEND);
            if ($code == "") {
                file_put_contents($debugLog, "error elt:\n$output\n", FILE_APPEND);
            }
        }
    }
    else {
        // Reading the review URL raises a notice/warning when the response
        // has no Item; log the offending response instead of letting the
        // message surface.  The closure must capture what it logs.
        set_error_handler(function () use ($debugLog, $searchparameterdata) {
            global $output;
            file_put_contents($debugLog,
                              "Caught one?: ".$searchparameterdata,
                              FILE_APPEND);
            file_put_contents($debugLog,
                              print_r($output, TRUE)."\n",
                              FILE_APPEND);
        });
        $review = (string) $xml->Items->Item->CustomerReviews->IFrameURL;
        restore_error_handler();
    }

    if ($review != "") {
        // Scrape the review iframe: strip the header/histogram and layout
        // markup, mark review boundaries, then cut out the text between them.
        $text = @file_get_contents($review . "&truncate=300");
        if ($text === false) {
            $text = "";
        }
        $html = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text);
        if (preg_match('~<body[^>]*>(.*?)</body>~si', $html, $body)) {
            $html = trim($body[1]);
        }
        $html = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $html);
        $html = preg_replace('/<\/div>/', '', $html);
        // replace <!-- BOUNDARY --> with BOTTOM-TOP
        $html = str_replace('<!-- BOUNDARY -->', 'BOTTOM-TOP', $html);
        $html = preg_replace('~<table cellpadding="0"(.*?)%">~s', '', $html);
        $html = preg_replace('~</td><td bg(.*?)</table>~s', '', $html);
        $html = preg_replace('~<a name=(.*?)</a>~s', '', $html);
        $html = str_replace('<br />', '', $html);
        // replace </td> with BOTTOM
        $html = str_replace('</td>', 'BOTTOM', $html);

        $found = array("", "", "");
        if (preg_match_all('~TOP(.*?)BOTTOM~s', $html, $reviews)) {
            foreach (array_slice($reviews[1], 0, 3) as $idx => $content) {
                $found[$idx] = str_replace(array("\n", "\r"), "", trim($content));
            }
        }
        list($review1, $review2, $review3) = $found;

        // Swap Amazon's CustomerReviews element for one carrying the scraped
        // text: always three Review/Content pairs, empty when not found.
        unset($xml->Items->Item->CustomerReviews);
        $xdoc = new DomDocument;
        $xdoc->loadXML($xml->asXML());

        $cReviews = $xdoc->createElement('CustomerReviews');
        foreach ($found as $content) {
            $cReview = $xdoc->createElement('Content');
            $cReview->appendChild($xdoc->createTextNode($content));
            $cReviewHolder = $xdoc->createElement('Review');
            $cReviewHolder->appendChild($cReview);
            $cReviews->appendChild($cReviewHolder);
        }

        // Attach to the first Item element, located by name rather than by
        // child position so an unexpected layout cannot cause a fatal error.
        $itemNode = $xdoc->getElementsByTagName('Item')->item(0);
        if ($itemNode) {
            $itemNode->appendChild($cReviews);
            $newXML = simplexml_import_dom($xdoc);
            $output = $newXML->asXml();
        }
    }

    $errorCode = (string) $xml->Error->Code;

    if ($errorCode != "AccountLimitExceeded") {
        if ($go == 1) {
            $item = $xml->Items->Item[0];
            if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) {
                $title = $item->ItemAttributes->Title;
            }
            else {
                $title = "[no title]";
            }
            file_put_contents($debugLog, "win: ".$title."\n", FILE_APPEND);
            return $output;
        }

        $title = $author = $binding = $imageURL = "";
        $salesRank = $pubDate = $publisher = "";
        $dewey = "";
        if ($xml->Items->Item) {
            $item      = $xml->Items->Item[0];
            $title     = (string) $item->ItemAttributes->Title;
            $author    = (string) $item->ItemAttributes->Author;
            $binding   = (string) $item->ItemAttributes->Binding;
            $dewey     = (string) $item->ItemAttributes->DeweyDecimalNumber;
            $imageURL  = (string) $item->MediumImage->URL;
            $salesRank = (string) $item->SalesRank;
            $pubDate   = (string) $item->ItemAttributes->PublicationDate;
            $publisher = (string) $item->ItemAttributes->Publisher;
        }

        // Collect id => name for every browse node for which findGenre
        // reports a genre.
        $genArr = array();
        if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) {
            foreach ($xml->Items->Item[0]->BrowseNodes->BrowseNode as $node) {
                $genreID = "";
                $genre = "";
                findGenre($node, $genreID, $genre);
                if ($genre != "") {
                    $genArr[strval($genreID)] = strval($genre);
                }
            }
        }

        // Register each genre and remember the first three ids ("null" pads).
        $genreIds = array("null", "null", "null");
        $slot = 0;
        foreach ($genArr as $key => $value) {
            if ($key > 2047) {
                // HST added
                // NOTE(review): this stops at the first id above 2047 rather
                // than skipping it -- confirm that is intended.
                break;
            }
            // A fresh connection per call, closed straight away.
            include "../../private/db.php";
            // add the name value pair for genre to new table
            $queryG = sprintf('CALL b_addBrowseNode(%d,"%s")',
                              (int) $key,
                              mysqli_real_escape_string($link, $value));
            mysqli_query($link, $queryG);
            mysqli_close($link);

            if ($slot < 3) {
                $genreIds[$slot] = (int) $key;
            }
            $slot++;
        }

        // Unquoted SQL arguments must be numeric literals or NULL.
        $deweySql     = is_numeric($dewey) ? $dewey : "null";
        $salesRankSql = is_numeric($salesRank) ? $salesRank : "null";

        $title = strtr($title, '"', "'");

        // Without a numeric locale id (e.g. 'jp') the book cannot be stored;
        // the response is still delivered below.
        if ($title != "" && $loc !== null) {
            include "../../private/db.php";

            $queryInsert = sprintf(
                'CALL b_addNewBook("%s","%s", "%s","%s","%s", %s, %s,"%s","%s",%s,%s,%s,%d)',
                mysqli_real_escape_string($link, $searchparameterdata),
                mysqli_real_escape_string($link, $title),
                mysqli_real_escape_string($link, $author),
                mysqli_real_escape_string($link, $binding),
                mysqli_real_escape_string($link, $imageURL),
                $deweySql,
                $salesRankSql,
                mysqli_real_escape_string($link, $pubDate),
                mysqli_real_escape_string($link, $publisher),
                $genreIds[0],
                $genreIds[1],
                $genreIds[2],
                $loc
            );
            $queryInsertReviews = sprintf(
                'CALL b_insertReviews("%s","%s","%s","%s")',
                mysqli_real_escape_string($link, $searchparameterdata),
                mysqli_real_escape_string($link, $review1),
                mysqli_real_escape_string($link, $review2),
                mysqli_real_escape_string($link, $review3)
            );

            // NOTE(review): a failed insert aborts the script and sends the
            // raw MySQL error to the client -- kept for compatibility.
            $resG = mysqli_query($link, $queryInsert) or exit( mysqli_error( $link ));
            if ($review1 != "" && $review2 != "" && $review3 != "") {
                $resG = mysqli_query($link, $queryInsertReviews) or exit( mysqli_error( $link ));
            }

            mysqli_close($link);
        }

        echo $output;
    }
    else {
        // Amazon refused the request: answer from the local database.
        $output  = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
        $output .= "<Details>";

        $rows = null;
        if ($loc !== null) {
            include "../../private/db.php";
            $query = sprintf("CALL b_getBookInfo('%s', %d)",
                             mysqli_real_escape_string($link, $searchparameterdata),
                             $loc);
            $res = mysqli_query($link, $query) or exit( mysqli_error( $link ));
            if (mysqli_num_rows($res) > 0) {
                $rows = mysqli_fetch_array($res, MYSQLI_ASSOC);
            }
            mysqli_close($link);
        }

        if ($rows) {
            // Element name => result-set column.
            $columns = array(
                'Title'           => 'Title',
                'Author'          => 'Author',
                'Binding'         => 'Binding',
                'Dewey'           => 'DeweyDecimal',
                'ImageURL'        => 'ImageURL',
                'SalesRank'       => 'SalesRank',
                'PublicationDate' => 'PublicationDate',
                'Publisher'       => 'Publisher',
                'Genre1'          => 'Genre1',
                'Genre2'          => 'Genre2',
                'Genre3'          => 'Genre3',
            );
            $output .= "<ASIN>" . htmlspecialchars($searchparameterdata) . "</ASIN>";
            foreach ($columns as $element => $column) {
                $cell = isset($rows[$column]) ? (string) $rows[$column] : "";
                $output .= "<$element>" . htmlspecialchars($cell) . "</$element>";
            }
            $output .= "<ProductGroup>Book</ProductGroup>";
            $output .= "<Error>AccountLimitExceeded</Error>";
        }

        $output .= "</Details>";
        echo $output;
    }
}

/**
 * Find the browse node that sits directly beneath the "Subjects" node on the
 * ancestor chain of $browseNode.
 *
 * The chain is followed through the first Ancestors->BrowseNode of each node.
 * When a node whose parent is named "Subjects" is found, its Name and
 * BrowseNodeId are written to $gen and $ID; if no such node exists both
 * are left untouched.
 *
 * @param SimpleXMLElement $browseNode Node to start from.
 * @param mixed            $ID         Receives the BrowseNodeId element.
 * @param mixed            $gen        Receives the Name element.
 * @return bool True only when $browseNode itself is the "Subjects" node.
 */
function findGenre($browseNode, &$ID, &$gen)
{
    if ($browseNode->Name == "Subjects") {
        return true;
    }

    // Climb one ancestor at a time until the parent is "Subjects" or the
    // chain ends.
    $current = $browseNode;
    while ($current->Ancestors->BrowseNode) {
        $parent = $current->Ancestors->BrowseNode;
        if ($parent->Name == "Subjects") {
            $gen = $current->Name;
            $ID = $current->BrowseNodeId;
            break;
        }
        $current = $parent;
    }

    return false;
}

// Stand-alone entry point. When no $ret variable has been set before this
// point, load the request signer and serve the lookup immediately.
// NOTE(review): presumably an including script sets $ret to suppress this
// and calls getAmazonDet itself -- confirm against the callers.
if (!isset($ret)) {
    include 'aws_signed_request.php';
    // Placeholder arguments: getAmazonDet replaces them with the
    // same-named $_GET values when those are present.
    getAmazonDet('default', 0, 'us');
}
/*Caught oneSimpleXMLElement Object
(
    [OperationRequest] => SimpleXMLElement Object
        (
            [RequestId] => d2eaacba-2411-44e7-b268-f23a20167330
            [Arguments] => SimpleXMLElement Object
                (
                    [Argument] => Array
                        (
                            [0] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => AWSAccessKeyId
                                            [Value] => AKIAIHTNWC7L6LOUY4LQ 
                                        )

                                )

                            [1] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => AssociateTag
                                            [Value] => bookwhack-21
                                        )

                                )

                            [2] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => ItemId
                                            [Value] => B004Q3Q3Y4
                                        )

                                )

                            [3] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => ItemPage
                                            [Value] => 1
                                        )

                                )

                            [4] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => Operation
                                            [Value] => ItemLookup
                                        )

                                )

                            [5] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => ResponseGroup
                                            [Value] => ItemAttributes,Reviews,EditorialReview,OfferSummary,Offers,Images,AlternateVersions,SalesRank,BrowseNodes
                                        )

                                )

                            [6] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => ReviewSort
                                            [Value] => -HelpfulVotes
                                        )

                                )

                            [7] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => Service
                                            [Value] => AWSECommerceService
                                        )

                                )

                            [8] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => Timestamp
                                            [Value] => 2016-12-15T23:12:34Z
                                        )

                                )

                            [9] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => Version
                                            [Value] => 2011-08-01
                                        )

                                )

                            [10] => SimpleXMLElement Object
                                (
                                    [@attributes] => Array
                                        (
                                            [Name] => Signature
                                            [Value] => SUXfFZHQ74Joc+WDLx87uzemTdtHijNohykqafJXYKQ=
                                        )

                                )

                        )

                )

            [RequestProcessingTime] => 0.3518217620000000
        )

    [Items] => SimpleXMLElement Object
        (
            [Request] => SimpleXMLElement Object
                (
                    [IsValid] => True
                    [ItemLookupRequest] => SimpleXMLElement Object
                        (
                            [IdType] => ASIN
                            [ItemId] => B004Q3Q3Y4
                            [ResponseGroup] => Array
                                (
                                    [0] => ItemAttributes
                                    [1] => Reviews
                                    [2] => EditorialReview
                                    [3] => OfferSummary
                                    [4] => Offers
                                    [5] => Images
                                    [6] => AlternateVersions
                                    [7] => SalesRank
                                    [8] => BrowseNodes
                                )

                            [VariationPage] => All
                        )

                    [Errors] => SimpleXMLElement Object
                        (
                            [Error] => SimpleXMLElement Object
                                (
                                    [Code] => AWS.InvalidParameterValue
                                    [Message] => B004Q3Q3Y4 is not a valid value for ItemId. Please change this value and retry your request.
                                )

                        )

                )

        )

)
*/
	/*$xml = new SimpleXMLElement("<?xml version=\"1.0\"?><ItemLookupErrorResponse xmlns=\"http://ecs.amazonaws.com/doc/2009-03-31/\"><Error><Code>AccountLimitExceeded</Code><Message>Account limit of 2056 requests per hour exceeded.</Message></Error><RequestID>290ed059-730c-4789-93b4-6d21e11053d3</RequestID></ItemLookupErrorResponse>");*/
?>