Mercurial > hg > ywww
view xml/getAmazonInfo.php @ 25:828895488948
more db column protection
author | Robert Boland <robert@markup.co.uk> |
---|---|
date | Tue, 01 Jan 2019 07:30:05 -0500 |
parents | d606320ec331 |
children | 4124f103b46b |
line wrap: on
line source
<?php
$lastReqTime=0;

/**
 * Append one message to the shared debug log file.
 *
 * @param string $message Text to append (callers supply their own newline).
 */
function amazonInfoLog($message) {
    file_put_contents('/var/ywww/debug/phpDebug', $message, FILE_APPEND);
}

/**
 * Cut a string down to at most $maxBytes bytes.
 *
 * Uses mb_strcut() when the mbstring extension is loaded so that a multi-byte
 * UTF-8 sequence is never split in half; otherwise falls back to substr().
 *
 * @param mixed $text     Value to shorten (cast to string).
 * @param int   $maxBytes Maximum length in bytes.
 * @return string
 */
function amazonInfoTruncate($text, $maxBytes) {
    $text = (string)$text;
    if (strlen($text) <= $maxBytes) {
        return $text;
    }
    if (function_exists('mb_strcut')) {
        return mb_strcut($text, 0, $maxBytes, 'UTF-8');
    }
    return substr($text, 0, $maxBytes);
}

/**
 * Build the minimal <Details> error document returned when no Amazon
 * response is available at all.
 *
 * @param string $code Short machine-readable error code.
 * @return string XML document.
 */
function amazonInfoErrorDocument($code) {
    return "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
        . "<Details><Error>" . htmlspecialchars($code) . "</Error></Details>";
}

/**
 * Pull up to three review texts out of the HTML of Amazon's review iframe.
 *
 * The page is reduced with a series of regular expressions, the
 * "<!-- BOUNDARY -->" markers and "</td>" tags are turned into TOP/BOTTOM
 * sentinels, and the text between each TOP...BOTTOM pair is taken as a review.
 *
 * @param mixed $html Iframe HTML (false/empty when the download failed).
 * @return array Exactly three strings; missing reviews are "".
 */
function amazonInfoExtractReviews($html) {
    $content = (string)preg_replace(
        '~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si',
        '',
        (string)$html
    );
    if (preg_match('~<body[^>]*>(.*?)</body>~si', $content, $body)) {
        $content = trim($body[1]);
    }
    $content = (string)preg_replace('~<div[^>]*>(.*?)</div>~si', '', $content);
    $content = (string)preg_replace('/<\/div>/', '', $content);
    // replace <!-- BOUNDARY --> with BOTTOM-TOP
    $content = str_replace('<!-- BOUNDARY -->', 'BOTTOM-TOP', $content);
    $content = (string)preg_replace('~<table cellpadding="0"(.*?)%">~s', '', $content);
    $content = (string)preg_replace('~</td><td bg(.*?)</table>~s', '', $content);
    $content = (string)preg_replace('~<a name=(.*?)</a>~s', '', $content);
    $content = str_replace('<br />', '', $content);
    // replace </td> with BOTTOM
    $content = str_replace('</td>', 'BOTTOM', $content);

    $reviews = array("", "", "");
    if (preg_match_all('~TOP(.*?)BOTTOM~s', $content, $matches)) {
        foreach (array_slice($matches[1], 0, 3) as $index => $raw) {
            $reviews[$index] = str_replace(array("\n", "\r"), "", trim($raw));
        }
    }
    return $reviews;
}

/**
 * Look a product up through the Amazon Product Advertising API, splice the
 * top customer reviews into the response and either return the XML or echo it
 * after recording the book in the local database.
 *
 * Every key of the defaults table below can be overridden by a query-string
 * parameter of the same name (so ?searchparameterdata=...&locale=uk replaces
 * $isbn and $localeIn).
 *
 * Behaviour by case:
 *  - $go == 1: the (possibly review-augmented) Amazon XML is returned.
 *  - otherwise: book details, browse nodes and reviews are written through the
 *    b_addBrowseNode / b_addNewBook / b_insertReviews stored procedures and the
 *    XML is echoed.
 *  - Amazon answered AccountLimitExceeded: a <Details> document is built from
 *    b_getBookInfo and echoed (regardless of $go; nothing is returned).
 *  - unsupported locale, failed transfer or unparseable response: a
 *    <Details><Error>...</Error></Details> document is returned ($go == 1)
 *    or echoed.
 *
 * The global $output always holds the last document produced. A failing
 * database call terminates the script via exit().
 *
 * Relies on aws_signed_request() (aws_signed_request.php), the SyncSemaphore
 * class and ../../private/db.php, which is assumed to open a mysqli
 * connection in $link each time it is included -- TODO confirm.
 *
 * @param string $isbn     Default item id to look up.
 * @param mixed  $go       1 to return the XML instead of storing/echoing it.
 * @param string $localeIn Default locale: one of us, uk, ca, de, fr.
 * @return string|null XML when $go == 1, otherwise null.
 */
function getAmazonDet($isbn,$go,$localeIn) {
    global $output;

    $Adefault=array(
        'language'           =>'en',         //what language to render the page in
        'locale'             =>$localeIn,    //which server's products? available: ca,de,fr,jp,uk,us
        //'mode'             =>'books',      //what product category?
        'page'               =>1,            //first page to show (we are counting from 1 not 0)
        //'search'           =>'Machiavelli', //what to search for?
        'operation'          =>'ItemLookup', //what to do? //ItemSearch
        //'searchindex'      =>'Books',      //what product category for search?
        'searchparameter'    =>'ItemId',     //what kind of search?
        'searchparameterdata'=>$isbn,        //what to search for?
        //debugging flags that can be passed on the URL, like '?show_array=true'
        //(accepted for compatibility; nothing below reads them)
        'show_array'         =>false,        //debug: show complete incoming array?
        'show_url'           =>false,        //debug: show XML request url to be sent to Amazon?
        'show_xml'           =>false,        //debug: show incoming XML code from Amazon?
    );

    $Aassociates_id=array(
        'uk' => 'bookwhack-21',
        'us' => 'your02b-20',
        'ca' => 'book009-20',
        'de' => 'book04c-21',
        'fr' => 'book07f-21',
    );

    $Aserver=array(
        'ca' => array(
            'ext' => 'ca',                        //Canadian normal server
            'nor' => 'http://www.amazon.ca',      //Canadian normal server
            'xml' => 'http://xml.amazon.com',     //Canadian xml server
        ),
        'de' => array(
            'ext' => 'de',                        //German normal server
            'nor' => 'http://www.amazon.de',      //German normal server
            'xml' => 'http://xml-eu.amazon.com',  //German xml server
        ),
        'fr' => array(
            'ext' => 'fr',                        //French normal server
            'nor' => 'http://www.amazon.fr',      //French normal server
            'xml' => 'http://xml-eu.amazon.com',  //French xml server
        ),
        'jp' => array(
            'ext' => 'jp',                        //Japanese normal server, not co.jp!
            'nor' => 'http://www.amazon.co.jp',   //Japanese normal server
            'xml' => 'http://xml.amazon.com',     //Japanese xml server
        ),
        'uk' => array(
            'ext' => 'co.uk',                     //UK normal server
            'nor' => 'http://www.amazon.co.uk',   //UK normal server
            'xml' => 'http://xml-eu.amazon.com',  //UK xml server
        ),
        'us' => array(
            'ext' => 'com',                       //USA normal server
            'nor' => 'http://www.amazon.com',     //USA normal server
            'xml' => 'http://xml.amazon.com',     //USA xml server
        ),
    );

    // Numeric locale code handed to the stored procedures.
    $localeIndex=array('us' => 0, 'uk' => 1, 'ca' => 2, 'de' => 3, 'fr' => 4);

    //if(go != 1)
    //include "aws_signed_request.php";

    // SECURITY: these credentials are committed to the repository. They should
    // be rotated and loaded from configuration outside version control; they
    // are kept here only so that existing deployments keep working.
    $public_key ="AKIAIHTNWC7L6LOUY4LQ";
    $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde";

    //for all parameters see if the user has overruled it or use the default
    //(array-valued query parameters are ignored rather than stringified)
    $options=array();
    foreach ($Adefault as $name=>$default) {
        $options[$name]=(isset($_GET[$name]) && is_scalar($_GET[$name])) ? $_GET[$name] : $default;
    }
    $locale              = $options['locale'];
    $operation           = $options['operation'];
    $page                = $options['page'];
    $searchparameter     = (string)$options['searchparameter'];
    $searchparameterdata = (string)$options['searchparameterdata'];

    $requestURI = isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
    $requestIP  = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';

    // A locale needs a server entry, an associate tag and a database code.
    // 'jp' has a server entry only, so it is rejected here as well.
    if (!is_string($locale)
        || !isset($Aserver[$locale], $Aassociates_id[$locale], $localeIndex[$locale])) {
        amazonInfoLog("Unsupported locale, $requestIP, $requestURI\n");
        $output = amazonInfoErrorDocument('UnsupportedLocale');
        if ($go == 1) {
            return $output;
        }
        echo $output;
        return;
    }
    $loc = $localeIndex[$locale];

    //this is the data that is used to form the request for AWS
    //this is the part that is search specific
    $parameters=array(
        'Operation'        =>$operation,
        //'Keywords'       =>urlencode($search),
        //'SearchIndex'    =>$searchindex,       //Books for example.
        "$searchparameter" =>$searchparameterdata,
        'ItemPage'         =>$page,              //which page?
        'AssociateTag'     =>$Aassociates_id[$locale],
        'ResponseGroup'    =>'ItemAttributes,Reviews,EditorialReview,OfferSummary,Offers,Images,AlternateVersions,SalesRank,BrowseNodes', //Small, Medium, Large or SellerListing,'BrowseNodes'
        'ReviewSort'       =>'-HelpfulVotes'
    );

    // Historical note, kept from the original (this code was disabled):
    // if ($requestIP=="173.161.113.65" || $requestIP=="141.8.132.25") {
    //   $delay=60;
    //   file_put_contents('/var/ywww/debug/phpDebug',
    //                     "bad guy: $requestIP, $requestURI\n",
    //                     FILE_APPEND);
    //   sleep($delay);
    //   # No, can't do this
    //   # return; # bomb!
    //   # 'Kung', sitting on my desk in the office while I'm at home,
    //   #  is occasionally hitting xml/getAmazonInfo.php:
    //   #  e.g. Losing: ItemLookupErrorResponse, RequestThrottled, 129.215.197.36, /xml/getAmazonInfo.php?searchparameterdata=075154454X&locale=uk
    //   #  repeatedly, same params
    // }

    $ext=$Aserver[$locale]['ext'];
    ksort($parameters);

    $url=aws_signed_request($ext,$parameters,$public_key,$private_key);

    $crl = curl_init();
    $timeout = 5;
    curl_setopt ($crl, CURLOPT_URL,$url);
    curl_setopt ($crl, CURLOPT_ENCODING , "gzip");
    curl_setopt ($crl, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt ($crl, CURLOPT_CONNECTTIMEOUT, $timeout);

    // Requests are made while holding the "Amazon" semaphore, followed by a
    // half-second pause. If the lock cannot be obtained after two one-second
    // attempts the request goes ahead anyway.
    $semaphore = new SyncSemaphore("Amazon");
    $gotit = $semaphore->lock(1000);
    if (!$gotit) {
        amazonInfoLog("Got it: ".(int)$gotit." $requestIP 1\n");
        $gotit=$semaphore->lock(1000);
        amazonInfoLog("Got it: ".(int)$gotit." $requestIP 1a\n");
    }
    $output = curl_exec($crl);
    $curlError = ($output === false) ? curl_error($crl) : '';
    curl_close($crl);
    usleep(500000);
    if ($gotit) {
        $semaphore->unlock();
    }
    else {
        amazonInfoLog("W/o lock for $requestIP 1b\n");
    }

    // A failed transfer or a non-XML body used to end in an uncaught exception
    // from SimpleXMLElement; report it as an error document instead.
    $xml = null;
    if ($output !== false && $output !== '') {
        try {
            $xml = new SimpleXMLElement($output);
        } catch (Exception $e) {
            amazonInfoLog("Unparseable response: ".$e->getMessage().", $requestIP, $requestURI\n");
        }
    } else {
        amazonInfoLog("Request failed: $curlError, $requestIP, $requestURI\n");
    }
    if ($xml === null) {
        $output = amazonInfoErrorDocument('AmazonRequestFailed');
        if ($go == 1) {
            return $output;
        }
        echo $output;
        return;
    }

    $review = "";
    $review1 = "";
    $review2 = "";
    $review3 = "";

    // HST added this
    // Both error shapes (a top-level <Error> and <Errors><Error> under
    // Items/Request) contain the literal tag, so a plain substring test
    // suffices and a title containing the word "Error" no longer matches.
    if (strpos($output, '<Error>') !== false) {
        $resName=$xml->getName();
        $code=(string)$xml->Error->Code;
        if ($code === '') {
            $code=(string)$xml->Items->Request->Errors->Error->Code;
        }
        amazonInfoLog("Losing: Error, $resName, $code, $requestIP, $requestURI\n");
        if ($code=='RequestThrottled') {
            usleep(200000); // Try to reduce throttling until we get a
                            // principled solution in place
        }
        else {
            amazonInfoLog("parms: $go\nurl: $url\n".print_r($parameters,TRUE)."\n");
            if ($code=="") {
                amazonInfoLog("error elt:\n$output\n");
            }
        }
    }
    else {
        // The closure must import $searchparameterdata explicitly; without
        // the use-clause it was undefined inside the handler.
        set_error_handler(function () use ($searchparameterdata) {
            global $output;
            file_put_contents('/var/ywww/debug/phpDebug',
                              "Caught one?: ".$searchparameterdata,
                              FILE_APPEND);
            file_put_contents('/var/ywww/debug/phpDebug',
                              print_r($output, TRUE)."\n",
                              FILE_APPEND);
        } );
        $review = (string)$xml->Items->Item->CustomerReviews->IFrameURL;
        // The above is failing repeatedly --
        //PHP Notice:  Trying to get property of non-object in
        //   /var/ywww/xml/getAmazonInfo.php on line [109]
        // See the dumped structure at the end of this file for the
        // cause
        restore_error_handler();
    }

    if ($review != "") {
        // Best effort: a failed download simply yields three empty reviews.
        $text = @file_get_contents($review . "&truncate=300");
        list($review1, $review2, $review3) = amazonInfoExtractReviews($text);

        // Replace Amazon's iframe pointer by a CustomerReviews element that
        // carries the review texts themselves.
        unset($xml->Items->Item->CustomerReviews);
        $xdoc = new DomDocument;
        $xdoc->loadXML($xml->asXML());
        $cReviews = $xdoc->createElement('CustomerReviews');
        foreach (array($review1, $review2, $review3) as $reviewText) {
            $cReviewHolder = $xdoc->createElement('Review');
            $cReview = $xdoc->createElement('Content');
            $cReview->appendChild($xdoc->createTextNode($reviewText));
            $cReviewHolder->appendChild($cReview);
            $cReviews->appendChild($cReviewHolder);
        }
        // Second child of the second child of the root; presumably
        // Items/Item in an ItemLookupResponse -- TODO confirm. Guarded so an
        // unexpected layout no longer causes a fatal error.
        $itemsNode = $xdoc->documentElement->childNodes->item(1);
        $targetNode = $itemsNode ? $itemsNode->childNodes->item(1) : null;
        if ($targetNode) {
            $targetNode->appendChild($cReviews);
            $newXML = simplexml_import_dom($xdoc);
            $output = $newXML->asXml();
        } else {
            amazonInfoLog("No node to attach reviews to, $requestIP, $requestURI\n");
        }
    }

    $errorCode = (string)$xml->Error->Code;

    if($errorCode != "AccountLimitExceeded")
    {
        if($go == 1)
        {
            $item = $xml->Items->Item ? $xml->Items->Item[0] : null;
            if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) {
                $title = $item->ItemAttributes->Title;
            }
            else {
                $title = "[no title]";
            }
            amazonInfoLog("win: ". $title."\n");
            return $output;
        }

        // ---- collect the raw values (escaping happens once $link exists) ----
        $title = $author = $binding = $imageURL = $publisher = $pubDate = "";
        $dewey = "null";
        $salesRank = "null";
        $firstItem = $xml->Items->Item ? $xml->Items->Item[0] : null;
        if ($firstItem) {
            $attributes = $firstItem->ItemAttributes;
            // Lengths follow the limits the original applied (100/30/30).
            $title     = amazonInfoTruncate($attributes->Title, 100);
            $author    = amazonInfoTruncate($attributes->Author, 30);
            $publisher = amazonInfoTruncate($attributes->Publisher, 30);
            $binding   = (string)$attributes->Binding;
            $imageURL  = (string)$firstItem->MediumImage->URL;

            // These two go into the statement unquoted, so only plain numbers
            // are let through; anything else is stored as NULL.
            $deweyRaw = (string)$attributes->DeweyDecimalNumber;
            if (preg_match('/^\d+(\.\d+)?$/', $deweyRaw)) {
                $dewey = $deweyRaw;
            }
            $rankRaw = (string)$firstItem->SalesRank;
            if (preg_match('/^\d+$/', $rankRaw)) {
                $salesRank = $rankRaw;
            }

            // Pad year-only and year-month dates to a full date.
            $pubDate = (string)$attributes->PublicationDate;
            if (strlen($pubDate)==4) {
                $pubDate=$pubDate."-01-01";
            }
            if (strlen($pubDate)==7) {
                $pubDate=$pubDate."-01";
            }
        }
        $title = strtr($title, '"', "'");

        // ---- genres: one entry per browse node that hangs under "Subjects" --
        $genArr = array();
        if ($firstItem && $firstItem->BrowseNodes) {
            foreach ($firstItem->BrowseNodes->BrowseNode as $browseNode) {
                $genreID = "";
                $genre = "";
                findGenre($browseNode, $genreID, $genre);
                if($genre != "") {
                    $genArr[strval($genreID)] = strval($genre);
                }
            }
        }

        $g1 = "null";
        $g2 = "null";
        $g3 = "null";
        $loop = 1;
        foreach ($genArr as $key => $value)
        {
            if (!preg_match('/^\d+$/', (string)$key)) {
                continue; // the id is interpolated unquoted: digits only
            }
            if ($key>2047) {
                //HST added
                break;
            }
            // A fresh connection per CALL, as in the original (see the
            // "do not remove" notes below).
            include "../../private/db.php";
            //add the name value pair for genre to new table
            $queryG = "CALL b_addBrowseNode($key,\""
                . mysqli_real_escape_string($link, $value) . "\")";
            $resG = mysqli_query($link, $queryG);
            if (!$resG) {
                amazonInfoLog("abn failed: ".mysqli_error($link).", $key\n");
            }
            mysqli_close($link);
            switch ($loop)
            {
                case 1:
                    $g1 = $key;
                    break;
                case 2:
                    $g2 = $key;
                    break;
                case 3:
                    $g3 = $key;
                    break;
            }
            $loop++;
        }

        include "../../private/db.php";
        if($title != "")
        {
            // Truncate first, then escape: cutting an already escaped string
            // could leave a dangling backslash in front of the closing quote.
            $sqlIsbn      = mysqli_real_escape_string($link, $searchparameterdata);
            $sqlTitle     = mysqli_real_escape_string($link, $title);
            $sqlAuthor    = mysqli_real_escape_string($link, $author);
            $sqlBinding   = mysqli_real_escape_string($link, $binding);
            $sqlImageURL  = mysqli_real_escape_string($link, $imageURL);
            $sqlPublisher = mysqli_real_escape_string($link, $publisher);
            $sqlReview1   = mysqli_real_escape_string($link, amazonInfoTruncate($review1, 500));
            $sqlReview2   = mysqli_real_escape_string($link, amazonInfoTruncate($review2, 500));
            $sqlReview3   = mysqli_real_escape_string($link, amazonInfoTruncate($review3, 500));
            // Quoted exactly once, or a bare SQL null when there is no date.
            $sqlPubDate   = ($pubDate === "")
                ? "null"
                : "\"" . mysqli_real_escape_string($link, $pubDate) . "\"";

            $queryInsert = "CALL b_addNewBook(\"$sqlIsbn\",\"$sqlTitle\", \"$sqlAuthor\",\"$sqlBinding\",\"$sqlImageURL\", $dewey, $salesRank,$sqlPubDate,\"$sqlPublisher\",$g1,$g2,$g3,$loc)";
            $res = mysqli_query($link, $queryInsert);
            if (!$res) {
                $err=mysqli_error( $link );
                mysqli_close($link);
                amazonInfoLog("anb failed: $err, $pubDate, $g2, $publisher, $title\n");
                exit($err);
            }

            // Reviews are only stored when all three are present.
            if($review1 != "" && $review2 != "" && $review3 != "") {
                $queryInsertReviews = "CALL b_insertReviews(\"$sqlIsbn\",\"$sqlReview1\",\"$sqlReview2\",\"$sqlReview3\")";
                $res = mysqli_query($link, $queryInsertReviews);
                if (!$res) {
                    $err=mysqli_error( $link );
                    mysqli_close($link);
                    amazonInfoLog("anr failed: $err, $pubDate, $g2, $publisher, $title\n");
                    exit($err);
                }
            }
        }
        mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop

        echo $output;
    }
    else
    {
        //look up info from db
        include "../../private/db.php";
        $query = "CALL b_getBookInfo('"
            . mysqli_real_escape_string($link, $searchparameterdata) . "', $loc)";
        $res = mysqli_query($link, $query) or exit( mysqli_error( $link ));
        $output = "";
        $output .= "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
        $output .= "<Details>";
        if ( mysqli_num_rows( $res ) > 0 )
        {
            $rows=mysqli_fetch_array($res, MYSQLI_ASSOC);
            mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
            // The item id comes from the request, so it is escaped like the
            // database values.
            $output .= "<ASIN>" . htmlspecialchars($searchparameterdata) . "</ASIN>";
            $output .= "<Title>" . htmlspecialchars($rows["Title"]) . "</Title>";
            $output .= "<Author>" . htmlspecialchars($rows["Author"]) . "</Author>";
            $output .= "<Binding>" . htmlspecialchars($rows["Binding"]) . "</Binding>";
            $output .= "<Dewey>" . htmlspecialchars($rows["DeweyDecimal"]) . "</Dewey>";
            $output .= "<ImageURL>" . htmlspecialchars($rows["ImageURL"]) . "</ImageURL>";
            $output .= "<SalesRank>" . htmlspecialchars($rows["SalesRank"]) . "</SalesRank>";
            $output .= "<PublicationDate>" . htmlspecialchars($rows["PublicationDate"]) . "</PublicationDate>";
            $output .= "<Publisher>" . htmlspecialchars($rows["Publisher"]) . "</Publisher>";
            $output .= "<Genre1>" . htmlspecialchars($rows["Genre1"]) . "</Genre1>";
            $output .= "<Genre2>" . htmlspecialchars($rows["Genre2"]) . "</Genre2>";
            $output .= "<Genre3>" . htmlspecialchars($rows["Genre3"]) . "</Genre3>";
            $output .= "<ProductGroup>Book</ProductGroup>";
            $output .= "<Error>AccountLimitExceeded</Error>";
        }
        else {
            mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
        }
        $output .= "</Details>";
        echo $output;
    }
}

/**
 * Walk up a browse node's ancestor chain looking for the "Subjects" node.
 *
 * When the node directly below "Subjects" is found, its name and id are
 * written to $gen and $ID; otherwise both are left untouched.
 *
 * @param SimpleXMLElement $browseNode Node to inspect.
 * @param mixed            $ID         Receives the BrowseNodeId of the genre node.
 * @param mixed            $gen        Receives the Name of the genre node.
 * @return bool True only when $browseNode itself is the "Subjects" node.
 */
function findGenre($browseNode, &$ID, &$gen)
{
    if($browseNode->Name == "Subjects")
    {
        return true;
    }

    if($browseNode->Ancestors->BrowseNode)
    {
        // The parent being "Subjects" makes this node the genre.
        if(findGenre($browseNode->Ancestors->BrowseNode, $ID, $gen) == true)
        {
            $gen = $browseNode->Name;
            $ID = $browseNode->BrowseNodeId;
        }
    }
    return false;
}

// Run immediately unless the including script has set $ret. The placeholder
// arguments are replaced by the query-string parameters of the same purpose.
if(!isset($ret))
{
    include "aws_signed_request.php";
    getAmazonDet('default',0,'us'); //will get overwritten
}

/*Caught oneSimpleXMLElement Object
(
    [OperationRequest] => SimpleXMLElement Object
    (
        [RequestId] => d2eaacba-2411-44e7-b268-f23a20167330
        [Arguments] => SimpleXMLElement Object
        (
            [Argument] => Array
            (
                [0] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => AWSAccessKeyId [Value] => AKIAIHTNWC7L6LOUY4LQ ) )
                [1] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => AssociateTag [Value] => bookwhack-21 ) )
                [2] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => ItemId [Value] => B004Q3Q3Y4 ) )
                [3] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => ItemPage [Value] => 1 ) )
                [4] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => Operation [Value] => ItemLookup ) )
                [5] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => ResponseGroup [Value] => ItemAttributes,Reviews,EditorialReview,OfferSummary,Offers,Images,AlternateVersions,SalesRank,BrowseNodes ) )
                [6] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => ReviewSort [Value] => -HelpfulVotes ) )
                [7] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => Service [Value] => AWSECommerceService ) )
                [8] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => Timestamp [Value] => 2016-12-15T23:12:34Z ) )
                [9] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => Version [Value] => 2011-08-01 ) )
                [10] => SimpleXMLElement Object ( [@attributes] => Array ( [Name] => Signature [Value] => SUXfFZHQ74Joc+WDLx87uzemTdtHijNohykqafJXYKQ= ) )
            )
        )
        [RequestProcessingTime] => 0.3518217620000000
    )
    [Items] => SimpleXMLElement Object
    (
        [Request] => SimpleXMLElement Object
        (
            [IsValid] => True
            [ItemLookupRequest] => SimpleXMLElement Object
            (
                [IdType] => ASIN
                [ItemId] => B004Q3Q3Y4
                [ResponseGroup] => Array
                (
                    [0] => ItemAttributes
                    [1] => Reviews
                    [2] => EditorialReview
                    [3] => OfferSummary
                    [4] => Offers
                    [5] => Images
                    [6] => AlternateVersions
                    [7] => SalesRank
                    [8] => BrowseNodes
                )
                [VariationPage] => All
            )
            [Errors] => SimpleXMLElement Object
            (
                [Error] => SimpleXMLElement Object
                (
                    [Code] => AWS.InvalidParameterValue
                    [Message] => B004Q3Q3Y4 is not a valid value for ItemId. Please change this value and retry your request.
                )
            )
        )
    )
)
*/

/*$xml = new SimpleXMLElement("<?xml version=\"1.0\"?><ItemLookupErrorResponse xmlns=\"http://ecs.amazonaws.com/doc/2009-03-31/\"><Error><Code>AccountLimitExceeded</Code><Message>Account limit of 2056 requests per hour exceeded.</Message></Error><RequestID>290ed059-730c-4789-93b4-6d21e11053d3</RequestID></ItemLookupErrorResponse>");*/