comparison xml/getAmazonInfo.php @ 32:8130865e9e82

refactor to try to improve error handling
author Charlie Root
date Fri, 04 Jan 2019 09:50:55 -0500
parents 4124f103b46b
children c9d9b76ecbf9
comparison
equal deleted inserted replaced
31:4124f103b46b 32:8130865e9e82
1 <?php 1 <?php
2 2
3 $lastReqTime=0;
/**
 * Entry point: look a book up at Amazon, falling back to the local database.
 *
 * Every option in the defaults table below may be overridden by a scalar
 * query-string parameter of the same name.  The remote lookup is attempted up
 * to three times, but only a 'RequestThrottled' failure is retried; any other
 * failure goes straight to the local lookup.
 *
 * @param string $isbn     default item id to look up
 * @param mixed  $go       passed through unchanged to the remote/local lookup
 * @param string $localeIn default locale: one of us, uk, ca, de, fr
 * @return mixed whatever getAmazonDetRemote() or getAmazonDetLocal() returns
 */
function getAmazonDet($isbn,$go,$localeIn)
{
  $debugLog='/var/ywww/debug/phpDebug';

  $Adefault=array(
    'language'           =>'en',         //what language to render the page in
    'locale'             =>$localeIn,    //which server's products?
    'page'               =>1,            //first page to show (we are counting from 1 not 0)
    'operation'          =>'ItemLookup', //what to do? //ItemSearch
    'searchparameter'    =>'ItemId',     //what kind of search?
    'searchparameterdata'=>$isbn,        //what to search for?
    //debugging flags that can be put at the end of the URL, like: '?show_array=true'
    'show_array'         =>false,        //debug: show complete incoming array?
    'show_url'           =>false,        //debug: show XML request url to be sent to Amazon?
    'show_xml'           =>false,        //debug: show incoming XML code from Amazon?
  );

  //for all parameters see if the user has overruled it or use the default;
  //array-valued query parameters (?locale[]=x) are ignored
  $opt=array();
  foreach ($Adefault as $name=>$default) {
    $opt[$name]=(isset($_GET[$name]) && is_scalar($_GET[$name]))
                ? $_GET[$name] : $default;
  }

  //numeric locale ids used by the database procedures
  $locIds=array('us'=>0,'uk'=>1,'ca'=>2,'de'=>3,'fr'=>4);
  $locale=$opt['locale'];
  if (!is_string($locale) || !isset($locIds[$locale])) {
    //previously $loc was simply left undefined here, which broke both the
    //remote request and the SQL of the local lookup
    $fallback=(is_string($localeIn) && isset($locIds[$localeIn]))
              ? $localeIn : 'us';
    file_put_contents($debugLog,
                      "Unsupported locale, using $fallback\n",FILE_APPEND);
    $locale=$fallback;
  }
  $loc=$locIds[$locale];

  $maxTries=3;
  for ($try=1; $try<=$maxTries; $try++) {
    try {
      return getAmazonDetRemote($go,$opt['language'],$locale,$loc,
                                $opt['page'],$opt['operation'],
                                $opt['searchparameter'],
                                $opt['searchparameterdata'],
                                $opt['show_array'],$opt['show_url'],
                                $opt['show_xml']);
    }
    catch (Exception $e) {
      //compare the exception *message*: the exception object itself
      //stringifies to class, file and stack trace and never equals a bare code
      $code=$e->getMessage();
      file_put_contents($debugLog,"Bang: $code $try\n",FILE_APPEND);
      if ($code!=='RequestThrottled') {
        break;
      }
      if ($try<$maxTries) {
        usleep(200000); // Try to reduce throttling until we get a
                        // principled solution in place
      }
    }
  }
  return getAmazonDetLocal($go,$opt['language'],$loc,
                           $opt['page'],$opt['operation'],
                           $opt['searchparameter'],
                           $opt['searchparameterdata'],
                           $opt['show_array'],$opt['show_url'],
                           $opt['show_xml']);
}
69
/**
 * Look one item up through the Amazon Product Advertising API.
 *
 * The signed request is sent while holding the "Amazon" SyncSemaphore and is
 * followed by a 0.5 s pause before the semaphore is released.  When the
 * response carries a customer-review iframe URL, that page is fetched and up
 * to three review texts are extracted and spliced into the XML in place of
 * the original CustomerReviews element.
 *
 * With $go == 1 the resulting XML is returned.  Otherwise the item (title,
 * author, genres, reviews, ...) is stored through the b_addBrowseNode,
 * b_addNewBook and b_insertReviews procedures and the XML is echoed.
 *
 * $language, $show_array, $show_url and $show_xml are accepted for signature
 * compatibility and are not used by this function.
 *
 * @param mixed  $go                  1 => return XML; otherwise store and echo
 * @param string $locale              one of us, uk, ca, de, fr
 * @param int    $loc                 numeric locale id stored with the book
 * @param mixed  $page                value of the ItemPage request parameter
 * @param string $operation           value of the Operation request parameter
 * @param string $searchparameter     name of the lookup parameter (e.g. ItemId)
 * @param string $searchparameterdata value of the lookup parameter
 * @return string|null XML when $go == 1, otherwise nothing
 * @throws Exception whose message is the Amazon error code (for example
 *                   'RequestThrottled'), 'UnsupportedLocale' or 'CurlFailed'
 */
function getAmazonDetRemote($go,$language,$locale,$loc,
                            $page,$operation,$searchparameter,
                            $searchparameterdata,
                            $show_array,$show_url,$show_xml)
{
  global $output;
  $debugLog='/var/ywww/debug/phpDebug';

  $Aassociates_id=array(
    'uk' => 'bookwhack-21',
    'us' => 'your02b-20',
    'ca' => 'book009-20',
    'de' => 'book04c-21',
    'fr' => 'book07f-21',
  );

  $Aserver=array(
    'ca' => array(
      'ext' => 'ca'                      , //Canadian normal server
      'nor' => 'http://www.amazon.ca'    , //Canadian normal server
      'xml' => 'http://xml.amazon.com'   , //Canadian xml server
    ),
    'de' => array(
      'ext' => 'de'                      , //German normal server
      'nor' => 'http://www.amazon.de'    , //German normal server
      'xml' => 'http://xml-eu.amazon.com', //German xml server
    ),
    'fr' => array(
      'ext' => 'fr'                      , //French normal server
      'nor' => 'http://www.amazon.fr'    , //French normal server
      'xml' => 'http://xml-eu.amazon.com', //French xml server
    ),
    'jp' => array(
      'ext' => 'jp'                      , //Japanese normal server, not co.jp!
      'nor' => 'http://www.amazon.co.jp' , //Japanese normal server
      'xml' => 'http://xml.amazon.com'   , //Japanese xml server
    ),
    'uk' => array(
      'ext' => 'co.uk'                   , //UK normal server
      'nor' => 'http://www.amazon.co.uk' , //UK normal server
      'xml' => 'http://xml-eu.amazon.com', //UK xml server
    ),
    'us' => array(
      'ext' => 'com'                     , //USA normal server
      'nor' => 'http://www.amazon.com'   , //USA normal server
      'xml' => 'http://xml.amazon.com'   , //USA xml server
    ),
  );

  //a locale needs both a server entry and an associate tag; without them the
  //request below would be built from undefined array entries
  if (!is_string($locale) || !isset($Aserver[$locale])
      || !isset($Aassociates_id[$locale])) {
    throw new Exception('UnsupportedLocale');
  }

  //NOTE(review): credentials are hard-coded in source control; they should be
  //rotated and loaded from configuration outside the repository.
  $public_key ="AKIAIHTNWC7L6LOUY4LQ";
  $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde";

  //this is the data that is used to form the request for AWS
  //this is the part that is search specific
  $parameters=array(
    'Operation'       =>$operation,
    "$searchparameter"=>$searchparameterdata,
    'ItemPage'        =>$page, //which page?
    'AssociateTag'    =>$Aassociates_id[$locale],
    'ResponseGroup'   =>'ItemAttributes,Reviews,EditorialReview,OfferSummary,Offers,Images,AlternateVersions,SalesRank,BrowseNodes',
    'ReviewSort'      =>'-HelpfulVotes'
  );

  $requestURI=isset($_SERVER['REQUEST_URI']) ? $_SERVER['REQUEST_URI'] : '';
  $requestIP =isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '';
  //Clients that repeat the same request have been seen to trigger
  //RequestThrottled; delaying or rejecting them by IP was tried and dropped.

  $ext=$Aserver[$locale]['ext'];
  ksort($parameters);
  $url=aws_signed_request($ext,$parameters,$public_key,$private_key);

  $crl=curl_init();
  $timeout=5;
  curl_setopt($crl, CURLOPT_URL, $url);
  curl_setopt($crl, CURLOPT_ENCODING, "gzip");
  curl_setopt($crl, CURLOPT_RETURNTRANSFER, 1);
  curl_setopt($crl, CURLOPT_CONNECTTIMEOUT, $timeout);

  //serialise requests to Amazon; if the lock cannot be had after two
  //attempts the request is sent anyway and that fact is logged
  $semaphore=new SyncSemaphore("Amazon");
  $gotit=$semaphore->lock(1000);
  if (!$gotit) {
    file_put_contents($debugLog,
                      "Got it: ".(int)$gotit." $requestIP 1\n",FILE_APPEND);
    $gotit=$semaphore->lock(1000);
    file_put_contents($debugLog,
                      "Got it: ".(int)$gotit." $requestIP 1a\n",FILE_APPEND);
  }
  $output=curl_exec($crl);
  $curlError=($output===false) ? curl_error($crl) : '';
  curl_close($crl);
  usleep(500000); //pause while still holding the lock to space requests out
  if ($gotit) {
    $semaphore->unlock();
  }
  else {
    file_put_contents($debugLog,
                      "W/o lock for $requestIP 1b\n",FILE_APPEND);
  }
  if ($output===false) {
    //transport failure: report it explicitly instead of trying to parse false
    file_put_contents($debugLog,
                      "curl failed: $curlError, $requestIP, $requestURI\n",
                      FILE_APPEND);
    throw new Exception('CurlFailed');
  }

  $review="";
  $review1="";
  $review2="";
  $review3="";
  $mm=array();
  if (preg_match("/<Error>/",$output,$mm)) {
    $xml=new SimpleXMLElement($output);
    $resName=$xml->getName();
    $code=(string)$xml->Error->Code;
    if ($code==='') {
      $code=(string)$xml->Items->Request->Errors->Error->Code;
    }
    file_put_contents($debugLog,"Losing: ".$mm[0].", $resName, $code, $requestIP, $requestURI\n",FILE_APPEND);
    if ($code!=='RequestThrottled') {
      file_put_contents($debugLog,"parms: $go\nurl: $url\n".
                        print_r($parameters,TRUE)."\n",FILE_APPEND);
      if ($code==='') {
        file_put_contents($debugLog,"error elt:\n$output\n",FILE_APPEND);
      }
    }
    throw new Exception($code);
  }

  $xml=new SimpleXMLElement($output);
  //Items->Item->CustomerReviews is sometimes missing, which raises a notice
  //on the property access below; log the offending response when it happens.
  //The item id must be captured with use(): closures do not see the
  //enclosing function's locals.
  set_error_handler(function () use ($searchparameterdata, $debugLog) {
      global $output;
      file_put_contents($debugLog,
                        "Caught one?: ".$searchparameterdata,
                        FILE_APPEND);
      file_put_contents($debugLog,
                        print_r($output, TRUE)."\n",
                        FILE_APPEND);
    } );
  $review=$xml->Items->Item->CustomerReviews->IFrameURL;
  restore_error_handler();

  if ($review != "")
  {
    //fetch the review iframe and reduce it to TOP...BOTTOM delimited texts
    $text=@file_get_contents($review . "&truncate=300");
    $removeTop2=preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text);
    if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)) {
      $getBody=trim($body[1]);
    }
    else {
      $getBody=$removeTop2;
    }
    $removeDiv=preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody);
    $removeCloseDivs=preg_replace('/<\/div>/','', $removeDiv);
    //replace <!-- BOUNDARY --> with BOTTOM-TOP
    $setBoundary=str_replace('<!-- BOUNDARY -->','BOTTOM-TOP', $removeCloseDivs);
    $setBoundary=preg_replace('~<table cellpadding="0"(.*?)%">~s','', $setBoundary);
    $setBoundary=preg_replace('~</td><td bg(.*?)</table>~s','', $setBoundary);
    $setBoundary=preg_replace('~<a name=(.*?)</a>~s','', $setBoundary);
    $setBoundary2=str_replace('<br />','', $setBoundary);
    //replace </td> with BOTTOM
    $setBoundary3=str_replace('</td>','BOTTOM', $setBoundary2);

    //keep at most the first three reviews, with line breaks removed
    $found=array();
    if (preg_match_all('~TOP(.*?)BOTTOM~s', $setBoundary3, $reviews)) {
      foreach (array_slice($reviews[1],0,3) as $raw) {
        $found[]=str_replace(array("\n","\r"), "", trim($raw));
      }
    }
    $found=array_pad($found,3,"");
    $review1=$found[0];
    $review2=$found[1];
    $review3=$found[2];

    //swap Amazon's CustomerReviews element for one holding the three texts
    unset($xml->Items->Item->CustomerReviews);
    $xdoc=new DomDocument;
    $xdoc->loadXML($xml->asXML());

    $cReviews=$xdoc->createElement('CustomerReviews');
    foreach ($found as $reviewText) {
      $content=$xdoc->createElement('Content');
      $content->appendChild($xdoc->createTextNode($reviewText));
      $holder=$xdoc->createElement('Review');
      $holder->appendChild($content);
      $cReviews->appendChild($holder);
    }

    //second child of the second child of the root is where the reviews go;
    //skip the append rather than die if the response is shaped differently
    $itemsNode=$xdoc->documentElement->childNodes->item(1);
    $itemNode=$itemsNode ? $itemsNode->childNodes->item(1) : null;
    if ($itemNode) {
      $itemNode->appendChild($cReviews);
    }
    $newXML=simplexml_import_dom($xdoc);
    $output=$newXML->asXml();
  }

  if ($go == 1) {
    $item=$xml->Items->Item[0];
    if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) {
      $title=$item->ItemAttributes->Title;
    }
    else {
      $title="[no title]";
    }
    //error responses throw above, so there is never an error code here;
    //the empty field keeps the established log line format
    $errorCode="";
    file_put_contents($debugLog,"win: |$errorCode| ".
                      $title."\n",
                      FILE_APPEND);
    return $output;
  }

  $title="";
  $salesRank="";
  $dewey="";
  $binding="";
  $imageURL="";
  $pubDate="";
  $author=null;
  $publisher=null;
  if ($xml->Items->Item) {
    $item0=$xml->Items->Item[0];
    $attrs=$item0->ItemAttributes;
    $title=(string)$attrs->Title;
    $author=$attrs->Author;
    $binding=(string)$attrs->Binding;
    $dewey=(string)$attrs->DeweyDecimalNumber;
    $imageURL=(string)$item0->MediumImage->URL;
    $salesRank=(string)$item0->SalesRank;
    $pubDate=(string)$attrs->PublicationDate;
    //pad year-only and year-month dates out to a full date
    if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01"; }
    if (strlen($pubDate)==7) { $pubDate=$pubDate."-01"; }
    $publisher=$attrs->Publisher;
  }

  //collect genre id => name pairs from the item's browse nodes
  $genreID="";
  $genre="";
  $genArr=array();
  if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) {
    foreach ($xml->Items->Item->BrowseNodes->BrowseNode as $browseNode) {
      findGenre($browseNode, $genreID, $genre);
      if ($genre != "") {
        $genArr[strval($genreID)]=strval($genre);
      }
      $genre="";
      $genreID="";
    }
  }

  $g1="null";
  $g2="null";
  $g3="null";
  $loop=1;
  foreach ($genArr as $key => $value) {
    if ($key>2047) {
      //HST added
      break;
    }
    $key=(int)$key;
    //a fresh connection per CALL, as everywhere else in this file
    include "../../private/db.php";
    //add the name value pair for genre to new table
    $queryG="CALL b_addBrowseNode($key,\"".
            mysqli_real_escape_string($link,$value)."\")";
    $resG=mysqli_query($link, $queryG);
    mysqli_close($link);

    switch ($loop) {
      case 1:
        $g1=$key;
        break;
      case 2:
        $g2=$key;
        break;
      case 3:
        $g3=$key;
        break;
    }
    $loop++;
  }

  if ($title != "")
  {
    include "../../private/db.php";

    //escape for SQL, then cut to the column width; trailing backslashes are
    //trimmed so that a cut never leaves half an escape sequence behind
    $clip=function ($text, $max) use ($link) {
      $text=mysqli_real_escape_string($link,(string)$text);
      if (strlen($text)>$max) {
        $text=rtrim(substr($text,0,$max),"\\");
      }
      return $text;
    };

    if ($publisher->count()==0) {
      $publisherSql="null";
    }
    else {
      $publisherSql="\"".$clip($publisher,30)."\"";
    }
    if ($author->count()==0) {
      $authorSql="\"unknown\"";
    }
    else {
      $authorSql="\"".$clip($author,30)."\"";
    }
    $titleSql="\"".$clip($title,100)."\"";
    $review1=$clip($review1,500);
    $review2=$clip($review2,500);
    $review3=$clip($review3,500);

    //everything else that goes into the statements is escaped too: the item
    //id comes from the query string, the rest from the remote response
    $idSql     =mysqli_real_escape_string($link,(string)$searchparameterdata);
    $bindingSql=mysqli_real_escape_string($link,$binding);
    $imageSql  =mysqli_real_escape_string($link,$imageURL);
    $pubDateSql=($pubDate==="") ? "null"
                : "\"".mysqli_real_escape_string($link,$pubDate)."\"";
    //these two are interpolated unquoted, so only numbers may pass
    $deweySql    =is_numeric($dewey)     ? $dewey     : "null";
    $salesRankSql=is_numeric($salesRank) ? $salesRank : "null";
    $locSql=(int)$loc;

    $queryInsert="CALL b_addNewBook(\"$idSql\",$titleSql, $authorSql,\"$bindingSql\",\"$imageSql\", $deweySql, $salesRankSql,$pubDateSql,$publisherSql,$g1,$g2,$g3,$locSql)";

    $res=mysqli_query($link, $queryInsert);
    if (!$res) {
      $err=mysqli_error( $link );
      mysqli_close($link);
      file_put_contents($debugLog,
                        "anb failed: $queryInsert\n$err\n",
                        FILE_APPEND);
      exit($err);
    }
    $queryInsertReviews="CALL b_insertReviews(\"$idSql\",\"$review1\",\"$review2\",\"$review3\")";
    if ($review1 != "") {
      $res=mysqli_query($link, $queryInsertReviews);
      if (!$res) {
        $err=mysqli_error( $link );
        mysqli_close($link);
        file_put_contents($debugLog,
                          "anr failed: $queryInsertReviews\n",
                          FILE_APPEND);
        exit($err);
      }
    }
    mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
  }

  echo $output;
}
73 //this is the data that is used to form the request for AWS 471
/**
 * Fallback lookup: answer from the local database instead of Amazon.
 *
 * Calls the b_getBookInfo procedure for the item and locale, builds a small
 * <Details> XML document from the first row returned, stores it in the global
 * $output and echoes it.  A found record always carries
 * <Error>AccountLimitExceeded</Error>; when nothing is found (or $loc is not
 * numeric) the document is an empty <Details/> element pair.
 *
 * Only $searchparameterdata and $loc are used; the other parameters are
 * accepted so the signature parallels getAmazonDetRemote().
 *
 * @param string $searchparameterdata item id to look up
 * @param int    $loc                 numeric locale id
 * @return void  the XML is echoed, not returned
 */
function getAmazonDetLocal($go,$language,$loc,
                           $page,$operation,$searchparameter,
                           $searchparameterdata,
                           $show_array,$show_url,$show_xml)
{
  global $output;

  $asin=(string)$searchparameterdata;
  $row=null;

  //a non-numeric locale id cannot match anything and must not reach the SQL
  if (is_numeric($loc)) {
    //look up info from db
    include "../../private/db.php";
    //the item id originates from the query string, so it is escaped here
    $query="CALL b_getBookInfo('".mysqli_real_escape_string($link,$asin).
           "', ".(int)$loc.")";
    $res=mysqli_query($link, $query) or exit( mysqli_error( $link ));
    if ( mysqli_num_rows( $res ) > 0 ) {
      $row=mysqli_fetch_array($res, MYSQLI_ASSOC);
    }
    mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
  }

  //output element name => result column name, in output order
  $fields=array(
    'Title'          =>'Title',
    'Author'         =>'Author',
    'Binding'        =>'Binding',
    'Dewey'          =>'DeweyDecimal',
    'ImageURL'       =>'ImageURL',
    'SalesRank'      =>'SalesRank',
    'PublicationDate'=>'PublicationDate',
    'Publisher'      =>'Publisher',
    'Genre1'         =>'Genre1',
    'Genre2'         =>'Genre2',
    'Genre3'         =>'Genre3',
  );

  $output ="<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
  $output.="<Details>";
  if ($row) {
    //the id is echoed back into markup, so it is escaped like the db fields
    $output.="<ASIN>" . htmlspecialchars($asin) . "</ASIN>";
    foreach ($fields as $element=>$column) {
      $value=isset($row[$column]) ? (string)$row[$column] : "";
      $output.="<$element>" . htmlspecialchars($value) . "</$element>";
    }
    $output.="<ProductGroup>Book</ProductGroup>";
    $output.="<Error>AccountLimitExceeded</Error>";
  }
  $output.="</Details>";
  echo $output;
}
487 513
488 function findGenre($browseNode, &$ID, &$gen) 514 function findGenre($browseNode, &$ID, &$gen)
489 { 515 {
490 if($browseNode->Name == "Subjects") 516 if($browseNode->Name == "Subjects")