comparison xml/getAmazonInfo.php @ 35:86f79bc1d142

Refactor: split out a shareable function that performs the actual Amazon API request
author Charlie Root
date Fri, 04 Jan 2019 12:54:12 -0500
parents c9d9b76ecbf9
children 2c0c95bd97a6
comparison
equal deleted inserted replaced
34:5cae8d572998 35:86f79bc1d142
1 <?php 1 <?php
2
3 include_once "doAmazonRequest.inc";
2 4
3 function getAmazonDet($isbn,$go,$localeIn) 5 function getAmazonDet($isbn,$go,$localeIn)
4 { 6 {
5 $Adefault=array( 7 $Adefault=array(
6 'language' =>'en', //what language to render the page in 8 'language' =>'en', //what language to render the page in
38 case "fr": 40 case "fr":
39 $loc = 4; 41 $loc = 4;
40 break; 42 break;
41 }; 43 };
42 44
43 for ($i=1; $i<=3; $i++) { 45 try {
44 try { 46 return getAmazonDetRemote($go,$language,$locale,$loc,
45 return getAmazonDetRemote($go,$language,$locale,$loc, 47 $page,$operation,$searchparameter,
46 $page,$operation,$searchparameter, 48 $searchparameterdata,
47 $searchparameterdata, 49 $show_array,$show_url,$show_xml);
48 $show_array,$show_url,$show_xml);
49 }
50 catch (Exception $e) {
51 $code=$e->getMessage();
52 file_put_contents('/var/ywww/debug/phpDebug',
53 "Bang: $code $i\n",FILE_APPEND);
54 if ($code=='RequestThrottled') {
55 usleep(200000); // Try to reduce throttling until we get a
56 // principled solution in place
57 $bail=False;
58 }
59 else {
60 $bail=True;
61 }
62 }
63 if ($bail) { break; }
64 } 50 }
65 return getAmazonDetLocal($go,$language,$loc, 51 catch (Exception $e) {
66 $page,$operation,$searchparameter, 52 return getAmazonDetLocal($go,$language,$loc,
67 $searchparameterdata, 53 $page,$operation,$searchparameter,
68 $show_array,$show_url,$show_xml); 54 $searchparameterdata,
55 $show_array,$show_url,$show_xml);
56 }
69 } 57 }
70 58
71 function getAmazonDetRemote($go,$language,$locale,$loc, 59 function getAmazonDetRemote($go,$language,$locale,$loc,
72 $page,$operation,$searchparameter, 60 $page,$operation,$searchparameter,
73 $searchparameterdata, 61 $searchparameterdata,
116 ), 104 ),
117 ); 105 );
118 106
119 //if(go != 1) 107 //if(go != 1)
120 //include "aws_signed_request.php"; 108 //include "aws_signed_request.php";
121 $public_key ="AKIAIHTNWC7L6LOUY4LQ";
122 $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde";
123 //this is the data that is used to form the request for AWS 109 //this is the data that is used to form the request for AWS
124 //this is the part that is search specific 110 //this is the part that is search specific
125 $parameters=array( 111 $parameters=array(
126 'Operation' =>$operation , 112 'Operation' =>$operation ,
127 //'Keywords' =>urlencode($search) , 113 //'Keywords' =>urlencode($search) ,
138 // that caused an error: 124 // that caused an error:
139 // If idType equals ASIN, SearchIndex cannot be present 125 // If idType equals ASIN, SearchIndex cannot be present
140 //'SearchIndex' =>$searchindex , //Books for example. 126 //'SearchIndex' =>$searchindex , //Books for example.
141 $parameters['SearchIndex']=$searchindex; 127 $parameters['SearchIndex']=$searchindex;
142 } 128 }
143 $requestURI = $_SERVER['REQUEST_URI']; 129 $xml=doAmazonRequest($Aserver[$locale]['ext'],$parameters,3); // may throw exception
144 $requestIP = $_SERVER['REMOTE_ADDR']; 130 set_error_handler(function () {
145 $ext=$Aserver[$locale]['ext']; 131 global $output;
146 $file_data=$ext; 132 file_put_contents('/var/ywww/debug/phpDebug',
147 ksort($parameters); 133 "Caught one?: ".$searchparameterdata,
148 foreach ($parameters as $i=>$d) { 134 FILE_APPEND);
149 $file_data.='&'.$i.'='.$d; 135 file_put_contents('/var/ywww/debug/phpDebug',
150 } 136 print_r($output, TRUE)."\n",
151 $gotit=0; 137 FILE_APPEND);
152 $url=aws_signed_request($ext,$parameters,$public_key,$private_key); 138 } );
153 $crl = curl_init(); 139 $review = $xml->Items->Item->CustomerReviews->IFrameURL;
154 $timeout = 5; 140 restore_error_handler();
155 curl_setopt ($crl, CURLOPT_URL,$url); 141 //echo $review;
156 curl_setopt ($crl, CURLOPT_ENCODING , "gzip");
157 curl_setopt ($crl, CURLOPT_RETURNTRANSFER, 1);
158 curl_setopt ($crl, CURLOPT_CONNECTTIMEOUT, $timeout);
159 $semaphore = new SyncSemaphore("Amazon");
160 $gotit = $semaphore->lock(1000);
161 if (!$gotit) {
162 file_put_contents('/var/ywww/debug/phpDebug',
163 "Got it: ".(int)$gotit." $requestIP 1\n",FILE_APPEND);
164 $gotit=$semaphore->lock(1000);
165 file_put_contents('/var/ywww/debug/phpDebug',
166 "Got it: ".(int)$gotit." $requestIP 1a\n",FILE_APPEND);
167 }
168 $output = curl_exec($crl);
169 curl_close($crl);
170 usleep(500000);
171 if ($gotit) {
172 $semaphore->unlock();
173 }
174 else {
175 file_put_contents('/var/ywww/debug/phpDebug',
176 "W/o lock for $requestIP 1b\n",FILE_APPEND);
177 }
178 $review = "";
179 $review1 = "";
180 $review2 = "";
181 $review3 = "";
182 // HST added this
183 $mm=array();
184 if (preg_match("/<Error>/",$output,$mm)) {
185 $xml = new SimpleXMLElement($output);
186 $resName=$xml->getName();
187 $code=$xml->Error->Code;
188 if (!$code) {
189 $code=$xml->Items->Request->Errors->Error->Code;
190 }
191 file_put_contents('/var/ywww/debug/phpDebug',"Losing: ".$mm[0].", $resName, $code, $requestIP, $requestURI\n",FILE_APPEND);
192 if ($code!='RequestThrottled') {
193 file_put_contents('/var/ywww/debug/phpDebug',"parms: $go\nurl: $url\n".
194 print_r($parameters,TRUE)."\n",FILE_APPEND);
195 if ($code=="") {
196 file_put_contents('/var/ywww/debug/phpDebug',"error elt:\n$output\n",FILE_APPEND);
197 }
198 }
199 throw new Exception($code);
200 }
201 else {
202 $xml = new SimpleXMLElement($output);
203 set_error_handler(function () {
204 global $output;
205 file_put_contents('/var/ywww/debug/phpDebug',
206 "Caught one?: ".$searchparameterdata,
207 FILE_APPEND);
208 file_put_contents('/var/ywww/debug/phpDebug',
209 print_r($output, TRUE)."\n",
210 FILE_APPEND);
211 } );
212 $review = $xml->Items->Item->CustomerReviews->IFrameURL;
213 // The above is failing repeatedly --
214 //PHP Notice: Trying to get property of non-object in
215 // /var/ywww/xml/getAmazonInfo.php on line [109]
216 // See the dumped structure at the end of this file for the
217 // cause
218 restore_error_handler();
219 //echo $review;
220 }
221 if ($review != "") 142 if ($review != "")
222 { 143 {
144 $review1 = "";
145 $review2 = "";
146 $review3 = "";
223 $text = @file_get_contents($review . "&truncate=300"); 147 $text = @file_get_contents($review . "&truncate=300");
224 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text); 148 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text);
225 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text); 149 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text);
226 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;} 150 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;}
227 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody); 151 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody);
308 if($go == 1) { 232 if($go == 1) {
309 $item = $xml->Items->Item[0]; 233 $item = $xml->Items->Item[0];
310 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) { 234 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) {
311 $title = $item->ItemAttributes->Title; } 235 $title = $item->ItemAttributes->Title; }
312 else { $title = "[no title]"; }; 236 else { $title = "[no title]"; };
313 file_put_contents('/var/ywww/debug/phpDebug',"win: |$errorCode| ". 237 file_put_contents('/var/ywww/debug/phpDebug',"win: $title\n",
314 $title."\n",
315 FILE_APPEND); 238 FILE_APPEND);
316 return $output; 239 return $output;
317 } 240 }
318 else 241 else {
319 { 242 if ($xml->Items->Item) {
320 if ($xml->Items->Item) { 243 $title = $xml->Items->Item[0]->ItemAttributes->Title;
321 $title = $xml->Items->Item[0]->ItemAttributes->Title; 244 $author = $xml->Items->Item[0]->ItemAttributes->Author;
322 $author = $xml->Items->Item[0]->ItemAttributes->Author; 245 $binding = $xml->Items->Item[0]->ItemAttributes->Binding;
323 $binding = $xml->Items->Item[0]->ItemAttributes->Binding; 246 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber;
324 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber; 247 if($dewey == "")
325 if($dewey == "") 248 $dewey = "null";
326 $dewey = "null"; 249 $imageURL = $xml->Items->Item[0]->MediumImage->URL;
327 $imageURL = $xml->Items->Item[0]->MediumImage->URL; 250 $salesRank = $xml->Items->Item[0]->SalesRank;
328 $salesRank = $xml->Items->Item[0]->SalesRank; 251 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate;
329 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate; 252 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";}
330 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";} 253 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";}
331 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";} 254 if (strlen($pubDate)==0) {
332 if (strlen($pubDate)==0) { 255 $pubDate="null";
333 $pubDate="null";
334 }
335 else {
336 $pubDate="\"$pubDate\"";
337 }
338 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher;
339 } 256 }
340 else { 257 else {
341 $title = $salesRank = ""; 258 $pubDate="\"$pubDate\"";
342 $dewey = "null";
343 } 259 }
260 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher;
261 }
262 else {
263 $title = $salesRank = "";
264 $dewey = "null";
265 }
344 266
345 $genreID = ""; 267 $genreID = "";
346 $genre = ""; 268 $genre = "";
347 $genArr = array(); 269 $genArr = array();
348 270
349 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) { 271 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) {
350 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){ 272 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){
351 //sexy recursive function 273 //sexy recursive function
352 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre); 274 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre);
353 275
354 if($genre != "") 276 if($genre != "")
355 $genArr[strval($genreID)] = strval($genre); 277 $genArr[strval($genreID)] = strval($genre);
356 //$genArr[$i] = array(strval($genreID) => strval($genre)); 278 //$genArr[$i] = array(strval($genreID) => strval($genre));
357 279
358 //echo $genre; 280 //echo $genre;
359 //echo $genreID; 281 //echo $genreID;
360 282
361 $genre = ""; 283 $genre = "";
362 $genreID = ""; 284 $genreID = "";
363 }
364 } 285 }
286 }
365 287
366 $g1 = "null"; 288 $g1 = "null";
367 $g2 = "null"; 289 $g2 = "null";
368 $g3 = "null"; 290 $g3 = "null";
369 $loop = 1; 291 $loop = 1;
370 292
371 foreach ($genArr as $key => $value) { 293 foreach ($genArr as $key => $value) {
372 //echo "$key => $value"; 294 //echo "$key => $value";
373 if ($key>2047) { 295 if ($key>2047) {
374 //HST added 296 //HST added
375 break; 297 break;
376 } 298 }
377 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table 299 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table
378 //echo $queryG; 300 //echo $queryG;
301 include "../../private/db.php";
302 $resG = mysqli_query($link, $queryG);
303 mysqli_close($link);
304
305 switch ($loop) {
306 case 1:
307 $g1 = $key;
308 break;
309 case 2:
310 $g2 = $key;
311 break;
312 case 3:
313 $g3 = $key;
314 break;
315 }
316
317 $loop++;
318 }
319
320 if($salesRank == "")
321 $salesRank = "null";
322
323 if($title != "")
324 {
379 include "../../private/db.php"; 325 include "../../private/db.php";
380 $resG = mysqli_query($link, $queryG); 326 if ($publisher->count()==0) {
381 mysqli_close($link); 327 $publisher="null";
328 }
329 else {
330 $publisher=mysqli_real_escape_string($link,$publisher);
331 if (strlen($publisher)>30) {
332 $publisher=rtrim(substr($publisher,0,30),"\\");
333 }
334 $publisher="\"".$publisher."\"";
335 }
336 if ($author->count()==0) {
337 $author="unknown";
338 }
339 else {
340 $author=mysqli_real_escape_string($link,$author);
341 if (strlen($author)>30) {
342 $author=rtrim(substr($author,0,30),"\\");
343 }
344 }
345 $author="\"".$author."\"";
346 $title=mysqli_real_escape_string($link,$title);
347 if (strlen($title)>100) {
348 $title=rtrim(substr($title,0,100),"\\");
349 }
350 $title="\"".$title."\"";
351 $review1 = mysqli_real_escape_string($link,$review1);
352 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");}
353 $review2 = mysqli_real_escape_string($link,$review2);
354 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");}
355 $review3 = mysqli_real_escape_string($link,$review3);
356 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");}
357
358 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)";
359 //echo $queryInsert;
382 360
383 switch ($loop) { 361 $res = mysqli_query($link, $queryInsert);
384 case 1: 362 if (!$res) {
385 $g1 = $key; 363 $err=mysqli_error( $link );
386 break; 364 mysqli_close($link);
387 case 2: 365 file_put_contents('/var/ywww/debug/phpDebug',
388 $g2 = $key; 366 "anb failed: $queryInsert\n$err\n",
389 break; 367 FILE_APPEND);
390 case 3: 368 exit($err);
391 $g3 = $key; 369 }
392 break; 370 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")";
393 } 371 if($review1 != "") {
394 372 $res = mysqli_query($link, $queryInsertReviews);
395 $loop++;
396 }
397
398 if($salesRank == "")
399 $salesRank = "null";
400
401 if($title != "")
402 {
403 include "../../private/db.php";
404 if ($publisher->count()==0) {
405 $publisher="null";
406 }
407 else {
408 $publisher=mysqli_real_escape_string($link,$publisher);
409 if (strlen($publisher)>30) {
410 $publisher=rtrim(substr($publisher,0,30),"\\");
411 }
412 $publisher="\"".$publisher."\"";
413 }
414 if ($author->count()==0) {
415 $author="unknown";
416 }
417 else {
418 $author=mysqli_real_escape_string($link,$author);
419 if (strlen($author)>30) {
420 $author=rtrim(substr($author,0,30),"\\");
421 }
422 }
423 $author="\"".$author."\"";
424 $title=mysqli_real_escape_string($link,$title);
425 if (strlen($title)>100) {
426 $title=rtrim(substr($title,0,100),"\\");
427 }
428 $title="\"".$title."\"";
429 $review1 = mysqli_real_escape_string($link,$review1);
430 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");}
431 $review2 = mysqli_real_escape_string($link,$review2);
432 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");}
433 $review3 = mysqli_real_escape_string($link,$review3);
434 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");}
435
436 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)";
437 //echo $queryInsert;
438
439 $res = mysqli_query($link, $queryInsert);
440 if (!$res) { 373 if (!$res) {
441 $err=mysqli_error( $link ); 374 $err=mysqli_error( $link );
442 mysqli_close($link); 375 mysqli_close($link);
443 file_put_contents('/var/ywww/debug/phpDebug', 376 file_put_contents('/var/ywww/debug/phpDebug',
444 "anb failed: $queryInsert\n$err\n", 377 "anr failed: $queryInsertReviews\n",
445 FILE_APPEND); 378 FILE_APPEND);
446 exit($err); 379 exit($err);
447 } 380 }
448 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")"; 381 }
449 if($review1 != "") { 382 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
450 $res = mysqli_query($link, $queryInsertReviews); 383 }
451 if (!$res) { 384
452 $err=mysqli_error( $link ); 385 echo $output;
453 mysqli_close($link); 386 }
454 file_put_contents('/var/ywww/debug/phpDebug',
455 "anr failed: $queryInsertReviews\n",
456 FILE_APPEND);
457 exit($err);
458 }
459 }
460 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop
461 }
462
463 echo $output;
464 }
465 } 387 }
466 388
467 function getAmazonDetLocal($go,$language,$loc, 389 function getAmazonDetLocal($go,$language,$loc,
468 $page,$operation,$searchparameter, 390 $page,$operation,$searchparameter,
469 $searchparameterdata, 391 $searchparameterdata,