Mercurial > hg > ywww
comparison xml/getAmazonInfo.php @ 35:86f79bc1d142
Refactor: split out a shareable function (doAmazonRequest) that performs the actual Amazon API request
author | Charlie Root |
---|---|
date | Fri, 04 Jan 2019 12:54:12 -0500 |
parents | c9d9b76ecbf9 |
children | 2c0c95bd97a6 |
comparison
equal
deleted
inserted
replaced
34:5cae8d572998 | 35:86f79bc1d142 |
---|---|
1 <?php | 1 <?php |
2 | |
3 include_once "doAmazonRequest.inc"; | |
2 | 4 |
3 function getAmazonDet($isbn,$go,$localeIn) | 5 function getAmazonDet($isbn,$go,$localeIn) |
4 { | 6 { |
5 $Adefault=array( | 7 $Adefault=array( |
6 'language' =>'en', //what language to render the page in | 8 'language' =>'en', //what language to render the page in |
38 case "fr": | 40 case "fr": |
39 $loc = 4; | 41 $loc = 4; |
40 break; | 42 break; |
41 }; | 43 }; |
42 | 44 |
43 for ($i=1; $i<=3; $i++) { | 45 try { |
44 try { | 46 return getAmazonDetRemote($go,$language,$locale,$loc, |
45 return getAmazonDetRemote($go,$language,$locale,$loc, | 47 $page,$operation,$searchparameter, |
46 $page,$operation,$searchparameter, | 48 $searchparameterdata, |
47 $searchparameterdata, | 49 $show_array,$show_url,$show_xml); |
48 $show_array,$show_url,$show_xml); | |
49 } | |
50 catch (Exception $e) { | |
51 $code=$e->getMessage(); | |
52 file_put_contents('/var/ywww/debug/phpDebug', | |
53 "Bang: $code $i\n",FILE_APPEND); | |
54 if ($code=='RequestThrottled') { | |
55 usleep(200000); // Try to reduce throttling until we get a | |
56 // principled solution in place | |
57 $bail=False; | |
58 } | |
59 else { | |
60 $bail=True; | |
61 } | |
62 } | |
63 if ($bail) { break; } | |
64 } | 50 } |
65 return getAmazonDetLocal($go,$language,$loc, | 51 catch (Exception $e) { |
66 $page,$operation,$searchparameter, | 52 return getAmazonDetLocal($go,$language,$loc, |
67 $searchparameterdata, | 53 $page,$operation,$searchparameter, |
68 $show_array,$show_url,$show_xml); | 54 $searchparameterdata, |
55 $show_array,$show_url,$show_xml); | |
56 } | |
69 } | 57 } |
70 | 58 |
71 function getAmazonDetRemote($go,$language,$locale,$loc, | 59 function getAmazonDetRemote($go,$language,$locale,$loc, |
72 $page,$operation,$searchparameter, | 60 $page,$operation,$searchparameter, |
73 $searchparameterdata, | 61 $searchparameterdata, |
116 ), | 104 ), |
117 ); | 105 ); |
118 | 106 |
119 //if(go != 1) | 107 //if(go != 1) |
120 //include "aws_signed_request.php"; | 108 //include "aws_signed_request.php"; |
121 $public_key ="AKIAIHTNWC7L6LOUY4LQ"; | |
122 $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde"; | |
123 //this is the data that is used to form the request for AWS | 109 //this is the data that is used to form the request for AWS |
124 //this is the part that is search specific | 110 //this is the part that is search specific |
125 $parameters=array( | 111 $parameters=array( |
126 'Operation' =>$operation , | 112 'Operation' =>$operation , |
127 //'Keywords' =>urlencode($search) , | 113 //'Keywords' =>urlencode($search) , |
138 // that caused an error: | 124 // that caused an error: |
139 // If idType equals ASIN, SearchIndex cannot be present | 125 // If idType equals ASIN, SearchIndex cannot be present |
140 //'SearchIndex' =>$searchindex , //Books for example. | 126 //'SearchIndex' =>$searchindex , //Books for example. |
141 $parameters['SearchIndex']=$searchindex; | 127 $parameters['SearchIndex']=$searchindex; |
142 } | 128 } |
143 $requestURI = $_SERVER['REQUEST_URI']; | 129 $xml=doAmazonRequest($Aserver[$locale]['ext'],$parameters,3); // may throw exception |
144 $requestIP = $_SERVER['REMOTE_ADDR']; | 130 set_error_handler(function () { |
145 $ext=$Aserver[$locale]['ext']; | 131 global $output; |
146 $file_data=$ext; | 132 file_put_contents('/var/ywww/debug/phpDebug', |
147 ksort($parameters); | 133 "Caught one?: ".$searchparameterdata, |
148 foreach ($parameters as $i=>$d) { | 134 FILE_APPEND); |
149 $file_data.='&'.$i.'='.$d; | 135 file_put_contents('/var/ywww/debug/phpDebug', |
150 } | 136 print_r($output, TRUE)."\n", |
151 $gotit=0; | 137 FILE_APPEND); |
152 $url=aws_signed_request($ext,$parameters,$public_key,$private_key); | 138 } ); |
153 $crl = curl_init(); | 139 $review = $xml->Items->Item->CustomerReviews->IFrameURL; |
154 $timeout = 5; | 140 restore_error_handler(); |
155 curl_setopt ($crl, CURLOPT_URL,$url); | 141 //echo $review; |
156 curl_setopt ($crl, CURLOPT_ENCODING , "gzip"); | |
157 curl_setopt ($crl, CURLOPT_RETURNTRANSFER, 1); | |
158 curl_setopt ($crl, CURLOPT_CONNECTTIMEOUT, $timeout); | |
159 $semaphore = new SyncSemaphore("Amazon"); | |
160 $gotit = $semaphore->lock(1000); | |
161 if (!$gotit) { | |
162 file_put_contents('/var/ywww/debug/phpDebug', | |
163 "Got it: ".(int)$gotit." $requestIP 1\n",FILE_APPEND); | |
164 $gotit=$semaphore->lock(1000); | |
165 file_put_contents('/var/ywww/debug/phpDebug', | |
166 "Got it: ".(int)$gotit." $requestIP 1a\n",FILE_APPEND); | |
167 } | |
168 $output = curl_exec($crl); | |
169 curl_close($crl); | |
170 usleep(500000); | |
171 if ($gotit) { | |
172 $semaphore->unlock(); | |
173 } | |
174 else { | |
175 file_put_contents('/var/ywww/debug/phpDebug', | |
176 "W/o lock for $requestIP 1b\n",FILE_APPEND); | |
177 } | |
178 $review = ""; | |
179 $review1 = ""; | |
180 $review2 = ""; | |
181 $review3 = ""; | |
182 // HST added this | |
183 $mm=array(); | |
184 if (preg_match("/<Error>/",$output,$mm)) { | |
185 $xml = new SimpleXMLElement($output); | |
186 $resName=$xml->getName(); | |
187 $code=$xml->Error->Code; | |
188 if (!$code) { | |
189 $code=$xml->Items->Request->Errors->Error->Code; | |
190 } | |
191 file_put_contents('/var/ywww/debug/phpDebug',"Losing: ".$mm[0].", $resName, $code, $requestIP, $requestURI\n",FILE_APPEND); | |
192 if ($code!='RequestThrottled') { | |
193 file_put_contents('/var/ywww/debug/phpDebug',"parms: $go\nurl: $url\n". | |
194 print_r($parameters,TRUE)."\n",FILE_APPEND); | |
195 if ($code=="") { | |
196 file_put_contents('/var/ywww/debug/phpDebug',"error elt:\n$output\n",FILE_APPEND); | |
197 } | |
198 } | |
199 throw new Exception($code); | |
200 } | |
201 else { | |
202 $xml = new SimpleXMLElement($output); | |
203 set_error_handler(function () { | |
204 global $output; | |
205 file_put_contents('/var/ywww/debug/phpDebug', | |
206 "Caught one?: ".$searchparameterdata, | |
207 FILE_APPEND); | |
208 file_put_contents('/var/ywww/debug/phpDebug', | |
209 print_r($output, TRUE)."\n", | |
210 FILE_APPEND); | |
211 } ); | |
212 $review = $xml->Items->Item->CustomerReviews->IFrameURL; | |
213 // The above is failing repeatedly -- | |
214 //PHP Notice: Trying to get property of non-object in | |
215 // /var/ywww/xml/getAmazonInfo.php on line [109] | |
216 // See the dumped structure at the end of this file for the | |
217 // cause | |
218 restore_error_handler(); | |
219 //echo $review; | |
220 } | |
221 if ($review != "") | 142 if ($review != "") |
222 { | 143 { |
144 $review1 = ""; | |
145 $review2 = ""; | |
146 $review3 = ""; | |
223 $text = @file_get_contents($review . "&truncate=300"); | 147 $text = @file_get_contents($review . "&truncate=300"); |
224 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text); | 148 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text); |
225 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text); | 149 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text); |
226 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;} | 150 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;} |
227 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody); | 151 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody); |
308 if($go == 1) { | 232 if($go == 1) { |
309 $item = $xml->Items->Item[0]; | 233 $item = $xml->Items->Item[0]; |
310 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) { | 234 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) { |
311 $title = $item->ItemAttributes->Title; } | 235 $title = $item->ItemAttributes->Title; } |
312 else { $title = "[no title]"; }; | 236 else { $title = "[no title]"; }; |
313 file_put_contents('/var/ywww/debug/phpDebug',"win: |$errorCode| ". | 237 file_put_contents('/var/ywww/debug/phpDebug',"win: $title\n", |
314 $title."\n", | |
315 FILE_APPEND); | 238 FILE_APPEND); |
316 return $output; | 239 return $output; |
317 } | 240 } |
318 else | 241 else { |
319 { | 242 if ($xml->Items->Item) { |
320 if ($xml->Items->Item) { | 243 $title = $xml->Items->Item[0]->ItemAttributes->Title; |
321 $title = $xml->Items->Item[0]->ItemAttributes->Title; | 244 $author = $xml->Items->Item[0]->ItemAttributes->Author; |
322 $author = $xml->Items->Item[0]->ItemAttributes->Author; | 245 $binding = $xml->Items->Item[0]->ItemAttributes->Binding; |
323 $binding = $xml->Items->Item[0]->ItemAttributes->Binding; | 246 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber; |
324 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber; | 247 if($dewey == "") |
325 if($dewey == "") | 248 $dewey = "null"; |
326 $dewey = "null"; | 249 $imageURL = $xml->Items->Item[0]->MediumImage->URL; |
327 $imageURL = $xml->Items->Item[0]->MediumImage->URL; | 250 $salesRank = $xml->Items->Item[0]->SalesRank; |
328 $salesRank = $xml->Items->Item[0]->SalesRank; | 251 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate; |
329 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate; | 252 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";} |
330 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";} | 253 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";} |
331 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";} | 254 if (strlen($pubDate)==0) { |
332 if (strlen($pubDate)==0) { | 255 $pubDate="null"; |
333 $pubDate="null"; | |
334 } | |
335 else { | |
336 $pubDate="\"$pubDate\""; | |
337 } | |
338 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher; | |
339 } | 256 } |
340 else { | 257 else { |
341 $title = $salesRank = ""; | 258 $pubDate="\"$pubDate\""; |
342 $dewey = "null"; | |
343 } | 259 } |
260 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher; | |
261 } | |
262 else { | |
263 $title = $salesRank = ""; | |
264 $dewey = "null"; | |
265 } | |
344 | 266 |
345 $genreID = ""; | 267 $genreID = ""; |
346 $genre = ""; | 268 $genre = ""; |
347 $genArr = array(); | 269 $genArr = array(); |
348 | 270 |
349 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) { | 271 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) { |
350 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){ | 272 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){ |
351 //sexy recursive function | 273 //sexy recursive function |
352 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre); | 274 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre); |
353 | 275 |
354 if($genre != "") | 276 if($genre != "") |
355 $genArr[strval($genreID)] = strval($genre); | 277 $genArr[strval($genreID)] = strval($genre); |
356 //$genArr[$i] = array(strval($genreID) => strval($genre)); | 278 //$genArr[$i] = array(strval($genreID) => strval($genre)); |
357 | 279 |
358 //echo $genre; | 280 //echo $genre; |
359 //echo $genreID; | 281 //echo $genreID; |
360 | 282 |
361 $genre = ""; | 283 $genre = ""; |
362 $genreID = ""; | 284 $genreID = ""; |
363 } | |
364 } | 285 } |
286 } | |
365 | 287 |
366 $g1 = "null"; | 288 $g1 = "null"; |
367 $g2 = "null"; | 289 $g2 = "null"; |
368 $g3 = "null"; | 290 $g3 = "null"; |
369 $loop = 1; | 291 $loop = 1; |
370 | 292 |
371 foreach ($genArr as $key => $value) { | 293 foreach ($genArr as $key => $value) { |
372 //echo "$key => $value"; | 294 //echo "$key => $value"; |
373 if ($key>2047) { | 295 if ($key>2047) { |
374 //HST added | 296 //HST added |
375 break; | 297 break; |
376 } | 298 } |
377 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table | 299 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table |
378 //echo $queryG; | 300 //echo $queryG; |
301 include "../../private/db.php"; | |
302 $resG = mysqli_query($link, $queryG); | |
303 mysqli_close($link); | |
304 | |
305 switch ($loop) { | |
306 case 1: | |
307 $g1 = $key; | |
308 break; | |
309 case 2: | |
310 $g2 = $key; | |
311 break; | |
312 case 3: | |
313 $g3 = $key; | |
314 break; | |
315 } | |
316 | |
317 $loop++; | |
318 } | |
319 | |
320 if($salesRank == "") | |
321 $salesRank = "null"; | |
322 | |
323 if($title != "") | |
324 { | |
379 include "../../private/db.php"; | 325 include "../../private/db.php"; |
380 $resG = mysqli_query($link, $queryG); | 326 if ($publisher->count()==0) { |
381 mysqli_close($link); | 327 $publisher="null"; |
328 } | |
329 else { | |
330 $publisher=mysqli_real_escape_string($link,$publisher); | |
331 if (strlen($publisher)>30) { | |
332 $publisher=rtrim(substr($publisher,0,30),"\\"); | |
333 } | |
334 $publisher="\"".$publisher."\""; | |
335 } | |
336 if ($author->count()==0) { | |
337 $author="unknown"; | |
338 } | |
339 else { | |
340 $author=mysqli_real_escape_string($link,$author); | |
341 if (strlen($author)>30) { | |
342 $author=rtrim(substr($author,0,30),"\\"); | |
343 } | |
344 } | |
345 $author="\"".$author."\""; | |
346 $title=mysqli_real_escape_string($link,$title); | |
347 if (strlen($title)>100) { | |
348 $title=rtrim(substr($title,0,100),"\\"); | |
349 } | |
350 $title="\"".$title."\""; | |
351 $review1 = mysqli_real_escape_string($link,$review1); | |
352 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");} | |
353 $review2 = mysqli_real_escape_string($link,$review2); | |
354 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");} | |
355 $review3 = mysqli_real_escape_string($link,$review3); | |
356 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");} | |
357 | |
358 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)"; | |
359 //echo $queryInsert; | |
382 | 360 |
383 switch ($loop) { | 361 $res = mysqli_query($link, $queryInsert); |
384 case 1: | 362 if (!$res) { |
385 $g1 = $key; | 363 $err=mysqli_error( $link ); |
386 break; | 364 mysqli_close($link); |
387 case 2: | 365 file_put_contents('/var/ywww/debug/phpDebug', |
388 $g2 = $key; | 366 "anb failed: $queryInsert\n$err\n", |
389 break; | 367 FILE_APPEND); |
390 case 3: | 368 exit($err); |
391 $g3 = $key; | 369 } |
392 break; | 370 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")"; |
393 } | 371 if($review1 != "") { |
394 | 372 $res = mysqli_query($link, $queryInsertReviews); |
395 $loop++; | |
396 } | |
397 | |
398 if($salesRank == "") | |
399 $salesRank = "null"; | |
400 | |
401 if($title != "") | |
402 { | |
403 include "../../private/db.php"; | |
404 if ($publisher->count()==0) { | |
405 $publisher="null"; | |
406 } | |
407 else { | |
408 $publisher=mysqli_real_escape_string($link,$publisher); | |
409 if (strlen($publisher)>30) { | |
410 $publisher=rtrim(substr($publisher,0,30),"\\"); | |
411 } | |
412 $publisher="\"".$publisher."\""; | |
413 } | |
414 if ($author->count()==0) { | |
415 $author="unknown"; | |
416 } | |
417 else { | |
418 $author=mysqli_real_escape_string($link,$author); | |
419 if (strlen($author)>30) { | |
420 $author=rtrim(substr($author,0,30),"\\"); | |
421 } | |
422 } | |
423 $author="\"".$author."\""; | |
424 $title=mysqli_real_escape_string($link,$title); | |
425 if (strlen($title)>100) { | |
426 $title=rtrim(substr($title,0,100),"\\"); | |
427 } | |
428 $title="\"".$title."\""; | |
429 $review1 = mysqli_real_escape_string($link,$review1); | |
430 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");} | |
431 $review2 = mysqli_real_escape_string($link,$review2); | |
432 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");} | |
433 $review3 = mysqli_real_escape_string($link,$review3); | |
434 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");} | |
435 | |
436 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)"; | |
437 //echo $queryInsert; | |
438 | |
439 $res = mysqli_query($link, $queryInsert); | |
440 if (!$res) { | 373 if (!$res) { |
441 $err=mysqli_error( $link ); | 374 $err=mysqli_error( $link ); |
442 mysqli_close($link); | 375 mysqli_close($link); |
443 file_put_contents('/var/ywww/debug/phpDebug', | 376 file_put_contents('/var/ywww/debug/phpDebug', |
444 "anb failed: $queryInsert\n$err\n", | 377 "anr failed: $queryInsertReviews\n", |
445 FILE_APPEND); | 378 FILE_APPEND); |
446 exit($err); | 379 exit($err); |
447 } | 380 } |
448 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")"; | 381 } |
449 if($review1 != "") { | 382 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop |
450 $res = mysqli_query($link, $queryInsertReviews); | 383 } |
451 if (!$res) { | 384 |
452 $err=mysqli_error( $link ); | 385 echo $output; |
453 mysqli_close($link); | 386 } |
454 file_put_contents('/var/ywww/debug/phpDebug', | |
455 "anr failed: $queryInsertReviews\n", | |
456 FILE_APPEND); | |
457 exit($err); | |
458 } | |
459 } | |
460 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop | |
461 } | |
462 | |
463 echo $output; | |
464 } | |
465 } | 387 } |
466 | 388 |
467 function getAmazonDetLocal($go,$language,$loc, | 389 function getAmazonDetLocal($go,$language,$loc, |
468 $page,$operation,$searchparameter, | 390 $page,$operation,$searchparameter, |
469 $searchparameterdata, | 391 $searchparameterdata, |