Mercurial > hg > ywww
comparison xml/getAmazonInfo.php @ 35:86f79bc1d142
Refactor: split out a shareable function (doAmazonRequest) that actually performs the Amazon API request
| author | Charlie Root |
|---|---|
| date | Fri, 04 Jan 2019 12:54:12 -0500 |
| parents | c9d9b76ecbf9 |
| children | 2c0c95bd97a6 |
comparison
equal
deleted
inserted
replaced
| 34:5cae8d572998 | 35:86f79bc1d142 |
|---|---|
| 1 <?php | 1 <?php |
| 2 | |
| 3 include_once "doAmazonRequest.inc"; | |
| 2 | 4 |
| 3 function getAmazonDet($isbn,$go,$localeIn) | 5 function getAmazonDet($isbn,$go,$localeIn) |
| 4 { | 6 { |
| 5 $Adefault=array( | 7 $Adefault=array( |
| 6 'language' =>'en', //what language to render the page in | 8 'language' =>'en', //what language to render the page in |
| 38 case "fr": | 40 case "fr": |
| 39 $loc = 4; | 41 $loc = 4; |
| 40 break; | 42 break; |
| 41 }; | 43 }; |
| 42 | 44 |
| 43 for ($i=1; $i<=3; $i++) { | 45 try { |
| 44 try { | 46 return getAmazonDetRemote($go,$language,$locale,$loc, |
| 45 return getAmazonDetRemote($go,$language,$locale,$loc, | 47 $page,$operation,$searchparameter, |
| 46 $page,$operation,$searchparameter, | 48 $searchparameterdata, |
| 47 $searchparameterdata, | 49 $show_array,$show_url,$show_xml); |
| 48 $show_array,$show_url,$show_xml); | |
| 49 } | |
| 50 catch (Exception $e) { | |
| 51 $code=$e->getMessage(); | |
| 52 file_put_contents('/var/ywww/debug/phpDebug', | |
| 53 "Bang: $code $i\n",FILE_APPEND); | |
| 54 if ($code=='RequestThrottled') { | |
| 55 usleep(200000); // Try to reduce throttling until we get a | |
| 56 // principled solution in place | |
| 57 $bail=False; | |
| 58 } | |
| 59 else { | |
| 60 $bail=True; | |
| 61 } | |
| 62 } | |
| 63 if ($bail) { break; } | |
| 64 } | 50 } |
| 65 return getAmazonDetLocal($go,$language,$loc, | 51 catch (Exception $e) { |
| 66 $page,$operation,$searchparameter, | 52 return getAmazonDetLocal($go,$language,$loc, |
| 67 $searchparameterdata, | 53 $page,$operation,$searchparameter, |
| 68 $show_array,$show_url,$show_xml); | 54 $searchparameterdata, |
| 55 $show_array,$show_url,$show_xml); | |
| 56 } | |
| 69 } | 57 } |
| 70 | 58 |
| 71 function getAmazonDetRemote($go,$language,$locale,$loc, | 59 function getAmazonDetRemote($go,$language,$locale,$loc, |
| 72 $page,$operation,$searchparameter, | 60 $page,$operation,$searchparameter, |
| 73 $searchparameterdata, | 61 $searchparameterdata, |
| 116 ), | 104 ), |
| 117 ); | 105 ); |
| 118 | 106 |
| 119 //if(go != 1) | 107 //if(go != 1) |
| 120 //include "aws_signed_request.php"; | 108 //include "aws_signed_request.php"; |
| 121 $public_key ="AKIAIHTNWC7L6LOUY4LQ"; | |
| 122 $private_key="zWQlIzndJDtXNfxEXH7K7YR7hzv3u77lOcqfqPde"; | |
| 123 //this is the data that is used to form the request for AWS | 109 //this is the data that is used to form the request for AWS |
| 124 //this is the part that is search specific | 110 //this is the part that is search specific |
| 125 $parameters=array( | 111 $parameters=array( |
| 126 'Operation' =>$operation , | 112 'Operation' =>$operation , |
| 127 //'Keywords' =>urlencode($search) , | 113 //'Keywords' =>urlencode($search) , |
| 138 // that caused an error: | 124 // that caused an error: |
| 139 // If idType equals ASIN, SearchIndex cannot be present | 125 // If idType equals ASIN, SearchIndex cannot be present |
| 140 //'SearchIndex' =>$searchindex , //Books for example. | 126 //'SearchIndex' =>$searchindex , //Books for example. |
| 141 $parameters['SearchIndex']=$searchindex; | 127 $parameters['SearchIndex']=$searchindex; |
| 142 } | 128 } |
| 143 $requestURI = $_SERVER['REQUEST_URI']; | 129 $xml=doAmazonRequest($Aserver[$locale]['ext'],$parameters,3); // may throw exception |
| 144 $requestIP = $_SERVER['REMOTE_ADDR']; | 130 set_error_handler(function () { |
| 145 $ext=$Aserver[$locale]['ext']; | 131 global $output; |
| 146 $file_data=$ext; | 132 file_put_contents('/var/ywww/debug/phpDebug', |
| 147 ksort($parameters); | 133 "Caught one?: ".$searchparameterdata, |
| 148 foreach ($parameters as $i=>$d) { | 134 FILE_APPEND); |
| 149 $file_data.='&'.$i.'='.$d; | 135 file_put_contents('/var/ywww/debug/phpDebug', |
| 150 } | 136 print_r($output, TRUE)."\n", |
| 151 $gotit=0; | 137 FILE_APPEND); |
| 152 $url=aws_signed_request($ext,$parameters,$public_key,$private_key); | 138 } ); |
| 153 $crl = curl_init(); | 139 $review = $xml->Items->Item->CustomerReviews->IFrameURL; |
| 154 $timeout = 5; | 140 restore_error_handler(); |
| 155 curl_setopt ($crl, CURLOPT_URL,$url); | 141 //echo $review; |
| 156 curl_setopt ($crl, CURLOPT_ENCODING , "gzip"); | |
| 157 curl_setopt ($crl, CURLOPT_RETURNTRANSFER, 1); | |
| 158 curl_setopt ($crl, CURLOPT_CONNECTTIMEOUT, $timeout); | |
| 159 $semaphore = new SyncSemaphore("Amazon"); | |
| 160 $gotit = $semaphore->lock(1000); | |
| 161 if (!$gotit) { | |
| 162 file_put_contents('/var/ywww/debug/phpDebug', | |
| 163 "Got it: ".(int)$gotit." $requestIP 1\n",FILE_APPEND); | |
| 164 $gotit=$semaphore->lock(1000); | |
| 165 file_put_contents('/var/ywww/debug/phpDebug', | |
| 166 "Got it: ".(int)$gotit." $requestIP 1a\n",FILE_APPEND); | |
| 167 } | |
| 168 $output = curl_exec($crl); | |
| 169 curl_close($crl); | |
| 170 usleep(500000); | |
| 171 if ($gotit) { | |
| 172 $semaphore->unlock(); | |
| 173 } | |
| 174 else { | |
| 175 file_put_contents('/var/ywww/debug/phpDebug', | |
| 176 "W/o lock for $requestIP 1b\n",FILE_APPEND); | |
| 177 } | |
| 178 $review = ""; | |
| 179 $review1 = ""; | |
| 180 $review2 = ""; | |
| 181 $review3 = ""; | |
| 182 // HST added this | |
| 183 $mm=array(); | |
| 184 if (preg_match("/<Error>/",$output,$mm)) { | |
| 185 $xml = new SimpleXMLElement($output); | |
| 186 $resName=$xml->getName(); | |
| 187 $code=$xml->Error->Code; | |
| 188 if (!$code) { | |
| 189 $code=$xml->Items->Request->Errors->Error->Code; | |
| 190 } | |
| 191 file_put_contents('/var/ywww/debug/phpDebug',"Losing: ".$mm[0].", $resName, $code, $requestIP, $requestURI\n",FILE_APPEND); | |
| 192 if ($code!='RequestThrottled') { | |
| 193 file_put_contents('/var/ywww/debug/phpDebug',"parms: $go\nurl: $url\n". | |
| 194 print_r($parameters,TRUE)."\n",FILE_APPEND); | |
| 195 if ($code=="") { | |
| 196 file_put_contents('/var/ywww/debug/phpDebug',"error elt:\n$output\n",FILE_APPEND); | |
| 197 } | |
| 198 } | |
| 199 throw new Exception($code); | |
| 200 } | |
| 201 else { | |
| 202 $xml = new SimpleXMLElement($output); | |
| 203 set_error_handler(function () { | |
| 204 global $output; | |
| 205 file_put_contents('/var/ywww/debug/phpDebug', | |
| 206 "Caught one?: ".$searchparameterdata, | |
| 207 FILE_APPEND); | |
| 208 file_put_contents('/var/ywww/debug/phpDebug', | |
| 209 print_r($output, TRUE)."\n", | |
| 210 FILE_APPEND); | |
| 211 } ); | |
| 212 $review = $xml->Items->Item->CustomerReviews->IFrameURL; | |
| 213 // The above is failing repeatedly -- | |
| 214 //PHP Notice: Trying to get property of non-object in | |
| 215 // /var/ywww/xml/getAmazonInfo.php on line [109] | |
| 216 // See the dumped structure at the end of this file for the | |
| 217 // cause | |
| 218 restore_error_handler(); | |
| 219 //echo $review; | |
| 220 } | |
| 221 if ($review != "") | 142 if ($review != "") |
| 222 { | 143 { |
| 144 $review1 = ""; | |
| 145 $review2 = ""; | |
| 146 $review3 = ""; | |
| 223 $text = @file_get_contents($review . "&truncate=300"); | 147 $text = @file_get_contents($review . "&truncate=300"); |
| 224 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text); | 148 $removeTop1 = preg_replace('~<div class="crIFrameHeaderLeftColumn">(.*?)<div class="crIFrameHeaderHistogram">~si', '', $text); |
| 225 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text); | 149 $removeTop2 = preg_replace('~<div class="crIFrameHeaderHistogram">(.*?)<div class="crIframeReviewList">~si', '', $text); |
| 226 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;} | 150 if (preg_match('~<body[^>]*>(.*?)</body>~si', $removeTop2, $body)){ $getBody = trim($body[1]); } else { $getBody = $removeTop2;} |
| 227 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody); | 151 $removeDiv = preg_replace('~<div[^>]*>(.*?)</div>~si', '', $getBody); |
| 308 if($go == 1) { | 232 if($go == 1) { |
| 309 $item = $xml->Items->Item[0]; | 233 $item = $xml->Items->Item[0]; |
| 310 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) { | 234 if ($item && $item->ItemAttributes && $item->ItemAttributes->Title) { |
| 311 $title = $item->ItemAttributes->Title; } | 235 $title = $item->ItemAttributes->Title; } |
| 312 else { $title = "[no title]"; }; | 236 else { $title = "[no title]"; }; |
| 313 file_put_contents('/var/ywww/debug/phpDebug',"win: |$errorCode| ". | 237 file_put_contents('/var/ywww/debug/phpDebug',"win: $title\n", |
| 314 $title."\n", | |
| 315 FILE_APPEND); | 238 FILE_APPEND); |
| 316 return $output; | 239 return $output; |
| 317 } | 240 } |
| 318 else | 241 else { |
| 319 { | 242 if ($xml->Items->Item) { |
| 320 if ($xml->Items->Item) { | 243 $title = $xml->Items->Item[0]->ItemAttributes->Title; |
| 321 $title = $xml->Items->Item[0]->ItemAttributes->Title; | 244 $author = $xml->Items->Item[0]->ItemAttributes->Author; |
| 322 $author = $xml->Items->Item[0]->ItemAttributes->Author; | 245 $binding = $xml->Items->Item[0]->ItemAttributes->Binding; |
| 323 $binding = $xml->Items->Item[0]->ItemAttributes->Binding; | 246 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber; |
| 324 $dewey = $xml->Items->Item[0]->ItemAttributes->DeweyDecimalNumber; | 247 if($dewey == "") |
| 325 if($dewey == "") | 248 $dewey = "null"; |
| 326 $dewey = "null"; | 249 $imageURL = $xml->Items->Item[0]->MediumImage->URL; |
| 327 $imageURL = $xml->Items->Item[0]->MediumImage->URL; | 250 $salesRank = $xml->Items->Item[0]->SalesRank; |
| 328 $salesRank = $xml->Items->Item[0]->SalesRank; | 251 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate; |
| 329 $pubDate = $xml->Items->Item[0]->ItemAttributes->PublicationDate; | 252 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";} |
| 330 if (strlen($pubDate)==4) { $pubDate=$pubDate."-01-01";} | 253 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";} |
| 331 if (strlen($pubDate)==7) { $pubDate=$pubDate."-01";} | 254 if (strlen($pubDate)==0) { |
| 332 if (strlen($pubDate)==0) { | 255 $pubDate="null"; |
| 333 $pubDate="null"; | |
| 334 } | |
| 335 else { | |
| 336 $pubDate="\"$pubDate\""; | |
| 337 } | |
| 338 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher; | |
| 339 } | 256 } |
| 340 else { | 257 else { |
| 341 $title = $salesRank = ""; | 258 $pubDate="\"$pubDate\""; |
| 342 $dewey = "null"; | |
| 343 } | 259 } |
| 260 $publisher = $xml->Items->Item[0]->ItemAttributes->Publisher; | |
| 261 } | |
| 262 else { | |
| 263 $title = $salesRank = ""; | |
| 264 $dewey = "null"; | |
| 265 } | |
| 344 | 266 |
| 345 $genreID = ""; | 267 $genreID = ""; |
| 346 $genre = ""; | 268 $genre = ""; |
| 347 $genArr = array(); | 269 $genArr = array(); |
| 348 | 270 |
| 349 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) { | 271 if ($xml->Items->Item[0] && $xml->Items->Item[0]->BrowseNodes) { |
| 350 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){ | 272 for($i=0;$i<sizeof($xml->Items->Item->BrowseNodes->BrowseNode);$i++){ |
| 351 //sexy recursive function | 273 //sexy recursive function |
| 352 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre); | 274 findGenre($xml->Items->Item->BrowseNodes->BrowseNode[$i], $genreID, $genre); |
| 353 | 275 |
| 354 if($genre != "") | 276 if($genre != "") |
| 355 $genArr[strval($genreID)] = strval($genre); | 277 $genArr[strval($genreID)] = strval($genre); |
| 356 //$genArr[$i] = array(strval($genreID) => strval($genre)); | 278 //$genArr[$i] = array(strval($genreID) => strval($genre)); |
| 357 | 279 |
| 358 //echo $genre; | 280 //echo $genre; |
| 359 //echo $genreID; | 281 //echo $genreID; |
| 360 | 282 |
| 361 $genre = ""; | 283 $genre = ""; |
| 362 $genreID = ""; | 284 $genreID = ""; |
| 363 } | |
| 364 } | 285 } |
| 286 } | |
| 365 | 287 |
| 366 $g1 = "null"; | 288 $g1 = "null"; |
| 367 $g2 = "null"; | 289 $g2 = "null"; |
| 368 $g3 = "null"; | 290 $g3 = "null"; |
| 369 $loop = 1; | 291 $loop = 1; |
| 370 | 292 |
| 371 foreach ($genArr as $key => $value) { | 293 foreach ($genArr as $key => $value) { |
| 372 //echo "$key => $value"; | 294 //echo "$key => $value"; |
| 373 if ($key>2047) { | 295 if ($key>2047) { |
| 374 //HST added | 296 //HST added |
| 375 break; | 297 break; |
| 376 } | 298 } |
| 377 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table | 299 $queryG = "CALL b_addBrowseNode($key,\"$value\")"; //add the name value pair for genre to new table |
| 378 //echo $queryG; | 300 //echo $queryG; |
| 301 include "../../private/db.php"; | |
| 302 $resG = mysqli_query($link, $queryG); | |
| 303 mysqli_close($link); | |
| 304 | |
| 305 switch ($loop) { | |
| 306 case 1: | |
| 307 $g1 = $key; | |
| 308 break; | |
| 309 case 2: | |
| 310 $g2 = $key; | |
| 311 break; | |
| 312 case 3: | |
| 313 $g3 = $key; | |
| 314 break; | |
| 315 } | |
| 316 | |
| 317 $loop++; | |
| 318 } | |
| 319 | |
| 320 if($salesRank == "") | |
| 321 $salesRank = "null"; | |
| 322 | |
| 323 if($title != "") | |
| 324 { | |
| 379 include "../../private/db.php"; | 325 include "../../private/db.php"; |
| 380 $resG = mysqli_query($link, $queryG); | 326 if ($publisher->count()==0) { |
| 381 mysqli_close($link); | 327 $publisher="null"; |
| 328 } | |
| 329 else { | |
| 330 $publisher=mysqli_real_escape_string($link,$publisher); | |
| 331 if (strlen($publisher)>30) { | |
| 332 $publisher=rtrim(substr($publisher,0,30),"\\"); | |
| 333 } | |
| 334 $publisher="\"".$publisher."\""; | |
| 335 } | |
| 336 if ($author->count()==0) { | |
| 337 $author="unknown"; | |
| 338 } | |
| 339 else { | |
| 340 $author=mysqli_real_escape_string($link,$author); | |
| 341 if (strlen($author)>30) { | |
| 342 $author=rtrim(substr($author,0,30),"\\"); | |
| 343 } | |
| 344 } | |
| 345 $author="\"".$author."\""; | |
| 346 $title=mysqli_real_escape_string($link,$title); | |
| 347 if (strlen($title)>100) { | |
| 348 $title=rtrim(substr($title,0,100),"\\"); | |
| 349 } | |
| 350 $title="\"".$title."\""; | |
| 351 $review1 = mysqli_real_escape_string($link,$review1); | |
| 352 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");} | |
| 353 $review2 = mysqli_real_escape_string($link,$review2); | |
| 354 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");} | |
| 355 $review3 = mysqli_real_escape_string($link,$review3); | |
| 356 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");} | |
| 357 | |
| 358 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)"; | |
| 359 //echo $queryInsert; | |
| 382 | 360 |
| 383 switch ($loop) { | 361 $res = mysqli_query($link, $queryInsert); |
| 384 case 1: | 362 if (!$res) { |
| 385 $g1 = $key; | 363 $err=mysqli_error( $link ); |
| 386 break; | 364 mysqli_close($link); |
| 387 case 2: | 365 file_put_contents('/var/ywww/debug/phpDebug', |
| 388 $g2 = $key; | 366 "anb failed: $queryInsert\n$err\n", |
| 389 break; | 367 FILE_APPEND); |
| 390 case 3: | 368 exit($err); |
| 391 $g3 = $key; | 369 } |
| 392 break; | 370 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")"; |
| 393 } | 371 if($review1 != "") { |
| 394 | 372 $res = mysqli_query($link, $queryInsertReviews); |
| 395 $loop++; | |
| 396 } | |
| 397 | |
| 398 if($salesRank == "") | |
| 399 $salesRank = "null"; | |
| 400 | |
| 401 if($title != "") | |
| 402 { | |
| 403 include "../../private/db.php"; | |
| 404 if ($publisher->count()==0) { | |
| 405 $publisher="null"; | |
| 406 } | |
| 407 else { | |
| 408 $publisher=mysqli_real_escape_string($link,$publisher); | |
| 409 if (strlen($publisher)>30) { | |
| 410 $publisher=rtrim(substr($publisher,0,30),"\\"); | |
| 411 } | |
| 412 $publisher="\"".$publisher."\""; | |
| 413 } | |
| 414 if ($author->count()==0) { | |
| 415 $author="unknown"; | |
| 416 } | |
| 417 else { | |
| 418 $author=mysqli_real_escape_string($link,$author); | |
| 419 if (strlen($author)>30) { | |
| 420 $author=rtrim(substr($author,0,30),"\\"); | |
| 421 } | |
| 422 } | |
| 423 $author="\"".$author."\""; | |
| 424 $title=mysqli_real_escape_string($link,$title); | |
| 425 if (strlen($title)>100) { | |
| 426 $title=rtrim(substr($title,0,100),"\\"); | |
| 427 } | |
| 428 $title="\"".$title."\""; | |
| 429 $review1 = mysqli_real_escape_string($link,$review1); | |
| 430 if (strlen($review1)>500) { $review1=rtrim(substr($review1,0,500),"\\");} | |
| 431 $review2 = mysqli_real_escape_string($link,$review2); | |
| 432 if (strlen($review2)>500) { $review2=rtrim(substr($review2,0,500),"\\");} | |
| 433 $review3 = mysqli_real_escape_string($link,$review3); | |
| 434 if (strlen($review3)>500) { $review3=rtrim(substr($review3,0,500),"\\");} | |
| 435 | |
| 436 $queryInsert = "CALL b_addNewBook(\"$searchparameterdata\",$title, $author,\"$binding\",\"$imageURL\", $dewey, $salesRank,$pubDate,$publisher,$g1,$g2,$g3,$loc)"; | |
| 437 //echo $queryInsert; | |
| 438 | |
| 439 $res = mysqli_query($link, $queryInsert); | |
| 440 if (!$res) { | 373 if (!$res) { |
| 441 $err=mysqli_error( $link ); | 374 $err=mysqli_error( $link ); |
| 442 mysqli_close($link); | 375 mysqli_close($link); |
| 443 file_put_contents('/var/ywww/debug/phpDebug', | 376 file_put_contents('/var/ywww/debug/phpDebug', |
| 444 "anb failed: $queryInsert\n$err\n", | 377 "anr failed: $queryInsertReviews\n", |
| 445 FILE_APPEND); | 378 FILE_APPEND); |
| 446 exit($err); | 379 exit($err); |
| 447 } | 380 } |
| 448 $queryInsertReviews = "CALL b_insertReviews(\"$searchparameterdata\",\"$review1\",\"$review2\",\"$review3\")"; | 381 } |
| 449 if($review1 != "") { | 382 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop |
| 450 $res = mysqli_query($link, $queryInsertReviews); | 383 } |
| 451 if (!$res) { | 384 |
| 452 $err=mysqli_error( $link ); | 385 echo $output; |
| 453 mysqli_close($link); | 386 } |
| 454 file_put_contents('/var/ywww/debug/phpDebug', | |
| 455 "anr failed: $queryInsertReviews\n", | |
| 456 FILE_APPEND); | |
| 457 exit($err); | |
| 458 } | |
| 459 } | |
| 460 mysqli_close($link); //do not remove. reset is needed otherwise mysqli_fetch_array doesn't work after first loop | |
| 461 } | |
| 462 | |
| 463 echo $output; | |
| 464 } | |
| 465 } | 387 } |
| 466 | 388 |
| 467 function getAmazonDetLocal($go,$language,$loc, | 389 function getAmazonDetLocal($go,$language,$loc, |
| 468 $page,$operation,$searchparameter, | 390 $page,$operation,$searchparameter, |
| 469 $searchparameterdata, | 391 $searchparameterdata, |
