********************************************************************
 * This program is absolutely free. Do anything with it that you
 * like except claim that you wrote it.
 ********************************************************************
 * VERSION HISTORY:
 * 2010-12-30: Added ability to use many different sites for trending
 *             items.
 * 2010-05-18: Removed requirement for Google Checkout because I will
 *             allow this to auto-purchase stuff. I don't have the
 *             money to spend or space to store the junk.
 * 2010-02-08: Added banned sites. Some sites lie all the time.
 * 2010-01-01: This basically works - commented heavily so the public
 *             can use it.
 * 2009-09-30: Added banned items. I only want items that can be
 *             shipped. Trying to ignore downloadable items.
 * 2009-05-03: The XKCD Sucks page pretty much put out a challenge to
 *             implement this program. So, I tried it.
 ********************************************************************
 * USAGE:
 * At the command line, run "php cc.php".
 * If you want to use this as a library (as I do), comment out the
 * last line of this page (there is a comment down there explaining
 * which line to comment out.) Then, include this file from your
 * program. Once included, the command "cheepcheep()" will return an
 * array of items. Each item in the array will contain four indexes:
 *   name:  The item's name.
 *   price: The item's price.
 *   url:   A url link to the item.
 *   count: How many sites are selling the item
 ********************************************************************
 * REQUIREMENTS:
 * This script is written for PHP5. I do not have PHP4 (or PHP3) any
 * more, so I cannot test the script in those versions.
 * This script also requires lynx (the web browser). If you have a
 * text-based web browser that functions identically to lynx, you can
 * replace the function call to lynx with your other web browser.
 ********************************************************************/

/** Settings. These may change over time.
**/

/**
Suggestion Page:
The suggestion page should contain a list of trending items.
Originally, Google's shopping page showed trending items. It
doesn't anymore. So, I started using the "Trending now" section
of other websites. I also changed how it locates the trending items:
  It loads the page.
  It looks for the header.
  Once found, it saves each listed item to the suggestion array.
  It stops when it hits the footer.
Flags:
  $suggestions_numbered: leading digits, dots, whitespace and asterisks
      are stripped from each line (items preceded by "1.", "2.", ...).
  $suggestions_repeated: a line is only accepted when it is identical
      to the previous non-blank line.
  $suggestions_multi:    every lynx link marker "[n]" is turned into a
      "|" separator and each line is split into several suggestions.
      (Without it the "[n]" markers are simply removed.)
**/

/** This uses Google's recently found list (when it is not a holiday) **/
$suggestion_page = "http://www.google.com/products";
$suggestion_header = "A few of the items recently found with Google Product Search:";
$suggestion_footer = "|Google Home |Information for Merchants |Privacy |Help";
$suggestions_numbered = false;
$suggestions_repeated = false;
$suggestions_multi = true;

/** These settings will use the Trending Section of ThisNext.com **/
// $suggestion_page = "http://www.thisnext.com/";
// $suggestion_header = "Trending Now";
// $suggestion_footer = "Featured Brand";
// $suggestions_numbered = true;
// $suggestions_repeated = false;
// $suggestions_multi = false;

/** These settings will use the Trending Section of eBay.com **/
// $suggestion_page = "http://www.ebay.com/";
// $suggestion_header = "Trends on eBay Updated daily";
// $suggestion_footer = "BUY";
// $suggestions_numbered = false;
// $suggestions_repeated = false;
// $suggestions_multi = false;

/** These settings will use the Popular Section of eBay.com **/
// $suggestion_page = "http://popular.ebay.com/";
// $suggestion_header = "Top Products";
// $suggestion_footer = "";
// $suggestions_numbered = true;
// $suggestions_repeated = false;
// $suggestions_multi = false;

/** These settings will use the Trending Section of Twenga.com **/
// $suggestion_page = "http://www.twenga.com/top";
// $suggestion_header = "New sections";
// $suggestion_footer = "Twenga About us Privacy policy Careers";
// $suggestions_numbered = true;
// $suggestions_repeated = false;
// $suggestions_multi = false;

/** These settings will use the daily suggestion section of MySimon.com **/
// $suggestion_page = "http://www.mysimon.com/shopping-picks";
// $suggestion_header = date("M. j, Y"); //"Shopping Picks";
// $suggestion_header = "Dec. 24, 2010";
// $suggestion_footer = "IFRAME:";
// $suggestions_numbered = false;
// $suggestions_repeated = true;
// $suggestions_multi = false;

/**
Product Page:
The product page is Google's product search. This searches for
cheap items using the products found in the previous suggestion search.
The (URL-encoded) suggestion is appended to $shop_page.
**/
$product_page = "http://www.google.com/products?q=";
$shop_page = "http://www.google.com/search?tbs=shop:1&sampleq=1&hl=en&sa=X&tbs=shop:1,p_ord:pd,price:1,ppr_min:0.50,ppr_max:1.00,ship:1&q=";

/**
An explanation of the shop_page variables (mainly my guess at what they mean)...
http://www.google.com/search
  ?tbs=shop:1      I think this is shop:1 just to tell it to use the shopping page.
  &q=footballs     q is the query string.
  &sampleq=1       I leave this as 1.
  &hl=en           The query is in English.
  &safe=off        Safe search. I leave it off because having it on is way too limiting.
  &sa=X            I leave this as X.
  This next one is the options:
    p_ord is pd to sort from most expensive to least expensive.
    price is 1 to sort by price
    ppr_min is 0.50 - the low price.
    ppr_max is 1.00 - the high price.
    ship is 1 - require no shipping cost
    check is 1 - require Google checkout. (I change that to 0).
  &tbs=shop:1,p_ord:pd,price:1,ppr_min:0.50,ppr_max:1.00,ship:1,check:1
  &fp=2383846262876a   I think this is a session key.
**/

/** Banned items are words in the item description that are not allowed. **/
$banned_items = array(
    "billabong decal",
    "cash back",
    "coupon",
    "ebook",
    "gift cert",
    "kindle",
    "magic online",
    "mp3 album",
    "mtgo",
    "prepaid pin",
    "raffle",
    "recipe",
    "stock photo",
    "wanted"
);

/** Banned sites are websites that will be ignored. **/
$banned_sites = array(
    "www.artsinheaven.com",
    "www.malanijewelers.com",
    "www.pixmac.com",
    "www.vacdepot.com",
    "mp3",
    "ebook"
);

/** Lynx function call **/
// If you change this, the output must be identical to Lynx or the
// screen scraper will fail miserably.
// The token URL is replaced by the page address before the command is run.
$lynx = "lynx \"URL\" --dump";

/**
 * Runs the text browser on one URL and returns its output as trimmed lines.
 *
 * @param string $url  Address to load. It is pasted into the command between
 *                     the quotes of the template, so any scraped text that
 *                     is part of it must already be urlencode()d.
 * @param string $lynx Command template containing the token "URL".
 * @return array       One trimmed string per output line; empty when the
 *                     command could not be started.
 */
function cheepcheep_fetch_lines($url, $lynx)
{
    $lines = array();
    $fp = popen(str_replace("URL", $url, $lynx), "r");
    if ($fp === false) {
        return $lines;
    }
    // Compare against false explicitly so a falsy line cannot end the loop,
    // and read whole lines so long reference URLs are not cut into pieces.
    while (($line = fgets($fp)) !== false) {
        $lines[] = trim($line);
    }
    pclose($fp); // handles opened with popen() are closed with pclose()
    return $lines;
}

/**
 * STEP 1 helper: extracts the suggested item names from the suggestion page.
 *
 * Lines before $header are ignored, collection stops at the first line equal
 * to $footer, and blank lines are skipped.
 *
 * @param array  $lines    Trimmed lines of the lynx dump.
 * @param string $header   Line that starts the list.
 * @param string $footer   Line that ends the list.
 * @param bool   $numbered Strip leading digits, dots, whitespace, asterisks.
 * @param bool   $repeated Only accept a line identical to the previous one.
 * @param bool   $multi    Split each line on link markers into several names.
 * @param bool   $debug    Print header-matching diagnostics.
 * @return array           List of suggestion strings.
 */
function cheepcheep_parse_suggestions($lines, $header, $footer, $numbered, $repeated, $multi, $debug = false)
{
    $suggestions = array();
    $found_header = false;
    $last_line = "";
    foreach ($lines as $line) {
        // Lynx marks links as "[12]"; they become separators or vanish.
        $line = preg_replace("/\[[0-9]*\]/", $multi ? "|" : "", $line);

        if (!$found_header) {
            if ($line === $header) {
                $found_header = true;
                if ($debug) print "Found header\n";
            } elseif ($debug) {
                print $line." IS NOT ".$header."\n";
            }
            $last_line = $line;
            continue;
        }

        // Blank lines directly after the header are not a (blank) footer.
        if ($line === "" && count($suggestions) == 0) continue;
        // Nothing after the footer is ever used, so stop here.
        if ($line === $footer) break;
        if ($line === "") continue;

        if ($numbered) $line = preg_replace("/^[0-9.\s\*]*/", "", $line);

        if ($line === "") {
            // Only a number was on this line; an empty search is useless.
        } elseif ($repeated) {
            if ($line === $last_line) $suggestions[] = $line;
        } elseif ($multi) {
            foreach (explode("|", $line) as $piece) {
                $piece = trim($piece);
                if ($piece !== "") $suggestions[] = $piece;
            }
        } else {
            $suggestions[] = $line;
        }
        $last_line = $line;
    }
    return $suggestions;
}

/**
 * STEP 2 helper: extracts the numbered results of one shopping search.
 *
 * A result is a line of the form "N. [ref]Name" found after the line
 * "Search Results"; a following line starting with "$" is its price. The
 * "References" section at the end of the dump maps each ref number to a URL.
 *
 * @param array $lines Trimmed lines of the lynx dump.
 * @param bool  $print Print a "." for every result found.
 * @return array       Items keyed by result number, each with the keys
 *                     index, name, price (0.0 if none seen), url ("" if none).
 */
function cheepcheep_parse_results($lines, $print = false)
{
    $items = array();
    $result = 1; // number the next result line is expected to carry
    $found_results = false;
    $end_results = false;
    $found_references = false;

    foreach ($lines as $line) {
        if ($line === "") continue;

        if ($found_references) {
            foreach ($items as $i => $item) {
                $num = $item["index"].". ";
                if (strncmp($line, $num, strlen($num)) === 0) {
                    // Drop only the leading "ref. " marker, nothing inside the URL.
                    $items[$i]["url"] = substr($line, strlen($num));
                }
            }
        } elseif ($end_results) {
            if ($line === "References") $found_references = true;
        } elseif ($found_results) {
            if ($line === $result."." || $line === "References"
                || strncmp($line, "Searches related to", strlen("Searches related to")) === 0) {
                if ($line === "References") $found_references = true;
                $end_results = true;
                continue;
            }
            // "1. [23]Name" becomes "1. |23|Name": number, link ref, name.
            $line = str_replace("|", " ", $line);
            $line = str_replace("[", "|", $line);
            $line = str_replace("]", "|", $line);
            $parts = explode("|", $line);
            if (count($parts) == 3) {
                $number = trim($parts[0]);
                $ref = trim($parts[1]);
                $name = preg_replace("/<[^>]*>([^<]*)<\/[^>]*>/", "$1", trim($parts[2]));
                if ($number === $result.".") {
                    if ($print) print ".";
                    $items[$result] = array(
                        "index" => $ref,
                        "name" => $name,
                        "price" => 0.0,
                        "url" => ""
                    );
                    $result++;
                }
            } elseif ($line[0] == "$" && $result > 1) {
                // A price line belongs to the most recently added result.
                $items[$result-1]["price"] = floatval(str_replace("$", "", $line));
            }
        } elseif ($line === "Search Results") {
            $found_results = true;
        }
    }
    return $items;
}

/**
 * Tells whether an item matches a banned site or a banned phrase.
 *
 * @param array $item         Item with the keys "url" and "name".
 * @param array $banned_sites Substrings looked for in the URL (case-sensitive).
 * @param array $banned_items Substrings looked for in the name (any case).
 * @return bool
 */
function cheepcheep_is_banned($item, $banned_sites, $banned_items)
{
    foreach ($banned_sites as $site) {
        if (strpos($item["url"], $site) !== false) return true;
    }
    foreach ($banned_items as $phrase) {
        if (stripos($item["name"], $phrase) !== false) return true;
    }
    return false;
}

/**
 * Reads the number of hits from a search page.
 *
 * Looks at every line containing both "result" and "second" and keeps the
 * last space-separated token that is a positive whole number (thousands
 * separators removed).
 *
 * @param array $lines   Trimmed lines of the lynx dump.
 * @param int   $default Value returned when no number is found.
 * @return int
 */
function cheepcheep_parse_count($lines, $default = 99999)
{
    $count = $default;
    foreach ($lines as $line) {
        if (strpos($line, "result") === false || strpos($line, "second") === false) continue;
        foreach (explode(" ", $line) as $token) {
            $token = str_replace(",", "", $token);
            if (preg_match("/^[0-9]+$/", $token) && intval($token) > 0) {
                $count = intval($token);
            }
        }
    }
    return $count;
}

/**
 * Finds cheap items for the currently trending products.
 *
 * Reads the settings above through globals, scrapes the suggestion page,
 * runs one shopping search per suggestion and one hit-count search per
 * accepted item.
 *
 * @param bool $print Print progress and the final list to standard output.
 * @return array      List of items sorted from rarest to most common; each
 *                    item has the keys index, name, price, url and count.
 */
function cheepcheep($print=false)
{
    global $suggestion_page, $suggestion_header, $suggestion_footer,
        $suggestions_numbered, $suggestions_repeated, $suggestions_multi,
        $product_page, $shop_page, $banned_items, $banned_sites, $lynx;

    // Manual debugging switches.
    $debug = false;
    $suggestions_only = false;

    if ($print) print "Searching:\n";

    /** STEP 1: Get the trending items off the main suggestion page. **/
    $suggestions = cheepcheep_parse_suggestions(
        cheepcheep_fetch_lines($suggestion_page, $lynx),
        $suggestion_header,
        $suggestion_footer,
        $suggestions_numbered,
        $suggestions_repeated,
        $suggestions_multi,
        $debug
    );
    if ($debug) {
        print "SUGGESTIONS:";
        print_r($suggestions);
        if ($suggestions_only) die();
    }

    /** STEP 2: Search for each of the suggested items. **/
    $master_items = array(); // This will contain a list of all items found.
    foreach ($suggestions as $suggestion) {
        if ($print) print $suggestion;

        // The suggestion is text scraped from a web page: encode it so that
        // spaces give a valid URL and quotes, "$" or backticks cannot reach
        // the shell through the lynx command line.
        $url = $shop_page.urlencode($suggestion);
        $items = cheepcheep_parse_results(cheepcheep_fetch_lines($url, $lynx), $print);

        foreach ($items as $item) {
            if ($item["price"] < 0.01) continue; // no usable price was found
            if (cheepcheep_is_banned($item, $banned_sites, $banned_items)) continue;

            // The number of hits for the exact name is how common the item is.
            $url = "http://www.google.com/search?hl=en&tbs=shop%3A1&aq=f&q=".urlencode($item["name"]);
            $item["count"] = cheepcheep_parse_count(cheepcheep_fetch_lines($url, $lynx));

            if ($print) print "+";
            $master_items[] = $item;
        }
        if ($print) print "\n";
    }
    if ($print) print "Done\n";

    /** Bubble sort the products from most rare to most common **/
    // There are rarely more than 20 products, bubble sort is fine.
    $total = count($master_items);
    $done = false;
    while (!$done) {
        $done = true;
        for ($i=1; $i < $total; $i++) {
            if ($master_items[$i]["count"] < $master_items[$i-1]["count"]) {
                $swap = $master_items[$i-1];
                $master_items[$i-1] = $master_items[$i];
                $master_items[$i] = $swap;
                $done = false;
            }
        }
    }

    if ($print && $total > 0) {
        // NOTE(review): how $min_count was originally computed is not
        // recoverable from the source; it is taken here to be the smallest
        // count, so only the rarest item(s) are listed - confirm.
        $min_count = $master_items[0]["count"];
        foreach ($master_items as $item) {
            if ($item["count"] > $min_count) continue;
            print "\n".$item["name"]."\n";
            print " $".number_format($item["price"],2)."\n";
            print " ".$item["url"]."\n";
        }
    }

    return $master_items;
}

// If you want to run this as a library, comment out the following line of code.
cheepcheep(true);
?>