eensyweensy

a poor man's webspider
git clone https://wehaveforgeathome.hates.computer/eensyweensy.git
Log | Files | Refs | LICENSE

commit 5274c248b6e7a2d2541d34bb0a245002769da159
parent bf7e9be04666a5fa32c1fe5432f2c243c0f56829
Author: Ryan Wolf <rwolf@borderstylo.com>
Date:   Mon, 22 Feb 2010 22:59:39 -0800

everything seems to be working, now on to the blog post

Diffstat:
M eensyweensy.php | 23 +++++++++++++++++++++--
M eensyweensy.user.js | 16 +++++++++-------
2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/eensyweensy.php b/eensyweensy.php
@@ -12,7 +12,8 @@ if (!isset($_POST["data"])) {
 $data = json_decode(stripslashes($_REQUEST["data"]), true);
 $results = array();
-if (is_null($data["on_last_page"]) || !is_array($data["urls"])) {
+if (is_null($data["href"]) || is_null($data["on_last_page"])
+    || !is_array($data["urls"])) {
   $results["error"] = "validation error: missing on_last_page or urls";
 }
 else {
@@ -23,7 +24,25 @@ else {
     fwrite($file_handle, "$url\n");
   }
   fclose($file_handle);
-  $results["error"] = "yay";
+
+  if ($data["on_last_page"] == "true") {
+    $results["url"] = "http://hampsterdance.com";
+  }
+  else {
+    function next_url ($matches) {
+      if (count($matches) == 2) {
+        return $matches[1] . "?page=2";
+      }
+      $page = (int) $matches[3];
+      $page++;
+      return $matches[1] . "?page=$page";
+    };
+    $results["url"] = preg_replace_callback(
+      "/^([^\?]+)(\?page=(\d*))?$/",
+      "next_url",
+      $data["href"]
+    );
+  }
 }

 echo json_encode($results);
diff --git a/eensyweensy.user.js b/eensyweensy.user.js
@@ -12,7 +12,7 @@ var Spider = function () {
   // defaults
   this.jquery = 'http://jquery.com/src/jquery-latest.js';
   this.home = 'http://localhost/eensyweensy/eensyweensy.php';
-  this.grabber = function ($) {};
+  this.grabber = function ($) { return {}; };
 };

 Spider.prototype.insert_jquery = function () {
@@ -27,15 +27,15 @@ Spider.prototype.insert_jquery = function () {
       window.setTimeout(GM_wait, 100);
     }
     else {
-      that.$ = unsafeWindow.jQuery;
-      that.grab_data();
+      var $ = unsafeWindow.jQuery;
+      that.grab_data($);
     }
   }
   GM_wait();
 };

 Spider.prototype.grab_data = function ($) {
-  var data = this.grabber(this.$);
+  var data = this.grabber($);
   this.phone_home(data);
 };

@@ -53,17 +53,19 @@ Spider.prototype.phone_home = function (data) {
 };

 Spider.prototype.handle_results = function (response) {
-  if (!response) { alert("no response"); return; }
   var data;
   // JSON.parse barfs top-level errors on failure
   try { data = JSON.parse(response.responseText); }
   catch (e) {
-    alert(JSON.stringify(response)); // debugging parties!
+    alert("debugging party:\n" + JSON.stringify(response));
     return;
   }
   if (data.error) { alert(data.error); return; }
+  if (!data.url) { alert("no error, but no url to go to"); return; }
+
+  document.location.assign(data.url);
 };

 Spider.prototype.go = function () {
@@ -80,7 +82,7 @@ spider.grabber = function ($) {
   var next_page_button = $('.next_page');
   var on_last_page = next_page_button.hasClass('disabled');
   var data = {
-    href: document.location.href;
+    href: document.location.href,
     urls: urls,
     on_last_page: on_last_page,
   };