* @copyright 2010 Nic Jansma
 * @link http://www.nicj.net
 */

// ****************** fill in these options ******************

//
// base working directory to store issues
//
$baseDir = '/home/foo/kindle/';

//
// get images for the articles
//
$withImages = true;

//
// sender's email
//
$fromEmail = 'foo@foo.com';

//
// your kindle's email
//
$toEmail = 'foo@kindle.com';

//
// if true, gets a specific back-issue
//
$getBackIssue = false;
$backIssueDate = '20090110';
$backIssueDateLong = 'Jan 10, 2009';

//
// economist.com login and password
//
$GLOBALS['loginEmail'] = 'foo@foo.com';
$GLOBALS['loginPass'] = 'password';

// ****************** fill in above options ******************

// where to store cookies
$GLOBALS['cookieJarFile'] = $baseDir . 'economist-cookies.txt';

// unlimited execution time
set_time_limit(0);

//
// log in and set cookies
//
economistLogin();

//
// validate we have access
//
if (economistValidateAccess() === false) {
    echo "Could not log in to economist.com: Username or password mismatch!\n";
    exit;
}

//
// set $date and $dateLong of issue to get
//
$date = '';
$dateLong = '';
if ($getBackIssue) {
    $date = $backIssueDate;
    $dateLong = $backIssueDateLong;
} else {
    economistGetCurrentIssueDate($date, $dateLong);
}

// create work dir; an existing directory means this issue was already fetched
$dateDirectory = $baseDir . "economist_$date";
if (is_dir($dateDirectory)) {
    echo "No new Economists! Already have economist_$date.\n";
    exit;
}

echo "Economist $dateLong ($date)\n";

// stop here if the work directory cannot be created: every later step writes into it
if (!mkdir($dateDirectory)) {
    echo "Could not create work directory $dateDirectory\n";
    exit;
}

// work vars (filled in by createTOC, consumed by createHTML)
$urls = array();
$ids = array();

//
// page generation
//
createOPF($dateDirectory, $date, $dateLong);
createTOC($dateDirectory, $date, $dateLong, $urls, $ids);
createHTML($dateDirectory, $withImages, $urls, $ids);

//
// mobi conversion
//
$opfFile = "{$dateDirectory}/economist_$date.opf";
$mobiFile = "{$dateDirectory}/economist_$date.mobi";

echo "Running: /usr/local/bin/mobigen_linux -c1 $opfFile... \n";

// the path is built from $baseDir and a date that may have been scraped from
// the web page, so quote it before handing it to the shell
system('/usr/local/bin/mobigen_linux -c1 ' . escapeshellarg($opfFile));
echo " done!\n";

//
// email
//
emailEconomist($fromEmail, $toEmail, $mobiFile, "economist_$date.mobi");

/**
 * Validate access to economist.com
 *
 * Fetches the print edition page and treats access as valid unless the page
 * contains the "subscribers only" notice.
 *
 * @return bool True if access to economist.com is valid
 *
 */
function economistValidateAccess()
{
    $homePageContents = economistGetUrl('http://www.economist.com/printedition/');

    return strpos($homePageContents, 'This page is now available to subscribers only.') === false;
}

/**
 * Gets the current Economist issue date
 *
 * @param string &$date     Date (20100102 form)
 * @param string &$dateLong Long date (January 2, 2010 form)
 *
 * @return void
 *
 */
function economistGetCurrentIssueDate(&$date, &$dateLong)
{
    $homePageContents = economistGetUrl('http://www.economist.com/printedition/');

    // NOTE(review): this copy of the file is damaged from here on. The search
    // markers below are empty or cut off, and the rest of this function plus
    // the docblock and header of the date-conversion helper that owns the
    // $monthnames table appear to be missing (the text jumps straight into the
    // month array). It looks like markup was stripped from the string literals.
    // The damaged statements are left exactly as found; restore them from the
    // original file before running - this will not parse as-is.

    // January 17th 2009
    $start = strpos($homePageContents, '') + strlen('');
    $end = strpos($homePageContents, ' 'January', '02' => 'February', '03' => 'March', '04' => 'April', '05' => 'May', '06' => 'June', '07' => 'July', '08' => 'August', '09' => 'September', '10' => 'October', '11' => 'November', '12' => 'December', );

    $pieces = explode(' ', $pageDate);

    // a four-character day such as "17th" has two digits; otherwise zero-pad the single digit
    $day = (strlen($pieces[1]) === 4) ? substr($pieces[1], 0, 2) : '0' . substr($pieces[1], 0, 1);

    return $pieces[2] . array_search($pieces[0], $monthnames) . $day;
}

/**
 * Converts January 17th 2009 to Jan 17, 2009
 *
 * @param string $pageDate The page's date (eg. January 17th 2009)
 *
 * @return string Long date format (eg. Jan 17, 2009)
 *
 */
function convertDateLong($pageDate)
{
    $pieces = explode(' ', $pageDate);

    // first three letters of the month, day without its two-letter suffix, then the year
    return substr($pieces[0], 0, 3) . ' ' . substr($pieces[1], 0, -2) . ', ' . $pieces[2];
}

/**
 * Creates an OPF file
 *
 * Writes economist_<date>.opf into the issue's directory. If the file cannot
 * be opened for writing, a failure message is printed and nothing is written.
 *
 * @param string $dateDirectory Issue's directory
 * @param string $date          Issue's date
 * @param string $dateLong      Issue's long date
 *
 * @return void
 *
 */
function createOPF($dateDirectory, $date, $dateLong)
{
    echo "Creating OPF file: economist_$date.opf...";

    $opfFile = $dateDirectory . "/economist_$date.opf";
    $opfh = fopen($opfFile, 'w');
    if ($opfh === false) {
        echo " failed: could not open $opfFile for writing!\n";
        return;
    }

    // NOTE(review): the template below contains no markup in this copy of the
    // file - presumably the XML tags were stripped; confirm against the
    // original before relying on the generated OPF.
    fwrite($opfh, ' The Economist en-us 69D99D4B30 , '.$dateLong.' ');
    fclose($opfh);

    echo " done!\n";
}

/**
 * Creates a Table of Contents
 *
 * @param string $dateDirectory Issue's directory
 * @param string $date          Issue's date
 * @param string $dateLong      Issue's long date
 * @param array  &$urls         URLs of all articles (output)
 * @param array  &$ids          Article IDs (output)
 *
 * @return void
 *
 */
function createTOC($dateDirectory, $date, $dateLong, &$urls, &$ids)
{
    echo 'Creating table of contents file: mbp_toc.html...';

    $pageContents = economistGetUrl("http://www.economist.com/printedition/index.cfm?d=$date");
    $pageContents = strstr($pageContents, '
'); $searchPos = false; $endPos = 0; while (($searchPos = strpos($pageContents, '
', $endPos)) !== false) { $endPos = $searchPos + strlen(''); } $pageContents = substr($pageContents, 0, $endPos); // remove banners $pageContents = preg_replace('/
([[:space:]])*
([[:space:]])*
(.*?)<\/div>([[:space:]])*<\/div>([[:space:]])*<\/div>/s', '', $pageContents); // remove british flags $pageContents = preg_replace('/\[Britain only\]/', '', $pageContents); $pageContents = preg_replace('/

\s*Articles flagged with this icon are printed only in the British edition of The Economist<\/em><\/p>/s', '', $pageContents); // remove right column stuff $pageContents = preg_replace('/