123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164 |
- #!/usr/bin/env php
- <?php
- /*
- * StatusNet - the distributed open-source microblogging tool
- * Copyright (C) 2010 StatusNet, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Affero General Public License for more details.
- *
- * You should have received a copy of the GNU Affero General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
// Filesystem anchors: INSTALLDIR is the parent of this script's directory,
// PUBLICDIR is its "public" subdirectory.
define('INSTALLDIR', dirname(__DIR__));
define('PUBLICDIR', INSTALLDIR . DIRECTORY_SEPARATOR . 'public');

// Option specifications and help text. They are set before commandline.inc is
// loaded; presumably that include consumes them -- confirm against commandline.inc.
$shortoptions = 'i:n:f:';
$longoptions = array(
    'id=',
    'nickname=',
    'file='
);

$helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
importtwitteratom.php [options]
import an Atom feed from Twitter as notices by a user
-i --id ID of user to update
-n --nickname nickname of the user to update
-f --file file to import (Atom-only for now)
END_OF_IMPORTTWITTERATOM_HELP;

require_once INSTALLDIR . '/scripts/commandline.inc';
/**
 * Load the file named by the -f/--file option and parse it as an Atom feed.
 *
 * When no file option was given, the help text is shown and the script exits
 * with status 1.
 *
 * @return DOMDocument parsed document whose root element is an Atom "feed"
 *
 * @throws Exception if the file is missing, is not a regular file, is not
 *                   readable, cannot be read or parsed as XML, or its root
 *                   element is not an Atom feed
 */
function getAtomFeedDocument()
{
    $filename = get_option_value('f', 'file');

    if (empty($filename)) {
        show_help();
        exit(1);
    }

    if (!file_exists($filename)) {
        throw new Exception("No such file '$filename'.");
    }

    if (!is_file($filename)) {
        throw new Exception("Not a regular file: '$filename'.");
    }

    if (!is_readable($filename)) {
        throw new Exception("File '$filename' not readable.");
    }

    $xml = file_get_contents($filename);

    if ($xml === false) {
        throw new Exception("Could not read file '$filename'.");
    }

    // loadXML() is an instance method: calling it statically is deprecated on
    // older PHP versions and a fatal error on PHP 8. An empty string is
    // rejected up front because loadXML() refuses empty input.
    $dom = new DOMDocument();

    if ($xml === '' || !$dom->loadXML($xml)) {
        throw new Exception("Could not parse '$filename' as XML.");
    }

    $root = $dom->documentElement;

    if ($root === null ||
        $root->namespaceURI !== Activity::ATOM ||
        $root->localName !== 'feed') {
        throw new Exception("'$filename' is not an Atom feed.");
    }

    return $dom;
}
/**
 * Import each entry of an Atom feed as a notice saved for the given user.
 *
 * Entries are processed from the last one in document order to the first
 * (presumably so that a newest-first feed is imported oldest-first -- confirm).
 * For every entry the tweet page linked from the activity's first object is
 * fetched, and the purified HTML becomes the notice's rendered form while its
 * tag-stripped, entity-decoded text becomes the notice content.
 *
 * Entries whose tweet HTML could not be fetched are skipped instead of being
 * saved as empty notices.
 *
 * @param object      $user user whose id is used as the notice author
 * @param DOMDocument $doc  document whose root element holds the Atom entries
 *
 * @return void
 */
function importActivityStream($user, $doc)
{
    $feed = $doc->documentElement;

    $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');

    for ($i = $entries->length - 1; $i >= 0; $i--) {
        $entry = $entries->item($i);

        $activity = new Activity($entry, $feed);

        $object = $activity->objects[0];

        if (!have_option('q', 'quiet')) {
            print $activity->content . "\n";
        }

        // getTweetHtml() yields false or an empty string when the tweet page
        // could not be fetched or held no content; it has already printed an
        // error in that case, so just move on to the next entry.
        $fetched = getTweetHtml($object->link);

        if ($fetched === false || $fetched === '') {
            continue;
        }

        $html = common_purify($fetched);

        $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');

        Notice::saveNew($user->id,
                        $content,
                        'importtwitter',
                        array('uri' => $object->id,
                              'url' => $object->link,
                              'rendered' => $html,
                              'created' => common_sql_date($activity->time),
                              'replies' => array(),
                              'groups' => array()));
    }
}
/**
 * Download a tweet page and extract the tweet's HTML from it.
 *
 * @param string $url address of the tweet page to request
 *
 * @return string|false whatever tweetHtmlFromBody() returns for the response
 *                      body, or false (after printing an error line) when the
 *                      request throws or the response is not OK
 */
function getTweetHtml($url)
{
    try {
        $http = new HTTPClient();
        $reply = $http->get($url);
    } catch (Exception $failure) {
        print "ERROR: HTTP response " . $failure->getMessage() . "\n";
        return false;
    }

    // Happy path first: hand the page body to the extractor.
    if ($reply->isOk()) {
        return tweetHtmlFromBody($reply->getBody());
    }

    print "ERROR: HTTP response " . $reply->getCode() . "\n";
    return false;
}
/**
 * Extract the markup inside the first <span class="entry-content"> of a page.
 *
 * Anchor elements that are direct children of that span and whose href does
 * not start with http:// or https:// get "http://twitter.com" prepended to
 * the href. The direct children are then serialized and concatenated.
 *
 * @param string $body HTML source of the tweet page
 *
 * @return string serialized children of the span, or an empty string (after
 *                printing an error line) when the body is empty, cannot be
 *                parsed, or contains no matching span
 */
function tweetHtmlFromBody($body)
{
    // loadHTML() refuses empty input, so report that as a page with no content.
    if (!is_string($body) || $body === '') {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    // Collect libxml parse warnings internally so they do not leak into the
    // script's output; the previous setting is restored right after parsing.
    $previous = libxml_use_internal_errors(true);

    // loadHTML() is an instance method: calling it statically is deprecated
    // on older PHP versions and a fatal error on PHP 8.
    $doc = new DOMDocument();
    $loaded = $doc->loadHTML($body);

    libxml_clear_errors();
    libxml_use_internal_errors($previous);

    if ($loaded === false) {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    $xpath = new DOMXPath($doc);

    $spans = $xpath->query('//span[@class="entry-content"]');

    if ($spans === false || $spans->length == 0) {
        print "ERROR: No content in tweet page.\n";
        return '';
    }

    $text = '';

    foreach ($spans->item(0)->childNodes as $child) {
        if ($child instanceof DOMElement &&
            $child->tagName == 'a' &&
            !preg_match('#^https?://#', $child->getAttribute('href'))) {
            // Link without an http(s) scheme: prefix it with the Twitter host.
            $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
        }

        $text .= $doc->saveXML($child);
    }

    return $text;
}
// Script entry point: load the feed document, obtain the user via getUser()
// (not defined in this view), then run the import. Any exception ends the run
// with its message printed and exit status 1.
try {
    $doc = getAtomFeedDocument();
    $user = getUser();

    importActivityStream($user, $doc);
} catch (Exception $e) {
    echo $e->getMessage(), "\n";
    exit(1);
}
|