importtwitteratom.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. #!/usr/bin/env php
  2. <?php
  3. /*
  4. * StatusNet - the distributed open-source microblogging tool
  5. * Copyright (C) 2010 StatusNet, Inc.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. */
  // Root of the installation: the parent of the directory containing this script.
  define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));

  // Option specifications for this script: -i/--id, -n/--nickname, -f/--file.
  // NOTE(review): the trailing ':' / '=' presumably mark options that take a
  // value, and these globals are presumably read by commandline.inc, which is
  // why they are assigned before the require_once below -- confirm.
  $shortoptions = 'i:n:f:';
  $longoptions = array('id=', 'nickname=', 'file=');

  // Usage text; getAtomFeedDocument() calls show_help() when no file is given.
  // NOTE(review): presumably show_help() prints this string -- confirm.
  $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
importtwitteratom.php [options]
import an Atom feed from Twitter as notices by a user
-i --id ID of user to update
-n --nickname nickname of the user to update
-f --file file to import (Atom-only for now)
END_OF_IMPORTTWITTERATOM_HELP;

  // Shared command-line bootstrap. The helpers used further down that are not
  // defined in this file (get_option_value, have_option, show_help, getUser)
  // presumably come from here -- confirm.
  require_once INSTALLDIR.'/scripts/commandline.inc';
  /**
   * Load and validate the Atom feed named by the -f/--file option.
   *
   * When no file was given, shows the help text and exits with status 1.
   *
   * @return DOMDocument parsed document whose root element is an Atom <feed>
   *
   * @throws Exception if the file does not exist, is not a regular file,
   *                   cannot be read, or is not a well-formed Atom feed
   */
  function getAtomFeedDocument()
  {
      $filename = get_option_value('f', 'file');

      if (empty($filename)) {
          show_help();
          exit(1);
      }

      if (!file_exists($filename)) {
          throw new Exception("No such file '$filename'.");
      }

      if (!is_file($filename)) {
          throw new Exception("Not a regular file: '$filename'.");
      }

      if (!is_readable($filename)) {
          throw new Exception("File '$filename' not readable.");
      }

      $xml = file_get_contents($filename);

      // is_readable() can pass and the read still fail (race, I/O error).
      if ($xml === false) {
          throw new Exception("File '$filename' not readable.");
      }

      // loadXML() has to be called on an instance: the static form is an Error
      // on PHP 8. Called this way it returns a boolean, so a failed parse is
      // reported as a regular Exception instead of dying on a non-object.
      // Empty input is rejected up front because loadXML() refuses it outright.
      $dom = new DOMDocument();

      if ($xml === '' || !$dom->loadXML($xml) || is_null($dom->documentElement)) {
          throw new Exception("'$filename' is not an Atom feed.");
      }

      $root = $dom->documentElement;

      if ($root->namespaceURI != Activity::ATOM || $root->localName != 'feed') {
          throw new Exception("'$filename' is not an Atom feed.");
      }

      return $dom;
  }
  /**
   * Save every entry of an Atom feed as a notice by the given user.
   *
   * Each entry is wrapped in an Activity; the HTML of the tweet is fetched from
   * the link of the activity's first object, purified, and stored as the
   * rendered form of the notice, with the tag-stripped, entity-decoded text as
   * its content. Entries whose object or HTML cannot be obtained are skipped
   * rather than imported as empty notices.
   *
   * @param object      $user user the notices are saved for (only ->id is read)
   * @param DOMDocument $doc  parsed feed; its document element is the <feed>
   *
   * @return void
   */
  function importActivityStream($user, $doc)
  {
      $feed = $doc->documentElement;
      $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');

      // Entries are processed from last to first.
      // NOTE(review): presumably the feed is newest-first, so this saves the
      // oldest tweet first -- confirm.
      for ($i = $entries->length - 1; $i >= 0; $i--) {
          $activity = new Activity($entries->item($i), $feed);

          if (empty($activity->objects)) {
              print "ERROR: No activity object in entry.\n";
              continue;
          }

          $object = $activity->objects[0];

          // NOTE(review): -q/--quiet is not declared in this script's own option
          // lists; presumably commandline.inc provides it -- confirm.
          if (!have_option('q', 'quiet')) {
              print $activity->content . "\n";
          }

          // getTweetHtml() yields false on an HTTP failure and '' when the page
          // holds no tweet content; it has already printed the error, so just
          // move on instead of saving a blank notice.
          $fetched = getTweetHtml($object->link);

          if ($fetched === false || $fetched === '') {
              continue;
          }

          $html = common_purify($fetched);
          $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');

          Notice::saveNew($user->id,
                          $content,
                          'importtwitter',
                          array('uri' => $object->id,
                                'url' => $object->link,
                                'rendered' => $html,
                                'created' => common_sql_date($activity->time),
                                'replies' => array(),
                                'groups' => array()));
      }
  }
  /**
   * Download a tweet page and extract the tweet's HTML from it.
   *
   * Failures are reported on standard output rather than thrown.
   *
   * @param string $url address of the tweet page
   *
   * @return string|false whatever tweetHtmlFromBody() returns for the page
   *                      body, or false if the request threw an exception or
   *                      the response was not OK
   */
  function getTweetHtml($url)
  {
      try {
          $http = new HTTPClient();
          $reply = $http->get($url);
      } catch (Exception $failure) {
          print "ERROR: HTTP response " . $failure->getMessage() . "\n";
          return false;
      }

      if ($reply->isOk()) {
          return tweetHtmlFromBody($reply->getBody());
      }

      print "ERROR: HTTP response " . $reply->getCode() . "\n";
      return false;
  }
  /**
   * Extract the tweet markup from the HTML of a tweet page.
   *
   * The tweet is taken to be the content of the first
   * <span class="entry-content"> element. Its child nodes are serialized back
   * to markup, with the href of every <a> made absolute: links that already
   * carry a scheme (http:, https:, mailto:, ...) are kept as they are,
   * protocol-relative links get "http:", and everything else is resolved
   * against http://twitter.com/.
   *
   * @param string $body HTML source of the tweet page
   *
   * @return string markup of the tweet, or '' (after printing an error) when
   *                the page is empty, cannot be parsed, or has no such span
   */
  function tweetHtmlFromBody($body)
  {
      // loadHTML() rejects empty input, so treat it as "no content" up front.
      if (!is_string($body) || trim($body) === '') {
          print "ERROR: No content in tweet page.\n";
          return '';
      }

      // loadHTML() has to be called on an instance: the static form is an Error
      // on PHP 8.
      $doc = new DOMDocument();

      // Pages in the wild are rarely valid markup; keep libxml's complaints
      // internal instead of flooding the console, then restore the old setting.
      $previous = libxml_use_internal_errors(true);
      $loaded = $doc->loadHTML($body);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      if (!$loaded) {
          print "ERROR: No content in tweet page.\n";
          return '';
      }

      $xpath = new DOMXPath($doc);
      $spans = $xpath->query('//span[@class="entry-content"]');

      if ($spans->length == 0) {
          print "ERROR: No content in tweet page.\n";
          return '';
      }

      $text = '';

      foreach ($spans->item(0)->childNodes as $child) {
          if ($child instanceof DOMElement && $child->tagName == 'a') {
              $href = $child->getAttribute('href');

              if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $href)) {
                  // Already absolute (any scheme): leave untouched.
              } elseif (substr($href, 0, 2) === '//') {
                  // Protocol-relative: only the scheme is missing.
                  $child->setAttribute('href', 'http:' . $href);
              } else {
                  // Site-relative; guarantee exactly one slash after the host.
                  $child->setAttribute('href', 'http://twitter.com/' . ltrim($href, '/'));
              }
          }

          $text .= $doc->saveXML($child);
      }

      return $text;
  }
  // Script entry point: load the feed, resolve the target user, run the import.
  // Any Exception raised along the way is reported by message only and ends the
  // script with exit status 1.
  try {
      $doc = getAtomFeedDocument();
      $user = getUser();
      importActivityStream($user, $doc);
  } catch (Exception $failure) {
      echo $failure->getMessage(), "\n";
      exit(1);
  }