importtwitteratom.php 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. #!/usr/bin/env php
  2. <?php
  3. /*
  4. * StatusNet - the distributed open-source microblogging tool
  5. * Copyright (C) 2010 StatusNet, Inc.
  6. *
  7. * This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. */
  20. define('INSTALLDIR', dirname(__DIR__));
  21. define('PUBLICDIR', INSTALLDIR . DIRECTORY_SEPARATOR . 'public');
  22. $shortoptions = 'i:n:f:';
  23. $longoptions = array('id=', 'nickname=', 'file=');
  24. $helptext = <<<END_OF_IMPORTTWITTERATOM_HELP
  25. importtwitteratom.php [options]
  26. import an Atom feed from Twitter as notices by a user
  27. -i --id ID of user to update
  28. -n --nickname nickname of the user to update
  29. -f --file file to import (Atom-only for now)
  30. END_OF_IMPORTTWITTERATOM_HELP;
  31. require_once INSTALLDIR.'/scripts/commandline.inc';
  32. function getAtomFeedDocument()
  33. {
  34. $filename = get_option_value('f', 'file');
  35. if (empty($filename)) {
  36. show_help();
  37. exit(1);
  38. }
  39. if (!file_exists($filename)) {
  40. throw new Exception("No such file '$filename'.");
  41. }
  42. if (!is_file($filename)) {
  43. throw new Exception("Not a regular file: '$filename'.");
  44. }
  45. if (!is_readable($filename)) {
  46. throw new Exception("File '$filename' not readable.");
  47. }
  48. $xml = file_get_contents($filename);
  49. $dom = DOMDocument::loadXML($xml);
  50. if ($dom->documentElement->namespaceURI != Activity::ATOM ||
  51. $dom->documentElement->localName != 'feed') {
  52. throw new Exception("'$filename' is not an Atom feed.");
  53. }
  54. return $dom;
  55. }
  56. function importActivityStream($user, $doc)
  57. {
  58. $feed = $doc->documentElement;
  59. $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry');
  60. for ($i = $entries->length - 1; $i >= 0; $i--) {
  61. $entry = $entries->item($i);
  62. $activity = new Activity($entry, $feed);
  63. $object = $activity->objects[0];
  64. if (!have_option('q', 'quiet')) {
  65. print $activity->content . "\n";
  66. }
  67. $html = common_purify(getTweetHtml($object->link));
  68. $content = html_entity_decode(strip_tags($html), ENT_QUOTES, 'UTF-8');
  69. $notice = Notice::saveNew($user->id,
  70. $content,
  71. 'importtwitter',
  72. array('uri' => $object->id,
  73. 'url' => $object->link,
  74. 'rendered' => $html,
  75. 'created' => common_sql_date($activity->time),
  76. 'replies' => array(),
  77. 'groups' => array()));
  78. }
  79. }
  80. function getTweetHtml($url)
  81. {
  82. try {
  83. $client = new HTTPClient();
  84. $response = $client->get($url);
  85. } catch (Exception $e) {
  86. print "ERROR: HTTP response " . $e->getMessage() . "\n";
  87. return false;
  88. }
  89. if (!$response->isOk()) {
  90. print "ERROR: HTTP response " . $response->getCode() . "\n";
  91. return false;
  92. }
  93. $body = $response->getBody();
  94. return tweetHtmlFromBody($body);
  95. }
  96. function tweetHtmlFromBody($body)
  97. {
  98. $doc = DOMDocument::loadHTML($body);
  99. $xpath = new DOMXPath($doc);
  100. $spans = $xpath->query('//span[@class="entry-content"]');
  101. if ($spans->length == 0) {
  102. print "ERROR: No content in tweet page.\n";
  103. return '';
  104. }
  105. $span = $spans->item(0);
  106. $children = $span->childNodes;
  107. $text = '';
  108. for ($i = 0; $i < $children->length; $i++) {
  109. $child = $children->item($i);
  110. if ($child instanceof DOMElement &&
  111. $child->tagName == 'a' &&
  112. !preg_match('#^https?://#', $child->getAttribute('href'))) {
  113. $child->setAttribute('href', 'http://twitter.com' . $child->getAttribute('href'));
  114. }
  115. $text .= $doc->saveXML($child);
  116. }
  117. return $text;
  118. }
  119. try {
  120. $doc = getAtomFeedDocument();
  121. $user = getUser();
  122. importActivityStream($user, $doc);
  123. } catch (Exception $e) {
  124. print $e->getMessage()."\n";
  125. exit(1);
  126. }