twitterimport.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608
  1. <?php
  2. // This file is part of GNU social - https://www.gnu.org/software/social
  3. //
  4. // GNU social is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU Affero General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // GNU social is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU Affero General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU Affero General Public License
  15. // along with GNU social. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * @category Plugin
  18. * @package GNUsocial
  19. * @author Zach Copley <zach@status.net>
  20. * @author Julien C <chaumond@gmail.com>
  21. * @author Brion Vibber <brion@status.net>
  22. * @copyright 2009-2010 StatusNet, Inc.
  23. * @license https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
  24. */
  25. defined('GNUSOCIAL') || die();
  26. require_once dirname(__DIR__) . '/twitter.php';
  /**
   * Encapsulation of the Twitter status -> notice incoming bridge import.
   * Is used by both the polling twitterstatusfetcher.php daemon, and the
   * in-progress streaming import.
   *
   * @copyright 2009-2010 StatusNet, Inc.
   * @license   https://www.gnu.org/licenses/agpl.html GNU AGPL v3 or later
   */
  class TwitterImport
  {
      /** @var string Twitter's size name for the avatar variant we fetch (128x128 px) */
      public $avatarsizename = 'reasonably_small';

      /** @var int Edge length in pixels of the stored avatar (they're square) */
      public $avatarsize = 128;

      /**
       * Import a single Twitter status as a local notice.
       *
       * Statuses that originated from this site (matched by the configured
       * integration source name) and statuses of protected users are skipped.
       *
       * @param object $status decoded Twitter status
       *
       * @return Notice|null the saved (or already existing) notice, null when skipped or on failure
       */
      public function importStatus($status)
      {
          // Hacktastic: filter out stuff coming from this StatusNet
          $source = mb_strtolower((string) common_config('integration', 'source'));
          $statusSource = isset($status->source) ? mb_strtolower($status->source) : '';

          // Plain substring test: the configured name is not a regex, and an
          // empty name must not match (and thereby skip) every status.
          if ($source !== '' && mb_strpos($statusSource, $source) !== false) {
              common_debug(__METHOD__ . ' - Skipping import of status ' .
                  twitter_id($status) . " with source {$source}");
              return null;
          }

          // Don't save it if the user is protected
          // FIXME: save it but treat it as private
          if (!empty($status->user->protected)) {
              return null;
          }

          return $this->saveStatus($status);
      }

      /**
       * @return string the runtime class name of this importer
       */
      public function name()
      {
          return get_class($this);
      }

      /**
       * Save a Twitter status as a notice, unless it was imported before.
       *
       * Retweets are stored as repeats of the (recursively imported) original.
       * Replies are attached to the local copy of their parent when one exists.
       *
       * @param object $status decoded Twitter status
       *
       * @return Notice|null the new or previously imported notice, null on failure
       */
      public function saveStatus($status)
      {
          $profile = $this->ensureProfile($status->user);

          if (empty($profile)) {
              common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice. No associated Profile.');
              return null;
          }

          $statusId = twitter_id($status);
          $statusUri = $this->makeStatusURI($status->user->screen_name, $statusId);

          // check to see if we've already imported the status
          $n2s = Notice_to_status::getKV('status_id', $statusId);

          if (!empty($n2s)) {
              common_log(
                  LOG_INFO,
                  __METHOD__ . " - Ignoring duplicate import: {$statusId}"
              );
              return Notice::getKV('id', $n2s->notice_id);
          }

          $dupe = Notice::getKV('uri', $statusUri);

          if ($dupe instanceof Notice) {
              // Add it to our record
              Notice_to_status::saveNew($dupe->id, $statusId);
              common_log(
                  LOG_INFO,
                  __METHOD__ . " - Ignoring duplicate import: {$statusId}"
              );
              return $dupe;
          }

          // If it's a retweet, save it as a repeat!
          if (!empty($status->retweeted_status)) {
              return $this->saveRepeat($status, $profile, $statusId, $statusUri);
          }

          $notice = new Notice();

          $notice->profile_id = $profile->id;
          $notice->uri = $statusUri;
          $notice->url = $statusUri;
          $notice->verb = ActivityVerb::POST;
          $notice->object_type = ActivityObject::NOTE;

          // date() instead of the deprecated strftime(); an unparseable
          // timestamp falls back to "now" rather than the Unix epoch.
          $createdTs = strtotime($status->created_at);
          $notice->created = ($createdTs === false)
              ? common_sql_now()
              : date('Y-m-d H:i:s', $createdTs);

          $notice->source = 'twitter';
          $notice->reply_to = null;

          $replyTo = twitter_id($status, 'in_reply_to_status_id');

          if (!empty($replyTo)) {
              $parent = $this->findReplyParent($statusId, $replyTo);
              if (!empty($parent)) {
                  $notice->reply_to = $parent->id;
                  $notice->conversation = $parent->conversation;
              }
          }

          $notice->is_local = Notice::GATEWAY;

          // linkify() returns entity-escaped text; the plain content is decoded again
          $notice->content = html_entity_decode($this->linkify($status, false), ENT_QUOTES, 'UTF-8');
          $notice->rendered = $this->linkify($status, true);

          if (Event::handle('StartNoticeSave', [&$notice])) {
              if (empty($notice->conversation)) {
                  $conv = Conversation::create();
                  common_log(LOG_INFO, "No known conversation for status {$statusId} so a new one ({$conv->getID()}) was created.");
                  $notice->conversation = $conv->getID();
              }

              $id = $notice->insert();

              if ($id === false) {
                  common_log_db_error($notice, 'INSERT', __FILE__);
                  common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice.');
                  // Nothing was stored, so there is nothing to map or decorate.
                  return null;
              }

              Event::handle('EndNoticeSave', [$notice]);
          }

          if (empty($notice->id)) {
              // A StartNoticeSave handler vetoed the save without storing anything.
              common_log(LOG_ERR, __METHOD__ . " - Notice for status {$statusId} was not saved.");
              return null;
          }

          Notice_to_status::saveNew($notice->id, $statusId);

          $this->saveStatusMentions($notice, $status);
          $this->saveStatusAttachments($notice, $status);

          $notice->blowOnInsert();

          return $notice;
      }

      /**
       * Save a retweet as a repeat of the imported original status.
       *
       * @param object  $status    the retweeting status (has ->retweeted_status)
       * @param Profile $profile   local profile of the retweeting user
       * @param mixed   $statusId  Twitter id of the retweeting status
       * @param string  $statusUri URI of the retweeting status
       *
       * @return Notice|null the repeat notice, null when the original could not be saved
       */
      private function saveRepeat($status, $profile, $statusId, $statusUri)
      {
          common_log(LOG_INFO, "Status {$statusId} is a retweet of " . twitter_id($status->retweeted_status) . ".");

          $original = $this->saveStatus($status->retweeted_status);
          if (empty($original)) {
              return null;
          }

          $author = $original->getProfile();
          // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'.
          // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice.
          $content = sprintf(
              _m('RT @%1$s %2$s'),
              $author->nickname,
              $original->content
          );

          if (Notice::contentTooLong($content)) {
              // leave room for the 4-character ' ...' suffix
              $content = mb_substr($content, 0, Notice::maxContent() - 4) . ' ...';
          }

          $repeat = Notice::saveNew(
              $profile->id,
              $content,
              'twitter',
              [
                  'repeat_of'   => $original->id,
                  'uri'         => $statusUri,
                  'is_local'    => Notice::GATEWAY,
                  'object_type' => ActivityObject::NOTE,
                  'verb'        => ActivityVerb::POST,
              ]
          );
          common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}");
          Notice_to_status::saveNew($repeat->id, $statusId);

          return $repeat;
      }

      /**
       * Find the local notice corresponding to the status being replied to.
       *
       * @param mixed $statusId Twitter id of the replying status (for logging)
       * @param mixed $replyTo  Twitter id of the status replied to
       *
       * @return Notice|null the local parent notice, null when it is not known locally
       */
      private function findReplyParent($statusId, $replyTo)
      {
          common_log(LOG_INFO, "Status {$statusId} is a reply to status {$replyTo}");

          $n2s = Notice_to_status::getKV('status_id', $replyTo);
          $parent = empty($n2s) ? null : Notice::getKV('id', $n2s->notice_id);

          if (empty($parent)) {
              common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
              return null;
          }

          common_log(LOG_INFO, "Found local notice {$parent->id} for status {$replyTo}");
          return $parent;
      }

      /**
       * Make an URI for a status.
       *
       * @param string $username screen name of the status' author
       * @param mixed  $id       Twitter id of the status
       *
       * @return string URI
       */
      public function makeStatusURI($username, $id)
      {
          return 'https://twitter.com/'
              . $username
              . '/status/'
              . $id;
      }

      /**
       * Look up a Profile by nickname and profileurl field. Profile::getKV() was
       * not working consistently.
       *
       * The https URL is tried first, then the same URL with an http scheme.
       *
       * @param string $nickname   local nickname of the Twitter user
       * @param string $profileurl the profile url
       *
       * @return Profile the first Profile with that nickname and url
       *
       * @throws NoResultException when neither URL variant matches
       */
      protected function getProfileByUrl($nickname, $profileurl)
      {
          $profile = new Profile();
          $profile->nickname = $nickname;
          $profile->profileurl = $profileurl;
          $profile->limit(1);

          if (!$profile->find(true)) {
              $profile->profileurl = str_replace('https://', 'http://', $profileurl);
              if (!$profile->find(true)) {
                  throw new NoResultException($profile);
              }
          }
          return $profile;
      }

      /**
       * Return the local profile for a Twitter user, creating it when missing.
       * The avatar is refreshed in both cases.
       *
       * @param object $twuser decoded Twitter user
       *
       * @return Profile|false the profile, false when it could not be inserted
       */
      protected function ensureProfile($twuser)
      {
          // check to see if there's already a profile for this user
          $profileurl = 'https://twitter.com/' . $twuser->screen_name;
          try {
              $profile = $this->getProfileByUrl($twuser->screen_name, $profileurl);
              $this->updateAvatar($twuser, $profile);
              return $profile;
          } catch (NoResultException $e) {
              common_debug(__METHOD__ . ' - Adding profile and remote profile ' .
                  "for Twitter user: $profileurl.");
          }

          $profile = new Profile();
          $profile->query('START TRANSACTION');

          $profile->nickname = $twuser->screen_name;
          $profile->fullname = $twuser->name;
          $profile->homepage = $twuser->url;
          $profile->bio = $twuser->description;
          $profile->location = $twuser->location;
          $profile->profileurl = $profileurl;
          $profile->created = common_sql_now();

          try {
              $id = $profile->insert(); // insert _should_ throw exception on failure
              if (empty($id)) {
                  throw new Exception('Failed insert');
              }
          } catch (Exception $e) {
              common_log(LOG_WARNING, __METHOD__ . " Couldn't insert profile: " . $e->getMessage());
              common_log_db_error($profile, 'INSERT', __FILE__);
              $profile->query("ROLLBACK");
              return false;
          }

          $profile->query("COMMIT");

          $this->updateAvatar($twuser, $profile);

          return $profile;
      }

      /**
       * Checks whether we have to update the profile's avatar
       *
       * @param object  $twuser  decoded Twitter user
       * @param Profile $profile local profile of that user
       *
       * @return bool|null true when updated, false on failure, null when no action taken
       */
      protected function updateAvatar($twuser, Profile $profile)
      {
          if (empty($twuser->profile_image_url)) {
              // Nothing to fetch; pathinfo('') would not even yield a dirname.
              return null;
          }

          $path_parts = pathinfo($twuser->profile_image_url);

          $ext = isset($path_parts['extension'])
              ? '.' . $path_parts['extension']
              : ''; // some lack extension

          $img_root = basename($path_parts['basename'], '_normal' . $ext); // cut off extension
          $filename = "Twitter_{$twuser->id}_{$img_root}_{$this->avatarsizename}{$ext}";

          try {
              $avatar = Avatar::getUploaded($profile);
              if ($avatar->filename === $filename) {
                  return null;
              }
              common_debug(sprintf(
                  '%s - Updating profile avatar (profile_id=%d) from %s to %s',
                  __METHOD__,
                  $profile->id,
                  $avatar->filename,
                  $filename
              ));
              // else we continue with creating a new avatar
          } catch (NoAvatarException $e) {
              // Avatar was not found. We can catch NoAvatarException or FileNotFoundException
              // but generally we just want to continue creating a new avatar.
              common_debug(__METHOD__ . " - No avatar found for (profile_id={$profile->id})");
          }

          $url = "{$path_parts['dirname']}/{$img_root}_{$this->avatarsizename}{$ext}";
          $mediatype = $this->getMediatype(mb_substr($ext, 1));

          try {
              $this->newAvatar($profile, $url, $filename, $mediatype);
          } catch (Exception $e) {
              common_log(
                  LOG_WARNING,
                  __METHOD__ . " - Couldn't update avatar for (profile_id={$profile->id}): " . $e->getMessage()
              );
              // don't leave a partially written file behind
              if (file_exists(Avatar::path($filename))) {
                  unlink(Avatar::path($filename));
              }
              return false;
          }

          return true;
      }

      /**
       * Map a file extension to an image media type.
       *
       * @param string $ext extension without the leading dot, any case
       *
       * @return string 'image/jpeg', 'image/gif', or 'image/png' for anything else
       */
      protected function getMediatype($ext)
      {
          switch (strtolower($ext)) {
              case 'jpeg':
              case 'jpg':
                  return 'image/jpeg';
              case 'gif':
                  return 'image/gif';
              default:
                  return 'image/png';
          }
      }

      /**
       * Replace the profile's avatars with one freshly fetched from Twitter.
       *
       * @param Profile $profile   profile that owns the avatar
       * @param string  $url       remote image URL
       * @param string  $filename  bare avatar filename to store under
       * @param string  $mediatype media type recorded for the avatar
       *
       * @return Avatar the inserted avatar record
       *
       * @throws Exception       when the image cannot be fetched or written
       * @throws ServerException when the avatar record cannot be inserted
       */
      protected function newAvatar(Profile $profile, $url, $filename, $mediatype)
      {
          // Clear out old avatars, won't do anything if there are none
          Avatar::deleteFromProfile($profile);

          // throws exception if unable to fetch
          $this->fetchRemoteUrl($url, Avatar::path($filename));

          $avatar = new Avatar();
          $avatar->profile_id = $profile->id;
          $avatar->original = true; // this is an original/"uploaded" avatar
          $avatar->mediatype = $mediatype;
          $avatar->filename = $filename;
          $avatar->width = $this->avatarsize;
          $avatar->height = $this->avatarsize;
          $avatar->created = common_sql_now();

          $id = $avatar->insert();

          if (empty($id)) {
              // There is no exception in scope here; the DB error log carries the details.
              common_log(LOG_WARNING, __METHOD__ . " Couldn't insert avatar");
              common_log_db_error($avatar, 'INSERT', __FILE__);
              throw new ServerException('Could not insert avatar');
          }

          common_debug(__METHOD__ . " - Saved new avatar for {$profile->id}.");

          return $avatar;
      }

      /**
       * Fetch a remote avatar image and save to local storage.
       *
       * @param string $url      avatar source URL
       * @param string $filename local path to write the download to
       *
       * @return bool true on success
       *
       * @throws ServerException when the body is empty or cannot be written
       * @throws Exception       when the HTTP response is not OK
       */
      protected function fetchRemoteUrl($url, $filename)
      {
          common_debug(__METHOD__ . " - Fetching Twitter avatar: {$url} to {$filename}");
          $request = HTTPClient::start();
          $request->setConfig('connect_timeout', 3); // I had problems with throttling
          $request->setConfig('timeout', 6); // and locking the process sucks.
          $response = $request->get($url);

          if (!$response->isOk()) {
              throw new Exception('Unexpected HTTP status code');
          }

          // A zero-byte write counts as a failure too: it is not a usable image.
          if (!file_put_contents($filename, $response->getBody())) {
              throw new ServerException('Failed saving fetched file');
          }

          return true;
      }

      // Entity type markers used by linkify()
      const URL = 1;
      const HASHTAG = 2;
      const MENTION = 3;

      /**
       * Render the text of a status with its entities substituted.
       *
       * With $html the result is an HTML fragment with links for URLs, hashtags
       * and mentions. Without it the result is entity-escaped plain text in
       * which short URLs are replaced by their expanded form.
       *
       * @param object $status decoded Twitter status
       * @param bool   $html   whether to produce HTML links
       *
       * @return string entity-escaped text or HTML fragment
       */
      public function linkify($status, $html = false)
      {
          $text = $status->text;

          if (empty($status->entities)) {
              if (!$html) {
                  // Plain-text callers decode entities afterwards; they must not
                  // receive HTML anchors.
                  return $this->twitEscape($text);
              }

              $statusId = twitter_id($status);
              common_log(LOG_WARNING, "No entities data for {$statusId}; trying to fake up links ourselves.");
              $text = common_replace_urls_callback($text, 'common_linkify');

              // /u so that \pL and \pN see whole characters instead of single bytes
              $tagged = preg_replace_callback(
                  '/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/u',
                  function ($m) {
                      return $m[1] . '#' . self::tagLink($m[2], $m[2]);
                  },
                  $text
              );
              if ($tagged !== null) {
                  // null means PCRE rejected the subject (e.g. invalid UTF-8)
                  $text = $tagged;
              }

              $mentioned = preg_replace_callback(
                  '/(^|\s+)@([a-z0-9A-Z_]{1,64})/',
                  function ($m) {
                      return $m[1] . '@' . self::atLink($m[2], null, $m[2]);
                  },
                  $text
              );
              if ($mentioned !== null) {
                  $text = $mentioned;
              }

              return $text;
          }

          // Move all the entities into order so we can
          // replace them and escape surrounding plaintext
          // in order
          $toReplace = [];
          $entityTypes = [
              'urls'          => self::URL,
              'hashtags'      => self::HASHTAG,
              'user_mentions' => self::MENTION,
          ];
          foreach ($entityTypes as $field => $type) {
              if (empty($status->entities->$field)) {
                  continue;
              }
              foreach ($status->entities->$field as $entity) {
                  $toReplace[$entity->indices[0]] = [$type, $entity];
              }
          }

          // sort in forward order by key
          ksort($toReplace);

          $result = '';
          $cursor = 0;

          foreach ($toReplace as $part) {
              list($type, $object) = $part;
              $start = $object->indices[0];
              $end = $object->indices[1];

              if ($cursor < $start) {
                  // Copy in the preceding plaintext
                  $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor));
                  $cursor = $start;
              }

              $orig = $this->twitEscape(mb_substr($text, $start, $end - $start));

              switch ($type) {
                  case self::URL:
                      $linkText = $this->makeUrlLink($object, $orig, $html);
                      break;
                  case self::HASHTAG:
                      $linkText = $html ? $this->makeHashtagLink($object, $orig) : $orig;
                      break;
                  case self::MENTION:
                      $linkText = $html ? $this->makeMentionLink($object, $orig) : $orig;
                      break;
                  default:
                      // unknown entity type: keep the original text as-is
                      $linkText = $orig;
                      break;
              }

              $result .= $linkText;
              $cursor = $end;
          }

          $result .= $this->twitEscape(mb_substr($text, $cursor));

          return $result;
      }

      /**
       * Normalise the escaping of a piece of status text.
       *
       * @param string $str text as delivered by Twitter
       *
       * @return string text with HTML special characters encoded exactly once
       */
      public function twitEscape($str)
      {
          // Twitter seems to preemptive turn < and > into &lt; and &gt;
          // but doesn't for &, so while you may have some magic protection
          // against XSS by not bothing to escape manually, you still get
          // invalid XHTML. Thanks!
          //
          // Looks like their web interface pretty much sends anything
          // through intact, so.... to do equivalent, decode all entities
          // and then re-encode the special ones.
          return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8'));
      }

      /**
       * Render a URL entity.
       *
       * @param object $object URL entity (url, expanded_url, display_url)
       * @param string $orig   escaped original text of the entity (unused)
       * @param bool   $html   whether to produce an HTML link
       *
       * @return string HTML link, or the escaped target URL when not $html
       */
      public function makeUrlLink($object, $orig, $html)
      {
          // Fall back to the short URL when Twitter supplied no expansion,
          // instead of emitting an empty href and label.
          $target = !empty($object->expanded_url)
              ? $object->expanded_url
              : (isset($object->url) ? $object->url : '');
          $label = !empty($object->display_url) ? $object->display_url : $target;

          if ($html) {
              return '<a href="' . htmlspecialchars($target) . '" class="extlink">' . htmlspecialchars($label) . '</a>';
          } else {
              return htmlspecialchars($target);
          }
      }

      /**
       * Render a hashtag entity as an HTML link.
       *
       * @param object $object hashtag entity (text)
       * @param string $orig   escaped original text including the leading sigil
       *
       * @return string HTML fragment
       */
      public function makeHashtagLink($object, $orig)
      {
          // Drop one character, not one byte: the sigil may be the fullwidth form.
          return "#" . self::tagLink($object->text, mb_substr($orig, 1, null, 'UTF-8'));
      }

      /**
       * Render a mention entity as an HTML link.
       *
       * @param object $object mention entity (screen_name, name)
       * @param string $orig   escaped original text including the leading sigil
       *
       * @return string HTML fragment
       */
      public function makeMentionLink($object, $orig)
      {
          // Drop one character, not one byte: the sigil may be the fullwidth form.
          return "@" . self::atLink($object->screen_name, $object->name, mb_substr($orig, 1, null, 'UTF-8'));
      }

      /**
       * Build a link to the Twitter search for a hashtag.
       *
       * @param string      $tag  raw hashtag text without the sigil
       * @param string|null $orig already escaped link text; defaults to the escaped tag
       *
       * @return string HTML anchor
       */
      public static function tagLink($tag, $orig = null)
      {
          if ($orig === null) {
              $orig = htmlspecialchars($tag, ENT_QUOTES, 'UTF-8');
          }
          $query = rawurlencode('#' . $tag);
          return "<a href='https://twitter.com/search?q={$query}' class='hashtag'>{$orig}</a>";
      }

      /**
       * Build a link to a Twitter user's page.
       *
       * @param string      $screenName Twitter screen name
       * @param string|null $fullName   display name for the title attribute, if any
       * @param string|null $orig       already escaped link text; defaults to the escaped screen name
       *
       * @return string HTML anchor
       */
      public static function atLink($screenName, $fullName = null, $orig = null)
      {
          if ($orig === null) {
              $orig = htmlspecialchars($screenName, ENT_QUOTES, 'UTF-8');
          }
          $path = rawurlencode($screenName);

          if (!empty($fullName)) {
              // The display name is user-controlled and lands in a single-quoted
              // attribute; escape quotes but keep entities Twitter already encoded.
              $title = htmlspecialchars($fullName, ENT_QUOTES, 'UTF-8', false);
              return "<a href='https://twitter.com/{$path}' title='{$title}'>{$orig}</a>";
          } else {
              return "<a href='https://twitter.com/{$path}'>{$orig}</a>";
          }
      }

      /**
       * Record a Reply for every mentioned Twitter user that is linked to a
       * local user.
       *
       * @param Notice $notice the saved notice
       * @param object $status decoded Twitter status
       *
       * @return void
       */
      public function saveStatusMentions($notice, $status)
      {
          if (empty($status->entities) || empty($status->entities->user_mentions)) {
              return;
          }

          foreach ($status->entities->user_mentions as $mention) {
              try {
                  $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE);
                  $user = $flink->getUser();

                  $reply = new Reply();
                  $reply->notice_id = $notice->id;
                  $reply->profile_id = $user->id;
                  $reply->modified = $notice->created;

                  common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}");
                  if ($reply->insert() === false) {
                      common_log_db_error($reply, 'INSERT', __FILE__);
                  }
              } catch (NoSuchUserException $e) {
                  common_log(LOG_WARNING, 'No local user found for Foreign_link with id: ' . $mention->id);
              } catch (NoResultException $e) {
                  common_log(LOG_WARNING, 'No foreign link or profile found for Foreign_link with id: ' . $mention->id);
              }
          }
      }

      /**
       * Record URL links from the notice. Needed to get thumbnail records
       * for referenced photo and video posts, etc.
       *
       * Does nothing unless the attachments/process_links setting is enabled.
       *
       * @param Notice $notice
       * @param object $status
       *
       * @return void
       */
      public function saveStatusAttachments(Notice $notice, $status)
      {
          if (!common_config('attachments', 'process_links')) {
              return;
          }
          if (empty($status->entities) || empty($status->entities->urls)) {
              return;
          }

          foreach ($status->entities->urls as $url) {
              try {
                  File::processNew($url->url, $notice);
              } catch (ServerException $e) {
                  // Could not process attached URL; best effort, move on to the next one
                  common_debug(__METHOD__ . " - Could not process attached URL {$url->url}: " . $e->getMessage());
              }
          }
      }
  }