twitterimport.php 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * PHP version 5
  6. *
  7. * LICENCE: This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. * @category Plugin
  21. * @package StatusNet
  22. * @author Zach Copley <zach@status.net>
  23. * @author Julien C <chaumond@gmail.com>
  24. * @author Brion Vibber <brion@status.net>
  25. * @copyright 2009-2010 StatusNet, Inc.
  26. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  27. * @link http://status.net/
  28. */
  29. if (!defined('STATUSNET')) {
  30. exit(1);
  31. }
  32. require_once dirname(__DIR__) . '/twitter.php';
  33. /**
  34. * Encapsulation of the Twitter status -> notice incoming bridge import.
  35. * Is used by both the polling twitterstatusfetcher.php daemon, and the
  36. * in-progress streaming import.
  37. *
  38. * @category Plugin
  39. * @package StatusNet
  40. * @author Zach Copley <zach@status.net>
  41. * @author Julien C <chaumond@gmail.com>
  42. * @author Brion Vibber <brion@status.net>
  43. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  44. * @link http://status.net/
  45. * @link http://twitter.com/
  46. */
  47. class TwitterImport
  48. {
  49. public $avatarsizename = 'reasonably_small'; // a Twitter size name for 128x128 px
  50. public $avatarsize = 128; // they're square...
  51. public function importStatus($status)
  52. {
  53. // Hacktastic: filter out stuff coming from this StatusNet
  54. $source = mb_strtolower(common_config('integration', 'source'));
  55. if (preg_match("/$source/", mb_strtolower($status->source))) {
  56. common_debug(__METHOD__ . ' - Skipping import of status ' .
  57. twitter_id($status) . " with source {$source}");
  58. return null;
  59. }
  60. // Don't save it if the user is protected
  61. // FIXME: save it but treat it as private
  62. if ($status->user->protected) {
  63. return null;
  64. }
  65. $notice = $this->saveStatus($status);
  66. return $notice;
  67. }
  68. function name()
  69. {
  70. return get_class($this);
  71. }
  72. function saveStatus($status)
  73. {
  74. $profile = $this->ensureProfile($status->user);
  75. if (empty($profile)) {
  76. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice. No associated Profile.');
  77. return null;
  78. }
  79. $statusId = twitter_id($status);
  80. $statusUri = $this->makeStatusURI($status->user->screen_name, $statusId);
  81. // check to see if we've already imported the status
  82. $n2s = Notice_to_status::getKV('status_id', $statusId);
  83. if (!empty($n2s)) {
  84. common_log(
  85. LOG_INFO,
  86. __METHOD__ . " - Ignoring duplicate import: {$statusId}"
  87. );
  88. return Notice::getKV('id', $n2s->notice_id);
  89. }
  90. $dupe = Notice::getKV('uri', $statusUri);
  91. if($dupe instanceof Notice) {
  92. // Add it to our record
  93. Notice_to_status::saveNew($dupe->id, $statusId);
  94. common_log(
  95. LOG_INFO,
  96. __METHOD__ . " - Ignoring duplicate import: {$statusId}"
  97. );
  98. return $dupe;
  99. }
  100. // If it's a retweet, save it as a repeat!
  101. if (!empty($status->retweeted_status)) {
  102. common_log(LOG_INFO, "Status {$statusId} is a retweet of " . twitter_id($status->retweeted_status) . ".");
  103. $original = $this->saveStatus($status->retweeted_status);
  104. if (empty($original)) {
  105. return null;
  106. } else {
  107. $author = $original->getProfile();
  108. // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'.
  109. // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice.
  110. $content = sprintf(_m('RT @%1$s %2$s'),
  111. $author->nickname,
  112. $original->content);
  113. if (Notice::contentTooLong($content)) {
  114. $contentlimit = Notice::maxContent();
  115. $content = mb_substr($content, 0, $contentlimit - 4) . ' ...';
  116. }
  117. $repeat = Notice::saveNew($profile->id,
  118. $content,
  119. 'twitter',
  120. array('repeat_of' => $original->id,
  121. 'uri' => $statusUri,
  122. 'is_local' => Notice::GATEWAY));
  123. common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}");
  124. Notice_to_status::saveNew($repeat->id, $statusId);
  125. return $repeat;
  126. }
  127. }
  128. $notice = new Notice();
  129. $notice->profile_id = $profile->id;
  130. $notice->uri = $statusUri;
  131. $notice->url = $statusUri;
  132. $notice->verb = ActivityVerb::POST;
  133. $notice->created = strftime(
  134. '%Y-%m-%d %H:%M:%S',
  135. strtotime($status->created_at)
  136. );
  137. $notice->source = 'twitter';
  138. $notice->reply_to = null;
  139. $replyTo = twitter_id($status, 'in_reply_to_status_id');
  140. if (!empty($replyTo)) {
  141. common_log(LOG_INFO, "Status {$statusId} is a reply to status {$replyTo}");
  142. $n2s = Notice_to_status::getKV('status_id', $replyTo);
  143. if (empty($n2s)) {
  144. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  145. } else {
  146. $reply = Notice::getKV('id', $n2s->notice_id);
  147. if (empty($reply)) {
  148. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  149. } else {
  150. common_log(LOG_INFO, "Found local notice {$reply->id} for status {$replyTo}");
  151. $notice->reply_to = $reply->id;
  152. $notice->conversation = $reply->conversation;
  153. }
  154. }
  155. }
  156. $notice->is_local = Notice::GATEWAY;
  157. $notice->content = html_entity_decode($this->linkify($status, FALSE), ENT_QUOTES, 'UTF-8');
  158. $notice->rendered = $this->linkify($status, TRUE);
  159. if (Event::handle('StartNoticeSave', array(&$notice))) {
  160. if (empty($notice->conversation)) {
  161. $conv = Conversation::create();
  162. common_log(LOG_INFO, "No known conversation for status {$statusId} so a new one ({$conv->getID()}) was created.");
  163. $notice->conversation = $conv->getID();
  164. }
  165. $id = $notice->insert();
  166. if ($id === false) {
  167. common_log_db_error($notice, 'INSERT', __FILE__);
  168. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice.');
  169. }
  170. Event::handle('EndNoticeSave', array($notice));
  171. }
  172. Notice_to_status::saveNew($notice->id, $statusId);
  173. $this->saveStatusMentions($notice, $status);
  174. $this->saveStatusAttachments($notice, $status);
  175. $notice->blowOnInsert();
  176. return $notice;
  177. }
  178. /**
  179. * Make an URI for a status.
  180. *
  181. * @param object $status status object
  182. *
  183. * @return string URI
  184. */
  185. function makeStatusURI($username, $id)
  186. {
  187. return 'https://twitter.com/'
  188. . $username
  189. . '/status/'
  190. . $id;
  191. }
  192. /**
  193. * Look up a Profile by profileurl field. Profile::getKV() was
  194. * not working consistently.
  195. *
  196. * @param string $nickname local nickname of the Twitter user
  197. * @param string $profileurl the profile url
  198. *
  199. * @return mixed value the first Profile with that url, or null
  200. */
  201. protected function getProfileByUrl($nickname, $profileurl)
  202. {
  203. $profile = new Profile();
  204. $profile->nickname = $nickname;
  205. $profile->profileurl = $profileurl;
  206. $profile->limit(1);
  207. if (!$profile->find(true)) {
  208. $profile->profileurl = str_replace('https://', 'http://', $profileurl);
  209. if (!$profile->find(true)) {
  210. throw new NoResultException($profile);
  211. }
  212. }
  213. return $profile;
  214. }
  215. protected function ensureProfile($twuser)
  216. {
  217. // check to see if there's already a profile for this user
  218. $profileurl = 'https://twitter.com/' . $twuser->screen_name;
  219. try {
  220. $profile = $this->getProfileByUrl($twuser->screen_name, $profileurl);
  221. $this->updateAvatar($twuser, $profile);
  222. return $profile;
  223. } catch (NoResultException $e) {
  224. common_debug(__METHOD__ . ' - Adding profile and remote profile ' .
  225. "for Twitter user: $profileurl.");
  226. }
  227. $profile = new Profile();
  228. $profile->query("BEGIN");
  229. $profile->nickname = $twuser->screen_name;
  230. $profile->fullname = $twuser->name;
  231. $profile->homepage = $twuser->url;
  232. $profile->bio = $twuser->description;
  233. $profile->location = $twuser->location;
  234. $profile->profileurl = $profileurl;
  235. $profile->created = common_sql_now();
  236. try {
  237. $id = $profile->insert(); // insert _should_ throw exception on failure
  238. if (empty($id)) {
  239. throw new Exception('Failed insert');
  240. }
  241. } catch(Exception $e) {
  242. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert profile: " . $e->getMessage());
  243. common_log_db_error($profile, 'INSERT', __FILE__);
  244. $profile->query("ROLLBACK");
  245. return false;
  246. }
  247. $profile->query("COMMIT");
  248. $this->updateAvatar($twuser, $profile);
  249. return $profile;
  250. }
  251. /*
  252. * Checks whether we have to update the profile's avatar
  253. *
  254. * @return true when updated, false on failure, null when no action taken
  255. */
  256. protected function updateAvatar($twuser, Profile $profile)
  257. {
  258. $path_parts = pathinfo($twuser->profile_image_url);
  259. $ext = isset($path_parts['extension'])
  260. ? '.'.$path_parts['extension']
  261. : ''; // some lack extension
  262. $img_root = basename($path_parts['basename'], '_normal'.$ext); // cut off extension
  263. $filename = "Twitter_{$twuser->id}_{$img_root}_{$this->avatarsizename}{$ext}";
  264. try {
  265. $avatar = Avatar::getUploaded($profile);
  266. if ($avatar->filename === $filename) {
  267. return null;
  268. }
  269. common_debug(__METHOD__ . " - Updating profile avatar (profile_id={$profile->id}) " .
  270. "from {$avatar->filename} to {$filename}");
  271. // else we continue with creating a new avatar
  272. } catch (NoAvatarException $e) {
  273. // Avatar was not found. We can catch NoAvatarException or FileNotFoundException
  274. // but generally we just want to continue creating a new avatar.
  275. common_debug(__METHOD__ . " - No avatar found for (profile_id={$profile->id})");
  276. }
  277. $url = "{$path_parts['dirname']}/{$img_root}_{$this->avatarsizename}{$ext}";
  278. $mediatype = $this->getMediatype(mb_substr($ext, 1));
  279. try {
  280. $this->newAvatar($profile, $url, $filename, $mediatype);
  281. } catch (Exception $e) {
  282. if (file_exists(Avatar::path($filename))) {
  283. unlink(Avatar::path($filename));
  284. }
  285. return false;
  286. }
  287. return true;
  288. }
  289. protected function getMediatype($ext)
  290. {
  291. $mediatype = null;
  292. switch (strtolower($ext)) {
  293. case 'jpeg':
  294. case 'jpg':
  295. $mediatype = 'image/jpeg';
  296. break;
  297. case 'gif':
  298. $mediatype = 'image/gif';
  299. break;
  300. default:
  301. $mediatype = 'image/png';
  302. }
  303. return $mediatype;
  304. }
  305. protected function newAvatar(Profile $profile, $url, $filename, $mediatype)
  306. {
  307. // Clear out old avatars, won't do anything if there are none
  308. Avatar::deleteFromProfile($profile);
  309. // throws exception if unable to fetch
  310. $this->fetchRemoteUrl($url, Avatar::path($filename));
  311. $avatar = new Avatar();
  312. $avatar->profile_id = $profile->id;
  313. $avatar->original = 1; // this is an original/"uploaded" avatar
  314. $avatar->mediatype = $mediatype;
  315. $avatar->filename = $filename;
  316. $avatar->width = $this->avatarsize;
  317. $avatar->height = $this->avatarsize;
  318. $avatar->created = common_sql_now();
  319. $id = $avatar->insert();
  320. if (empty($id)) {
  321. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert avatar - " . $e->getMessage());
  322. common_log_db_error($avatar, 'INSERT', __FILE__);
  323. throw new ServerException('Could not insert avatar');
  324. }
  325. common_debug(__METHOD__ . " - Saved new avatar for {$profile->id}.");
  326. return $avatar;
  327. }
  328. /**
  329. * Fetch a remote avatar image and save to local storage.
  330. *
  331. * @param string $url avatar source URL
  332. * @param string $filename bare local filename for download
  333. * @return bool true on success, false on failure
  334. */
  335. protected function fetchRemoteUrl($url, $filename)
  336. {
  337. common_debug(__METHOD__ . " - Fetching Twitter avatar: {$url} to {$filename}");
  338. $request = HTTPClient::start();
  339. $request->setConfig('connect_timeout', 3); // I had problems with throttling
  340. $request->setConfig('timeout', 6); // and locking the process sucks.
  341. $response = $request->get($url);
  342. if ($response->isOk()) {
  343. if (!file_put_contents($filename, $response->getBody())) {
  344. throw new ServerException('Failed saving fetched file');
  345. }
  346. } else {
  347. throw new Exception('Unexpected HTTP status code');
  348. }
  349. return true;
  350. }
  351. const URL = 1;
  352. const HASHTAG = 2;
  353. const MENTION = 3;
  354. function linkify($status, $html = FALSE)
  355. {
  356. $text = $status->text;
  357. if (empty($status->entities)) {
  358. $statusId = twitter_id($status);
  359. common_log(LOG_WARNING, "No entities data for {$statusId}; trying to fake up links ourselves.");
  360. $text = common_replace_urls_callback($text, 'common_linkify');
  361. $text = preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/',
  362. function ($m) { return $m[1].'#'.TwitterStatusFetcher::tagLink($m[2]); }, $text);
  363. $text = preg_replace_callback('/(^|\s+)@([a-z0-9A-Z_]{1,64})/',
  364. function ($m) { return $m[1].'@'.TwitterStatusFetcher::atLink($m[2]); }, $text);
  365. return $text;
  366. }
  367. // Move all the entities into order so we can
  368. // replace them and escape surrounding plaintext
  369. // in order
  370. $toReplace = array();
  371. if (!empty($status->entities->urls)) {
  372. foreach ($status->entities->urls as $url) {
  373. $toReplace[$url->indices[0]] = array(self::URL, $url);
  374. }
  375. }
  376. if (!empty($status->entities->hashtags)) {
  377. foreach ($status->entities->hashtags as $hashtag) {
  378. $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag);
  379. }
  380. }
  381. if (!empty($status->entities->user_mentions)) {
  382. foreach ($status->entities->user_mentions as $mention) {
  383. $toReplace[$mention->indices[0]] = array(self::MENTION, $mention);
  384. }
  385. }
  386. // sort in forward order by key
  387. ksort($toReplace);
  388. $result = '';
  389. $cursor = 0;
  390. foreach ($toReplace as $part) {
  391. list($type, $object) = $part;
  392. $start = $object->indices[0];
  393. $end = $object->indices[1];
  394. if ($cursor < $start) {
  395. // Copy in the preceding plaintext
  396. $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor));
  397. $cursor = $start;
  398. }
  399. $orig = $this->twitEscape(mb_substr($text, $start, $end - $start));
  400. switch($type) {
  401. case self::URL:
  402. $linkText = $this->makeUrlLink($object, $orig, $html);
  403. break;
  404. case self::HASHTAG:
  405. if ($html) {
  406. $linkText = $this->makeHashtagLink($object, $orig);
  407. }else{
  408. $linkText = $orig;
  409. }
  410. break;
  411. case self::MENTION:
  412. if ($html) {
  413. $linkText = $this->makeMentionLink($object, $orig);
  414. }else{
  415. $linkText = $orig;
  416. }
  417. break;
  418. default:
  419. $linkText = $orig;
  420. continue;
  421. }
  422. $result .= $linkText;
  423. $cursor = $end;
  424. }
  425. $last = $this->twitEscape(mb_substr($text, $cursor));
  426. $result .= $last;
  427. return $result;
  428. }
  429. function twitEscape($str)
  430. {
  431. // Twitter seems to preemptive turn < and > into &lt; and &gt;
  432. // but doesn't for &, so while you may have some magic protection
  433. // against XSS by not bothing to escape manually, you still get
  434. // invalid XHTML. Thanks!
  435. //
  436. // Looks like their web interface pretty much sends anything
  437. // through intact, so.... to do equivalent, decode all entities
  438. // and then re-encode the special ones.
  439. return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8'));
  440. }
  441. function makeUrlLink($object, $orig, $html)
  442. {
  443. if ($html) {
  444. return '<a href="'.htmlspecialchars($object->expanded_url).'" class="extlink">'.htmlspecialchars($object->display_url).'</a>';
  445. }else{
  446. return htmlspecialchars($object->expanded_url);
  447. }
  448. }
  449. function makeHashtagLink($object, $orig)
  450. {
  451. return "#" . self::tagLink($object->text, substr($orig, 1));
  452. }
  453. function makeMentionLink($object, $orig)
  454. {
  455. return "@".self::atLink($object->screen_name, $object->name, substr($orig, 1));
  456. }
  457. static function tagLink($tag, $orig)
  458. {
  459. return "<a href='https://twitter.com/search?q=%23{$tag}' class='hashtag'>{$orig}</a>";
  460. }
  461. static function atLink($screenName, $fullName, $orig)
  462. {
  463. if (!empty($fullName)) {
  464. return "<a href='https://twitter.com/{$screenName}' title='{$fullName}'>{$orig}</a>";
  465. } else {
  466. return "<a href='https://twitter.com/{$screenName}'>{$orig}</a>";
  467. }
  468. }
  469. function saveStatusMentions($notice, $status)
  470. {
  471. $mentions = array();
  472. if (empty($status->entities) || empty($status->entities->user_mentions)) {
  473. return;
  474. }
  475. foreach ($status->entities->user_mentions as $mention) {
  476. try {
  477. $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE);
  478. $user = $flink->getUser();
  479. $reply = new Reply();
  480. $reply->notice_id = $notice->id;
  481. $reply->profile_id = $user->id;
  482. $reply->modified = $notice->created;
  483. common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}");
  484. $id = $reply->insert();
  485. } catch (NoSuchUserException $e) {
  486. common_log(LOG_WARNING, 'No local user found for Foreign_link with id: '.$mention->id);
  487. } catch (NoResultException $e) {
  488. common_log(LOG_WARNING, 'No foreign link or profile found for Foreign_link with id: '.$mention->id);
  489. }
  490. }
  491. }
  492. /**
  493. * Record URL links from the notice. Needed to get thumbnail records
  494. * for referenced photo and video posts, etc.
  495. *
  496. * @param Notice $notice
  497. * @param object $status
  498. */
  499. function saveStatusAttachments(Notice $notice, $status)
  500. {
  501. if (common_config('attachments', 'process_links')) {
  502. if (!empty($status->entities) && !empty($status->entities->urls)) {
  503. foreach ($status->entities->urls as $url) {
  504. try {
  505. File::processNew($url->url, $notice);
  506. } catch (ServerException $e) {
  507. // Could not process attached URL
  508. }
  509. }
  510. }
  511. }
  512. }
  513. }