twitterimport.php 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582
  1. <?php
  2. /**
  3. * StatusNet, the distributed open-source microblogging tool
  4. *
  5. * PHP version 5
  6. *
  7. * LICENCE: This program is free software: you can redistribute it and/or modify
  8. * it under the terms of the GNU Affero General Public License as published by
  9. * the Free Software Foundation, either version 3 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU Affero General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Affero General Public License
  18. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  19. *
  20. * @category Plugin
  21. * @package StatusNet
  22. * @author Zach Copley <zach@status.net>
  23. * @author Julien C <chaumond@gmail.com>
  24. * @author Brion Vibber <brion@status.net>
  25. * @copyright 2009-2010 StatusNet, Inc.
  26. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  27. * @link http://status.net/
  28. */
  29. if (!defined('STATUSNET')) {
  30. exit(1);
  31. }
  32. require_once dirname(__DIR__) . '/twitter.php';
  33. /**
  34. * Encapsulation of the Twitter status -> notice incoming bridge import.
  35. * Is used by both the polling twitterstatusfetcher.php daemon, and the
  36. * in-progress streaming import.
  37. *
  38. * @category Plugin
  39. * @package StatusNet
  40. * @author Zach Copley <zach@status.net>
  41. * @author Julien C <chaumond@gmail.com>
  42. * @author Brion Vibber <brion@status.net>
  43. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
  44. * @link http://status.net/
  45. * @link http://twitter.com/
  46. */
  47. class TwitterImport
  48. {
  49. public $avatarsizename = 'reasonably_small'; // a Twitter size name for 128x128 px
  50. public $avatarsize = 128; // they're square...
  51. public function importStatus($status)
  52. {
  53. // Hacktastic: filter out stuff coming from this StatusNet
  54. $source = mb_strtolower(common_config('integration', 'source'));
  55. if (preg_match("/$source/", mb_strtolower($status->source))) {
  56. common_debug(__METHOD__ . ' - Skipping import of status ' .
  57. twitter_id($status) . " with source {$source}");
  58. return null;
  59. }
  60. // Don't save it if the user is protected
  61. // FIXME: save it but treat it as private
  62. if ($status->user->protected) {
  63. return null;
  64. }
  65. $notice = $this->saveStatus($status);
  66. return $notice;
  67. }
  68. function name()
  69. {
  70. return get_class($this);
  71. }
  72. function saveStatus($status)
  73. {
  74. $profile = $this->ensureProfile($status->user);
  75. if (empty($profile)) {
  76. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice. No associated Profile.');
  77. return null;
  78. }
  79. $statusId = twitter_id($status);
  80. $statusUri = $this->makeStatusURI($status->user->screen_name, $statusId);
  81. // check to see if we've already imported the status
  82. $n2s = Notice_to_status::getKV('status_id', $statusId);
  83. if (!empty($n2s)) {
  84. common_log(
  85. LOG_INFO,
  86. __METHOD__ . " - Ignoring duplicate import: {$statusId}"
  87. );
  88. return Notice::getKV('id', $n2s->notice_id);
  89. }
  90. // If it's a retweet, save it as a repeat!
  91. if (!empty($status->retweeted_status)) {
  92. common_log(LOG_INFO, "Status {$statusId} is a retweet of " . twitter_id($status->retweeted_status) . ".");
  93. $original = $this->saveStatus($status->retweeted_status);
  94. if (empty($original)) {
  95. return null;
  96. } else {
  97. $author = $original->getProfile();
  98. // TRANS: Message used to repeat a notice. RT is the abbreviation of 'retweet'.
  99. // TRANS: %1$s is the repeated user's name, %2$s is the repeated notice.
  100. $content = sprintf(_m('RT @%1$s %2$s'),
  101. $author->nickname,
  102. $original->content);
  103. if (Notice::contentTooLong($content)) {
  104. $contentlimit = Notice::maxContent();
  105. $content = mb_substr($content, 0, $contentlimit - 4) . ' ...';
  106. }
  107. $repeat = Notice::saveNew($profile->id,
  108. $content,
  109. 'twitter',
  110. array('repeat_of' => $original->id,
  111. 'uri' => $statusUri,
  112. 'is_local' => Notice::GATEWAY));
  113. common_log(LOG_INFO, "Saved {$repeat->id} as a repeat of {$original->id}");
  114. Notice_to_status::saveNew($repeat->id, $statusId);
  115. return $repeat;
  116. }
  117. }
  118. $notice = new Notice();
  119. $notice->profile_id = $profile->id;
  120. $notice->uri = $statusUri;
  121. $notice->url = $statusUri;
  122. $notice->created = strftime(
  123. '%Y-%m-%d %H:%M:%S',
  124. strtotime($status->created_at)
  125. );
  126. $notice->source = 'twitter';
  127. $notice->reply_to = null;
  128. $replyTo = twitter_id($status, 'in_reply_to_status_id');
  129. if (!empty($replyTo)) {
  130. common_log(LOG_INFO, "Status {$statusId} is a reply to status {$replyTo}");
  131. $n2s = Notice_to_status::getKV('status_id', $replyTo);
  132. if (empty($n2s)) {
  133. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  134. } else {
  135. $reply = Notice::getKV('id', $n2s->notice_id);
  136. if (empty($reply)) {
  137. common_log(LOG_INFO, "Couldn't find local notice for status {$replyTo}");
  138. } else {
  139. common_log(LOG_INFO, "Found local notice {$reply->id} for status {$replyTo}");
  140. $notice->reply_to = $reply->id;
  141. $notice->conversation = $reply->conversation;
  142. }
  143. }
  144. }
  145. $notice->is_local = Notice::GATEWAY;
  146. $notice->content = html_entity_decode($this->linkify($status, FALSE), ENT_QUOTES, 'UTF-8');
  147. $notice->rendered = $this->linkify($status, TRUE);
  148. if (Event::handle('StartNoticeSave', array(&$notice))) {
  149. $id = $notice->insert();
  150. if ($id === false) {
  151. common_log_db_error($notice, 'INSERT', __FILE__);
  152. common_log(LOG_ERR, __METHOD__ . ' - Problem saving notice.');
  153. }
  154. if (empty($notice->conversation)) {
  155. $orig = clone($notice);
  156. $conv = Conversation::create($notice);
  157. common_log(LOG_INFO, "No known conversation for status {$statusId} so a new one ({$conv->id}) was created.");
  158. $notice->conversation = $conv->id;
  159. $notice->update($orig);
  160. }
  161. Event::handle('EndNoticeSave', array($notice));
  162. }
  163. Notice_to_status::saveNew($notice->id, $statusId);
  164. $this->saveStatusMentions($notice, $status);
  165. $this->saveStatusAttachments($notice, $status);
  166. $notice->blowOnInsert();
  167. return $notice;
  168. }
  169. /**
  170. * Make an URI for a status.
  171. *
  172. * @param object $status status object
  173. *
  174. * @return string URI
  175. */
  176. function makeStatusURI($username, $id)
  177. {
  178. return 'http://twitter.com/#!/'
  179. . $username
  180. . '/status/'
  181. . $id;
  182. }
  183. /**
  184. * Look up a Profile by profileurl field. Profile::getKV() was
  185. * not working consistently.
  186. *
  187. * @param string $nickname local nickname of the Twitter user
  188. * @param string $profileurl the profile url
  189. *
  190. * @return mixed value the first Profile with that url, or null
  191. */
  192. protected function getProfileByUrl($nickname, $profileurl)
  193. {
  194. $profile = new Profile();
  195. $profile->nickname = $nickname;
  196. $profile->profileurl = $profileurl;
  197. $profile->limit(1);
  198. if (!$profile->find(true)) {
  199. throw new NoResultException($profile);
  200. }
  201. return $profile;
  202. }
  203. protected function ensureProfile($twuser)
  204. {
  205. // check to see if there's already a profile for this user
  206. $profileurl = 'http://twitter.com/' . $twuser->screen_name;
  207. try {
  208. $profile = $this->getProfileByUrl($twuser->screen_name, $profileurl);
  209. $this->updateAvatar($twuser, $profile);
  210. return $profile;
  211. } catch (NoResultException $e) {
  212. common_debug(__METHOD__ . ' - Adding profile and remote profile ' .
  213. "for Twitter user: $profileurl.");
  214. }
  215. $profile = new Profile();
  216. $profile->query("BEGIN");
  217. $profile->nickname = $twuser->screen_name;
  218. $profile->fullname = $twuser->name;
  219. $profile->homepage = $twuser->url;
  220. $profile->bio = $twuser->description;
  221. $profile->location = $twuser->location;
  222. $profile->profileurl = $profileurl;
  223. $profile->created = common_sql_now();
  224. try {
  225. $id = $profile->insert(); // insert _should_ throw exception on failure
  226. if (empty($id)) {
  227. throw new Exception('Failed insert');
  228. }
  229. } catch(Exception $e) {
  230. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert profile: " . $e->getMessage());
  231. common_log_db_error($profile, 'INSERT', __FILE__);
  232. $profile->query("ROLLBACK");
  233. return false;
  234. }
  235. $profile->query("COMMIT");
  236. $this->updateAvatar($twuser, $profile);
  237. return $profile;
  238. }
  239. /*
  240. * Checks whether we have to update the profile's avatar
  241. *
  242. * @return true when updated, false on failure, null when no action taken
  243. */
  244. protected function updateAvatar($twuser, Profile $profile)
  245. {
  246. $path_parts = pathinfo($twuser->profile_image_url);
  247. $ext = isset($path_parts['extension'])
  248. ? '.'.$path_parts['extension']
  249. : ''; // some lack extension
  250. $img_root = basename($path_parts['basename'], '_normal'.$ext); // cut off extension
  251. $filename = "Twitter_{$twuser->id}_{$img_root}_{$this->avatarsizename}{$ext}";
  252. try {
  253. $avatar = Avatar::getUploaded($profile);
  254. if ($avatar->filename === $filename) {
  255. return null;
  256. }
  257. common_debug(__METHOD__ . " - Updating profile avatar (profile_id={$profile->id}) " .
  258. "from {$avatar->filename} to {$filename}");
  259. // else we continue with creating a new avatar
  260. } catch (NoAvatarException $e) {
  261. // Avatar was not found. We can catch NoAvatarException or FileNotFoundException
  262. // but generally we just want to continue creating a new avatar.
  263. common_debug(__METHOD__ . " - No avatar found for (profile_id={$profile->id})");
  264. }
  265. $url = "{$path_parts['dirname']}/{$img_root}_{$this->avatarsizename}{$ext}";
  266. $mediatype = $this->getMediatype(mb_substr($ext, 1));
  267. try {
  268. $this->newAvatar($profile, $url, $filename, $mediatype);
  269. } catch (Exception $e) {
  270. if (file_exists(Avatar::path($filename))) {
  271. unlink(Avatar::path($filename));
  272. }
  273. return false;
  274. }
  275. return true;
  276. }
  277. protected function getMediatype($ext)
  278. {
  279. $mediatype = null;
  280. switch (strtolower($ext)) {
  281. case 'jpeg':
  282. case 'jpg':
  283. $mediatype = 'image/jpeg';
  284. break;
  285. case 'gif':
  286. $mediatype = 'image/gif';
  287. break;
  288. default:
  289. $mediatype = 'image/png';
  290. }
  291. return $mediatype;
  292. }
  293. protected function newAvatar(Profile $profile, $url, $filename, $mediatype)
  294. {
  295. // Clear out old avatars, won't do anything if there are none
  296. Avatar::deleteFromProfile($profile);
  297. // throws exception if unable to fetch
  298. $this->fetchRemoteUrl($url, Avatar::path($filename));
  299. $avatar = new Avatar();
  300. $avatar->profile_id = $profile->id;
  301. $avatar->original = 1; // this is an original/"uploaded" avatar
  302. $avatar->mediatype = $mediatype;
  303. $avatar->filename = $filename;
  304. $avatar->url = Avatar::url($filename);
  305. $avatar->width = $this->avatarsize;
  306. $avatar->height = $this->avatarsize;
  307. $avatar->created = common_sql_now();
  308. $id = $avatar->insert();
  309. if (empty($id)) {
  310. common_log(LOG_WARNING, __METHOD__ . " Couldn't insert avatar - " . $e->getMessage());
  311. common_log_db_error($avatar, 'INSERT', __FILE__);
  312. throw new ServerException('Could not insert avatar');
  313. }
  314. common_debug(__METHOD__ . " - Saved new avatar for {$profile->id}.");
  315. return $avatar;
  316. }
  317. /**
  318. * Fetch a remote avatar image and save to local storage.
  319. *
  320. * @param string $url avatar source URL
  321. * @param string $filename bare local filename for download
  322. * @return bool true on success, false on failure
  323. */
  324. protected function fetchRemoteUrl($url, $filename)
  325. {
  326. common_debug(__METHOD__ . " - Fetching Twitter avatar: {$url} to {$filename}");
  327. $request = HTTPClient::start();
  328. $request->setConfig('connect_timeout', 3); // I had problems with throttling
  329. $request->setConfig('timeout', 6); // and locking the process sucks.
  330. $response = $request->get($url);
  331. if ($response->isOk()) {
  332. if (!file_put_contents($filename, $response->getBody())) {
  333. throw new ServerException('Failed saving fetched file');
  334. }
  335. } else {
  336. throw new Exception('Unexpected HTTP status code');
  337. }
  338. return true;
  339. }
  340. const URL = 1;
  341. const HASHTAG = 2;
  342. const MENTION = 3;
  343. function linkify($status, $html = FALSE)
  344. {
  345. $text = $status->text;
  346. if (empty($status->entities)) {
  347. $statusId = twitter_id($status);
  348. common_log(LOG_WARNING, "No entities data for {$statusId}; trying to fake up links ourselves.");
  349. $text = common_replace_urls_callback($text, 'common_linkify');
  350. $text = preg_replace_callback('/(^|\&quot\;|\'|\(|\[|\{|\s+)#([\pL\pN_\-\.]{1,64})/',
  351. function ($m) { return $m[1].'#'.TwitterStatusFetcher::tagLink($m[2]); }, $text);
  352. $text = preg_replace_callback('/(^|\s+)@([a-z0-9A-Z_]{1,64})/',
  353. function ($m) { return $m[1].'@'.TwitterStatusFetcher::atLink($m[2]); }, $text);
  354. return $text;
  355. }
  356. // Move all the entities into order so we can
  357. // replace them and escape surrounding plaintext
  358. // in order
  359. $toReplace = array();
  360. if (!empty($status->entities->urls)) {
  361. foreach ($status->entities->urls as $url) {
  362. $toReplace[$url->indices[0]] = array(self::URL, $url);
  363. }
  364. }
  365. if (!empty($status->entities->hashtags)) {
  366. foreach ($status->entities->hashtags as $hashtag) {
  367. $toReplace[$hashtag->indices[0]] = array(self::HASHTAG, $hashtag);
  368. }
  369. }
  370. if (!empty($status->entities->user_mentions)) {
  371. foreach ($status->entities->user_mentions as $mention) {
  372. $toReplace[$mention->indices[0]] = array(self::MENTION, $mention);
  373. }
  374. }
  375. // sort in forward order by key
  376. ksort($toReplace);
  377. $result = '';
  378. $cursor = 0;
  379. foreach ($toReplace as $part) {
  380. list($type, $object) = $part;
  381. $start = $object->indices[0];
  382. $end = $object->indices[1];
  383. if ($cursor < $start) {
  384. // Copy in the preceding plaintext
  385. $result .= $this->twitEscape(mb_substr($text, $cursor, $start - $cursor));
  386. $cursor = $start;
  387. }
  388. $orig = $this->twitEscape(mb_substr($text, $start, $end - $start));
  389. switch($type) {
  390. case self::URL:
  391. $linkText = $this->makeUrlLink($object, $orig, $html);
  392. break;
  393. case self::HASHTAG:
  394. if ($html) {
  395. $linkText = $this->makeHashtagLink($object, $orig);
  396. }else{
  397. $linkText = $orig;
  398. }
  399. break;
  400. case self::MENTION:
  401. if ($html) {
  402. $linkText = $this->makeMentionLink($object, $orig);
  403. }else{
  404. $linkText = $orig;
  405. }
  406. break;
  407. default:
  408. $linkText = $orig;
  409. continue;
  410. }
  411. $result .= $linkText;
  412. $cursor = $end;
  413. }
  414. $last = $this->twitEscape(mb_substr($text, $cursor));
  415. $result .= $last;
  416. return $result;
  417. }
  418. function twitEscape($str)
  419. {
  420. // Twitter seems to preemptive turn < and > into &lt; and &gt;
  421. // but doesn't for &, so while you may have some magic protection
  422. // against XSS by not bothing to escape manually, you still get
  423. // invalid XHTML. Thanks!
  424. //
  425. // Looks like their web interface pretty much sends anything
  426. // through intact, so.... to do equivalent, decode all entities
  427. // and then re-encode the special ones.
  428. return htmlspecialchars(html_entity_decode($str, ENT_COMPAT, 'UTF-8'));
  429. }
  430. function makeUrlLink($object, $orig, $html)
  431. {
  432. if ($html) {
  433. return '<a href="'.htmlspecialchars($object->expanded_url).'" class="extlink">'.htmlspecialchars($object->display_url).'</a>';
  434. }else{
  435. return htmlspecialchars($object->expanded_url);
  436. }
  437. }
  438. function makeHashtagLink($object, $orig)
  439. {
  440. return "#" . self::tagLink($object->text, substr($orig, 1));
  441. }
  442. function makeMentionLink($object, $orig)
  443. {
  444. return "@".self::atLink($object->screen_name, $object->name, substr($orig, 1));
  445. }
  446. static function tagLink($tag, $orig)
  447. {
  448. return "<a href='https://twitter.com/search?q=%23{$tag}' class='hashtag'>{$orig}</a>";
  449. }
  450. static function atLink($screenName, $fullName, $orig)
  451. {
  452. if (!empty($fullName)) {
  453. return "<a href='http://twitter.com/#!/{$screenName}' title='{$fullName}'>{$orig}</a>";
  454. } else {
  455. return "<a href='http://twitter.com/#!/{$screenName}'>{$orig}</a>";
  456. }
  457. }
  458. function saveStatusMentions($notice, $status)
  459. {
  460. $mentions = array();
  461. if (empty($status->entities) || empty($status->entities->user_mentions)) {
  462. return;
  463. }
  464. foreach ($status->entities->user_mentions as $mention) {
  465. $flink = Foreign_link::getByForeignID($mention->id, TWITTER_SERVICE);
  466. if (!empty($flink)) {
  467. $user = User::getKV('id', $flink->user_id);
  468. if (!empty($user)) {
  469. $reply = new Reply();
  470. $reply->notice_id = $notice->id;
  471. $reply->profile_id = $user->id;
  472. $reply->modified = $notice->created;
  473. common_log(LOG_INFO, __METHOD__ . ": saving reply: notice {$notice->id} to profile {$user->id}");
  474. $id = $reply->insert();
  475. }
  476. }
  477. }
  478. }
  479. /**
  480. * Record URL links from the notice. Needed to get thumbnail records
  481. * for referenced photo and video posts, etc.
  482. *
  483. * @param Notice $notice
  484. * @param object $status
  485. */
  486. function saveStatusAttachments($notice, $status)
  487. {
  488. if (common_config('attachments', 'process_links')) {
  489. if (!empty($status->entities) && !empty($status->entities->urls)) {
  490. foreach ($status->entities->urls as $url) {
  491. try {
  492. File::processNew($url->url, $notice->id);
  493. } catch (ServerException $e) {
  494. // Could not process attached URL
  495. }
  496. }
  497. }
  498. }
  499. }
  500. }