<?php
/**
 * StatusNet - the distributed open-source microblogging tool
 * Copyright (C) 2012, StatusNet, Inc.
 *
 * Spam filter class
 *
 * PHP version 5
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * @category  Spam
 * @package   StatusNet
 * @author    Evan Prodromou <evan@status.net>
 * @copyright 2012 StatusNet, Inc.
 * @license   http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
 * @link      http://status.net/
 */
  30. if (!defined('STATUSNET')) {
  31. // This check helps protect against security problems;
  32. // your code file can't be executed directly from the web.
  33. exit(1);
  34. }
  35. /**
  36. * Spam filter class
  37. *
  38. * Local proxy for remote filter
  39. *
  40. * @category Spam
  41. * @package StatusNet
  42. * @author Evan Prodromou <evan@status.net>
  43. * @copyright 2012 StatusNet, Inc.
  44. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
  45. * @link http://status.net/
  46. */
  47. class SpamFilter extends OAuthClient {
  48. const HAM = 'ham';
  49. const SPAM = 'spam';
  50. public $server;
  51. function __construct($server, $consumerKey, $secret) {
  52. parent::__construct($consumerKey, $secret);
  53. $this->server = $server;
  54. }
  55. protected function toActivity($notice) {
  56. // FIXME: need this to autoload ActivityStreamsMediaLink
  57. $doc = new ActivityStreamJSONDocument();
  58. $activity = $notice->asActivity(null);
  59. return $activity;
  60. }
  61. public function test($notice) {
  62. $activity = $this->toActivity($notice);
  63. return $this->testActivity($activity);
  64. }
  65. public function testActivity($activity) {
  66. $response = $this->postJSON($this->server . "/is-this-spam", $activity->asArray());
  67. $result = json_decode($response->getBody());
  68. return $result;
  69. }
  70. public function train($notice, $category) {
  71. $activity = $this->toActivity($notice);
  72. return $this->trainActivity($activity, $category);
  73. }
  74. public function trainActivity($activity, $category) {
  75. switch ($category) {
  76. case self::HAM:
  77. $endpoint = '/this-is-ham';
  78. break;
  79. case self::SPAM:
  80. $endpoint = '/this-is-spam';
  81. break;
  82. default:
  83. throw new Exception("Unknown category: " + $category);
  84. }
  85. $response = $this->postJSON($this->server . $endpoint, $activity->asArray());
  86. // We don't do much with the results
  87. return true;
  88. }
  89. public function trainOnError($notice, $category) {
  90. $activity = $this->toActivity($notice);
  91. return $this->trainActivityOnError($activity, $category);
  92. }
  93. public function trainActivityOnError($activity, $category) {
  94. $result = $this->testActivity($activity);
  95. if (($category === self::SPAM && $result->isSpam) ||
  96. ($category === self::HAM && !$result->isSpam)) {
  97. return true;
  98. } else {
  99. return $this->trainActivity($activity, $category);
  100. }
  101. }
  102. function postJSON($url, $body)
  103. {
  104. $request = OAuthRequest::from_consumer_and_token($this->consumer,
  105. $this->token,
  106. 'POST',
  107. $url);
  108. $request->sign_request($this->sha1_method,
  109. $this->consumer,
  110. $this->token);
  111. $hclient = new HTTPClient($url);
  112. $hclient->setConfig(array('connect_timeout' => 120,
  113. 'timeout' => 120,
  114. 'follow_redirects' => true,
  115. 'ssl_verify_peer' => false,
  116. 'ssl_verify_host' => false));
  117. $hclient->setMethod(HTTP_Request2::METHOD_POST);
  118. $hclient->setBody(json_encode($body));
  119. $hclient->setHeader('Content-Type', 'application/json');
  120. $hclient->setHeader($request->to_header());
  121. // Twitter is strict about accepting invalid "Expect" headers
  122. // No reason not to clear it still here -ESP
  123. $hclient->setHeader('Expect', '');
  124. try {
  125. $response = $hclient->send();
  126. $code = $response->getStatus();
  127. if (!$response->isOK()) {
  128. throw new OAuthClientException($response->getBody(), $code);
  129. }
  130. return $response;
  131. } catch (Exception $e) {
  132. throw new OAuthClientException($e->getMessage(), $e->getCode());
  133. }
  134. }
  135. }