linkhtml.php 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. <?php
  2. /**
  3. * Implementation of discovery using HTML <link> element
  4. *
  5. * Discovers XRD file for a user by fetching the URL and reading any
  6. * <link> elements in the HTML response.
  7. *
  8. * @category Discovery
  9. * @package StatusNet
  10. * @author James Walker <james@status.net>
  11. * @copyright 2010 StatusNet, Inc.
  12. * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPL 3.0
  13. * @link http://status.net/
  14. */
  class LRDDMethod_LinkHTML extends LRDDMethod
  {
      /**
       * For HTTP IDs, fetch the URL and look for <link> elements
       * in the HTML response.
       *
       * @todo fail out of WebFinger URIs faster
       *
       * @param string $uri URI to fetch; handed unchanged to fetchUrl()
       *
       * @return array array of XML_XRD_Element_Link objects, as built by parse()
       */
      public function discover($uri)
      {
          $response = self::fetchUrl($uri);

          return self::parse($response->getBody());
      }

      /**
       * Parse HTML and return <link> elements
       *
       * Given an HTML string, scans the first <head>...</head> section for
       * <link> tags and turns each one into an XML_XRD_Element_Link built
       * from its rel, href and type attributes. An attribute that is absent
       * from a tag is passed to the constructor as null.
       *
       * @param string $html HTML to scan
       *
       * @return array array of XML_XRD_Element_Link objects; empty when the
       *               document has no <head> section or the head has no <link>
       */
      public function parse($html)
      {
          $links = array();

          // Without a <head> section there is nothing to scan; bail out early
          // instead of reading an undefined match index.
          if (!preg_match('/<head(\s[^>]*)?>(.*?)<\/head>/is', (string) $html, $head_matches)) {
              return $links;
          }

          preg_match_all('/<link\s[^>]*>/i', $head_matches[2], $link_matches);

          foreach ($link_matches[0] as $link_html) {
              // Each attribute is looked up independently for every tag, so a
              // value can never leak over from a previously processed <link>.
              $links[] = new XML_XRD_Element_Link(
                  self::linkAttribute($link_html, 'rel'),
                  self::linkAttribute($link_html, 'href'),
                  self::linkAttribute($link_html, 'type')
              );
          }

          return $links;
      }

      /**
       * Extract the raw value of one attribute from a single tag's markup.
       *
       * Handles double-quoted, single-quoted and unquoted values. The value
       * is returned exactly as written in the markup.
       * NOTE(review): character references such as &amp; are not decoded
       * here; confirm whether callers expect decoded URLs.
       *
       * @param string $link_html markup of one tag, e.g. '<link rel="lrdd">'
       * @param string $name      attribute name to look for (case-insensitive)
       *
       * @return string|null the attribute value, or null when the attribute
       *                     is not present in the tag
       */
      private static function linkAttribute($link_html, $name)
      {
          // Group 1: opening quote, group 2: quoted value (closed by the same
          // quote via the \1 backreference), group 3: unquoted value. The
          // unquoted form stops at whitespace or at the tag's closing '>'.
          $pattern = '/\s' . preg_quote($name, '/')
              . '=(?:(["\'])(.*?)\\1|([^"\'\s>]+))/is';

          if (!preg_match($pattern, $link_html, $matches)) {
              return null;
          }

          // Group 3 is only present in the result when the unquoted
          // alternative matched; otherwise the quoted value is in group 2.
          if (isset($matches[3])) {
              return $matches[3];
          }

          return $matches[2];
      }
  }