ARC2_MicroformatsExtractor.php 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. <?php
  2. /*
  3. homepage: http://arc.semsol.org/
  4. license: http://arc.semsol.org/license
  5. class: ARC2 microformats Extractor
  6. author: Benjamin Nowack
  7. version: 2010-11-16
  8. */
  9. ARC2::inc('ARC2_PoshRdfExtractor');
  10. class ARC2_MicroformatsExtractor extends ARC2_PoshRdfExtractor {
  /**
   * Builds the extractor. All construction work is delegated to the
   * poshRDF parent class; nothing microformats-specific happens here.
   *
   * @param mixed $a      Handed unchanged to the parent constructor.
   * @param mixed $caller Handed (by reference) to the parent constructor.
   */
  function __construct($a, &$caller)
  {
    parent::__construct($a, $caller);
  }
  /**
   * Runs the parent initialisation, then installs the microformats setup:
   * the term table, the "mf" prefix and its namespace URI, and a
   * "posh-rdf" marker in the caller's detected_formats list.
   */
  function __init()
  {
    parent::__init();

    /* term table that drives preProcessNode() */
    $this->terms = $this->getTerms();

    /* prefix and namespace used for the generated mf-* hooks */
    $this->ns_prefix = 'mf';
    $this->a['ns']['mf'] = 'http://poshrdf.org/ns/mf#';

    /* record on the caller that poshRDF-style extraction is in play */
    $this->caller->detected_formats['posh-rdf'] = 1;
  }
  21. /* */
  /**
   * Rewrites a node's "class" and "rel" attributes into poshRDF form.
   *
   * Existing rdf-s / rdf-p / rdf-o / rdf-o-xml class tokens are stripped.
   * Then, for every known term that hasClass()/hasRel() reports on the node,
   * the matching rdf-* hooks are appended to "class" and the term token is
   * renamed to "mf-<term>" in both "class" and "rel". Finally the split
   * token lists "class m" and "rel m" are rebuilt from the rewritten strings.
   *
   * @param array $n Node structure; attributes are expected under $n['a'].
   * @return array The node with rewritten attributes (falsy input is returned as-is).
   */
  function preProcessNode($n) {
    if (!$n) return $n;
    /* a missing attribute container must not raise an undefined-index notice */
    if (!isset($n['a']) || !is_array($n['a'])) $n['a'] = array();
    /*
     * remove existing poshRDF hooks
     * - "o-xml" comes first so "rdf-o" cannot match a prefix of "rdf-o-xml"
     *   (which used to leave a stray "-xml" token behind)
     * - the (?<!\S)/(?!\S) guards restrict the match to whole tokens
     */
    $n['a']['class'] = isset($n['a']['class'])
      ? preg_replace('/\s?(?<!\S)rdf-(?:o-xml|s|p|o)(?!\S)/', '', $n['a']['class'])
      : '';
    if (!isset($n['a']['rel'])) $n['a']['rel'] = '';
    /* inject poshRDF hooks */
    foreach ($this->terms as $term => $infos) {
      /* terms flagged "rel" are only recognised via rel, never via class */
      if ((!in_array('rel', $infos, true) && $this->hasClass($n, $term)) || $this->hasRel($n, $term)) {
        /* a scoped term is implicitly a predicate */
        if ($this->v('scope', '', $infos)) $infos[] = 'p';
        $hooked = false;
        foreach (array('s', 'p', 'o', 'o-xml') as $type) {
          if (in_array($type, $infos, true)) {
            $n['a']['class'] .= ' rdf-' . $type;
            $hooked = true;
          }
        }
        if ($hooked) {
          /*
           * rename the term token once (not once per hook type); the term is
           * quoted for the regex, and look-arounds leave the separators
           * unconsumed so adjacent duplicate tokens are both renamed
           */
          $token_re = '/(?<!\S)' . preg_quote($term, '/') . '(?!\S)/';
          $n['a']['class'] = preg_replace($token_re, 'mf-' . $term, $n['a']['class']);
          $n['a']['rel'] = preg_replace($token_re, 'mf-' . $term, $n['a']['rel']);
        }
      }
    }
    /* refresh the pre-split token lists from the rewritten attribute strings */
    $n['a']['class m'] = preg_split('/ /', $n['a']['class']);
    $n['a']['rel m'] = preg_split('/ /', $n['a']['rel']);
    return $n;
  }
  /**
   * Looks up predicates through the parent implementation, but only within
   * the "mf" namespace: every other entry of $ns is dropped before delegating.
   *
   * @param mixed $n  Node, passed through to the parent.
   * @param array $ns Namespace map; only its 'mf' entry is forwarded.
   * @return mixed Whatever the parent's getPredicates() returns.
   */
  function getPredicates($n, $ns)
  {
    return parent::getPredicates($n, array('mf' => $ns['mf']));
  }
  /**
   * Post-processes an extracted object value for a given predicate.
   *
   * For the mf "tag" and "skill" predicates the value is reduced to its last
   * "/"-separated segment (surrounding slashes ignored) and URL-decoded.
   * Every other value is returned untouched.
   *
   * @param mixed  $o  Extracted object value.
   * @param string $p  Predicate URI the value belongs to.
   * @param array  $ct Context; $ct['ns']['mf'] must hold the mf namespace URI.
   * @return mixed The (possibly adjusted) object value.
   */
  function tweakObject($o, $p, $ct) {
    $ns = $ct['ns']['mf'];
    /* rel-tag, skill => extract from URL */
    if (in_array($p, array($ns . 'tag', $ns . 'skill'), true)) {
      $o = preg_replace('/^.*\/([^\/]+)/', '\\1', trim($o, '/'));
      /*
       * decode exactly once: urldecode() already handles both %XX escapes and
       * "+" as space. Decoding twice turned an escaped plus ("%2B") into a
       * space and over-decoded escaped percent signs ("%25").
       */
      $o = urldecode($o);
    }
    return $o;
  }
  58. /* */
  /**
   * Returns the microformats term table.
   *
   * Each key is a class/rel token. Each value lists the hook flags for that
   * token ('s', 'o', 'o-xml', plus markers such as 'rel' or 'plain') and, under
   * 'scope', the containers in which the token is recognised. 'p' is never
   * listed explicitly: it is implied whenever the scope is not empty.
   *
   * @return array Term table, in the order the terms are applied.
   */
  function getTerms()
  {
    /* scope lists that recur throughout the table */
    $in_doc = array('_doc');
    $in_xfn = array('_doc', 'hentry');
    $in_links = array('_doc', 'hfeed', 'hentry', 'hreview');
    $in_vcard = array('vcard');
    $in_vevent = array('vevent');
    $in_vcard_or_vevent = array('vcard', 'vevent');
    $in_adr = array('adr');
    $in_n = array('n');
    $in_geo = array('geo');
    $in_hentry = array('hentry');
    $in_hreview = array('hreview');
    $in_hresume = array('hresume');

    $terms = array(
      'acquaintance' => array('o', 'rel', 'scope' => $in_xfn),
      'additional-name' => array('o', 'scope' => $in_n),
      'adr' => array('s', 'o', 'scope' => array('_doc', 'vcard')),
      'affiliation' => array('s', 'o', 'scope' => $in_hresume),
      'author' => array('s', 'o', 'scope' => $in_hentry),
      'bday' => array('o', 'scope' => $in_vcard),
      'bio' => array('o', 'scope' => $in_vcard),
      'best' => array('o', 'scope' => $in_hreview),
      'bookmark' => array('o', 'scope' => array('_doc', 'hentry', 'hreview')),
      'class' => array('o', 'scope' => $in_vcard_or_vevent),
      'category' => array('o', 's', 'scope' => $in_vcard_or_vevent),
      'child' => array('o', 'rel', 'scope' => $in_xfn),
      'co-resident' => array('o', 'rel', 'scope' => $in_xfn),
      'co-worker' => array('o', 'rel', 'scope' => $in_xfn),
      'colleague' => array('o', 'rel', 'scope' => $in_xfn),
      'contact' => array('o', 'scope' => array('_doc', 'hresume', 'hentry')),
      'country-name' => array('o', 'scope' => $in_adr),
      'crush' => array('o', 'rel', 'scope' => $in_xfn),
      'date' => array('o', 'rel', 'scope' => $in_xfn),
      'description' => array('o', 'scope' => array('vevent', 'hreview', 'xfolkentry')),
      'directory' => array('o', 'rel', 'scope' => $in_links),
      'dtend' => array('o', 'scope' => $in_vevent),
      'dtreviewed' => array('o', 'scope' => $in_hreview),
      'dtstamp' => array('o', 'scope' => $in_vevent),
      'dtstart' => array('o', 'scope' => $in_vevent),
      'duration' => array('o', 'scope' => $in_vevent),
      'education' => array('s', 'o', 'scope' => $in_hresume),
      'email' => array('s', 'o', 'scope' => $in_vcard),
      'entry-title' => array('o', 'scope' => $in_hentry),
      'entry-content' => array('o-xml', 'scope' => $in_hentry),
      'entry-summary' => array('o', 'scope' => $in_hentry),
      'experience' => array('s', 'o', 'scope' => $in_hresume),
      'extended-address' => array('o', 'scope' => $in_adr),
      'family-name' => array('o', 'scope' => $in_n),
      'fn' => array('o', 'plain', 'scope' => array('vcard', 'item')),
      'friend' => array('o', 'rel', 'scope' => $in_xfn),
      'geo' => array('s', 'scope' => array('_doc', 'vcard', 'vevent')),
      'given-name' => array('o', 'scope' => $in_n),
      'hentry' => array('s', 'o', 'scope' => array('_doc', 'hfeed')),
      'hfeed' => array('s', 'scope' => $in_doc),
      'honorific-prefix' => array('o', 'scope' => $in_n),
      'honorific-suffix' => array('o', 'scope' => $in_n),
      'hresume' => array('s', 'scope' => $in_doc),
      'hreview' => array('s', 'scope' => $in_doc),
      'item' => array('s', 'scope' => $in_hreview),
      'key' => array('o', 'scope' => $in_vcard),
      'kin' => array('o', 'rel', 'scope' => $in_xfn),
      'label' => array('o', 'scope' => $in_vcard),
      'last-modified' => array('o', 'scope' => $in_vevent),
      'latitude' => array('o', 'scope' => $in_geo),
      'license' => array('o', 'rel', 'scope' => $in_links),
      'locality' => array('o', 'scope' => $in_adr),
      'location' => array('o', 'scope' => $in_vevent),
      'logo' => array('o', 'scope' => $in_vcard),
      'longitude' => array('o', 'scope' => $in_geo),
      'mailer' => array('o', 'scope' => $in_vcard),
      'me' => array('o', 'rel', 'scope' => $in_xfn),
      'met' => array('o', 'rel', 'scope' => $in_xfn),
      'muse' => array('o', 'rel', 'scope' => $in_xfn),
      'n' => array('s', 'o', 'scope' => $in_vcard),
      'neighbor' => array('o', 'rel', 'scope' => $in_xfn),
      'nickname' => array('o', 'plain', 'scope' => $in_vcard),
      'nofollow' => array('o', 'rel', 'scope' => $in_doc),
      'note' => array('o', 'scope' => $in_vcard),
      'org' => array('o', 'xplain', 'scope' => $in_vcard),
      'parent' => array('o', 'rel', 'scope' => $in_xfn),
      'permalink' => array('o', 'scope' => $in_hreview),
      'photo' => array('o', 'scope' => array('vcard', 'item')),
      'post-office-box' => array('o', 'scope' => $in_adr),
      'postal-code' => array('o', 'scope' => $in_adr),
      'publication' => array('s', 'o', 'scope' => $in_hresume),
      'published' => array('o', 'scope' => $in_hentry),
      'rating' => array('o', 'scope' => $in_hreview),
      'region' => array('o', 'scope' => $in_adr),
      'rev' => array('o', 'scope' => $in_vcard),
      'reviewer' => array('s', 'o', 'scope' => $in_hreview),
      'role' => array('o', 'plain', 'scope' => $in_vcard),
      'sibling' => array('o', 'rel', 'scope' => $in_xfn),
      'skill' => array('o', 'scope' => $in_hresume),
      'sort-string' => array('o', 'scope' => $in_vcard),
      'sound' => array('o', 'scope' => $in_vcard),
      'spouse' => array('o', 'rel', 'scope' => $in_xfn),
      'status' => array('o', 'plain', 'scope' => $in_vevent),
      'street-address' => array('o', 'scope' => $in_adr),
      'summary' => array('o', 'scope' => array('vevent', 'hreview', 'hresume')),
      'sweetheart' => array('o', 'rel', 'scope' => $in_xfn),
      'tag' => array('o', 'rel', 'scope' => array('_doc', 'category', 'hfeed', 'hentry', 'skill', 'hreview', 'xfolkentry')),
      'taggedlink' => array('o', 'scope' => array('xfolkentry')),
      'title' => array('o', 'scope' => $in_vcard),
      'type' => array('o', 'scope' => array('adr', 'email', 'hreview', 'tel')),
      'tz' => array('o', 'scope' => $in_vcard),
      'uid' => array('o', 'scope' => $in_vcard_or_vevent),
      'updated' => array('o', 'scope' => $in_hentry),
      'url' => array('o', 'scope' => array('vcard', 'vevent', 'item')),
      'value' => array('o', 'scope' => array('email', 'adr', 'tel')),
      'vcard' => array('s', 'scope' => array('author', 'reviewer', 'affiliation', 'contact')),
      'version' => array('o', 'scope' => $in_hreview),
      'vevent' => array('s', 'scope' => $in_doc),
      'worst' => array('o', 'scope' => $in_hreview),
      'xfolkentry' => array('s', 'scope' => $in_doc),
    );

    return $terms;
  }
  164. /* */
  165. }