PicoPygments.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. <?php
  2. class PicoPygments extends AbstractPicoPlugin
  3. {
  // plugin API version announced to the host application (presumably read by Pico via AbstractPicoPlugin - confirm)
  const API_VERSION=3;
  // only act if this var is set to true (set in onMetaParsed(), checked in onContentParsed())
  private $yeah=FALSE;
  // display a label with the language in a corner?
  private $label=FALSE;
  // formatter options string, pasted verbatim into the HtmlFormatter(...) call of the generated Python code
  private $formatteroptions='nowrap=True';
  // lexer options string, pasted verbatim into the get_lexer_by_name(...) call of the generated Python code
  private $lexeroptions='encoding="utf-8"';
  // caching is on by default; onConfigLoaded() may replace or disable this directory
  private $cache_dir="/dev/shm/cache/pico-production/pygments";
  // parsedown prepends this to the language for code block classes, e.g.: <code class="language-php">... not (currently) configurable
  private $langprefix="language-";
  // URL of the stylesheet that gets linked in front of the processed content
  private $stylesheet='';
  // file name prefix of a customised stylesheet; onConfigLoaded() tries "<prefix><fallback>.css" before "<fallback>.css"
  private $cssprefix='custom-';
  // base name (without ".css") of the stylesheet used when none is configured
  private $stylesheet_fallback='_var_with_fallback';

  // The following properties are assigned in onConfigLoaded(). They used to be created
  // on the fly there; declaring them avoids the "creation of dynamic property" deprecation
  // of PHP 8.2 and gives __msg__() defined values even before the configuration is loaded.
  // The defaults mirror the fallbacks applied in onConfigLoaded().
  // print debug output?
  private $debug=FALSE;
  // print the additional (super verbose) debug output as well?
  private $debug_xtra=FALSE;
  // template name the plugin is active for ("all" = every template)
  private $template='post';
  // CSS class put on the replaced code blocks
  private $cssclass='pcpg';
  // command used to start the Python interpreter
  private $python_cmd='python3 -I';
  /**
   * Prints a debug line, prefixed with the name of the calling method.
   * Does nothing unless $this->debug is TRUE.
   *
   * @param string $str  the normal debug message
   * @param string $xtra additional (super verbose) message, printed only if $this->debug_xtra is TRUE
   */
  private function __msg__($str,$xtra="") {
    if ($this->debug !== TRUE) return;
    // frame 0 is this call, frame 1 is the method that called __msg__()
    $frames=debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS,2);
    $head='⇛ '."PicoPygments (".$frames[1]['function']."): ";
    // normal debug output
    echo $head.$str."<br/>\n";
    // additional (super verbose) debug output
    if ($xtra === "" || $this->debug_xtra !== TRUE) return;
    echo $head.$xtra."<br/>\n";
  }
  /**
   * Debug helper: prints a (possibly nested) array as "key => value" lines.
   * Nested arrays are printed one tab deeper per nesting level.
   *
   * @param array  $array the array to print
   * @param string $sep   indentation prefix for the current nesting level
   */
  private function array_info($array,$sep='') {
    foreach ($array as $k => $v) {
      if (is_array($v)) {
        // an array cannot be echoed (PHP would warn and print "Array"):
        // print only the key, then the nested entries with a deeper indentation
        echo $sep.$k." =>\n";
        $this->array_info($v,$sep."\t");
      }
      elseif (is_object($v) && !method_exists($v,'__toString')) {
        // objects without a string conversion would abort the script; show their class instead
        echo $sep.$k." => (".get_class($v).")\n";
      }
      else echo $sep.$k." => ".$v."\n";
    }
  }
  /**
   * Reads the plugin settings from the site configuration, prepares the cache directory
   * and decides which stylesheet to link.
   *
   * Keys read below $config['PicoPygments']: debug_xtra, template, cssclass, python_cmd,
   * cache_dir, label, formatteroptions, lexeroptions, stylesheet.
   * The top-level $config['debug'] switches the debug output on.
   *
   * @param array $config the configuration array (taken by reference, not modified here)
   */
  public function onConfigLoaded(array &$config)
  {
    // all plugin specific settings live in this sub-array; a missing one simply means "use the defaults"
    $cfg=(isset($config['PicoPygments']) && is_array($config['PicoPygments'])) ? $config['PicoPygments'] : array();

    // debugging output yes or no - isset() because the key need not be defined at all
    if (isset($config['debug']) && $config['debug'] === TRUE) {
      $this->debug=TRUE;
      ini_set('display_errors', 'On');
      error_reporting(E_ALL);
      $this->__msg__("Debug output enabled.");
    }
    else $this->debug=FALSE;
    $this->debug_xtra=(isset($cfg['debug_xtra']) && $cfg['debug_xtra'] === TRUE);

    //////////////// User-configurable variables
    // twig template to activate this plugin for - default: post
    $this->template=(isset($cfg['template']) && $cfg['template'] != "") ? $cfg['template'] : "post";
    // the code block's class will become this, plus "language-$lang"
    $this->cssclass=isset($cfg['cssclass']) ? $cfg['cssclass'] : 'pcpg';
    // Python command - default: "python3 -I" - yes, PHP apparently does PATH lookups.
    $this->python_cmd=(isset($cfg['python_cmd']) && $cfg['python_cmd'] != "") ? $cfg['python_cmd'] : "python3 -I";

    // caching already parsed code blocks - FALSE or '' switches caching off.
    // NULL (instead of unset()) keeps the declared property in place while
    // isset($this->cache_dir) still reports "no cache".
    if (isset($cfg['cache_dir'])) {
      $this->cache_dir=($cfg['cache_dir'] === FALSE || $cfg['cache_dir'] === '') ? NULL : $cfg['cache_dir'];
    }
    // make the directory if required
    if (isset($this->cache_dir)) {
      if (!is_dir($this->cache_dir)) mkdir($this->cache_dir,0700,TRUE);
      if (is_dir($this->cache_dir)) $this->__msg__("Picopyg cache dir is now ".$this->cache_dir);
      else {
        $this->__msg__("Failed to access directory ".$this->cache_dir .", continuing without caching.");
        $this->cache_dir=NULL;
      }
    }

    // display a label with the language in a corner?
    if (isset($cfg['label']) && $cfg['label'] === TRUE) $this->label=TRUE;
    // setting up formatteroptions string to pass to python.
    if (isset($cfg['formatteroptions']) && $cfg['formatteroptions'] !== FALSE) $this->formatteroptions=$cfg['formatteroptions'];
    // setting up lexeroptions string to pass to python.
    if (isset($cfg['lexeroptions']) && $cfg['lexeroptions'] !== FALSE) $this->lexeroptions=$cfg['lexeroptions'];

    // If a stylesheet isn't explicitly pointed to (as an absolute URL path), use a fallback (with magic color variables!)
    if (isset($cfg['stylesheet']) and $cfg['stylesheet'] != '' and substr($cfg['stylesheet'],0,1) === '/') $this->stylesheet=$cfg['stylesheet'];
    else {
      $style=$this->stylesheet_fallback;
      // a customised copy (with $this->cssprefix) wins over the shipped file
      $files=array(
        __DIR__.'/css/'.$this->cssprefix.$style.'.css',
        __DIR__.'/css/'.$style.'.css',
      );
      // DOCUMENT_ROOT may be missing (e.g. on the command line); the trailing slash is
      // dropped so that the resulting URL path always keeps its leading slash
      $docroot=isset($_SERVER['DOCUMENT_ROOT']) ? rtrim($_SERVER['DOCUMENT_ROOT'],'/') : '';
      foreach ($files as $file) {
        if (!file_exists($file)) continue;
        // turn the file system path into a URL path by cutting off the document root -
        // only as a leading prefix, never somewhere in the middle of the path
        if ($docroot !== '' && strpos($file,$docroot.'/') === 0) $this->stylesheet=substr($file,strlen($docroot));
        else $this->stylesheet=$file;
        break;
      }
    }
  }
  /**
   * Decides from the page's meta data whether the plugin should process this page.
   * Sets $this->yeah to TRUE if the page's template matches the configured one
   * (or the configured template is "all"), unless the page disables the plugin
   * with a "picopygments" meta value of FALSE.
   *
   * @param array $meta the page's meta data
   */
  public function onMetaParsed(array $meta)
  {
    // Only Do Things if pycopyg hasn't been disabled
    if (isset($meta['picopygments']) && $meta['picopygments'] === FALSE) {
      $this->__msg__("function ". __FUNCTION__ .' PicoPygments disabled for this page');
      return;
    }
    // a page need not carry a "template" entry; reading it unguarded would raise a warning
    $template=isset($meta['template']) ? $meta['template'] : NULL;
    $this->__msg__("function ". __FUNCTION__ .' found template "'. $template .'"');
    // Only Do Things if we have the correct template
    if ($template === $this->template || $this->template === "all") $this->yeah=TRUE;
  }
  /**
   * Replaces the contents of all <code> elements carrying a "language-*" class with
   * syntax-highlighted HTML produced by Pygments, and prepends a stylesheet link.
   *
   * Steps: parse $content with DOMDocument, collect the code nodes, take already
   * highlighted blocks from the cache, send all remaining blocks to Python in a single
   * call, store the results in the cache, swap the nodes and serialise the document again.
   * Blocks for which no usable highlighted markup is available are left untouched.
   *
   * @param string $content the page's HTML (taken by reference and modified in place)
   */
  public function onContentParsed(&$content)
  {
    if ($this->yeah === FALSE) {
      $this->__msg__("We're not doing anything. Bye!");
      return;
    }
    if (trim($content) == "") {
      $this->__msg__("Content is empty. Bye!");
      return;
    }

    /////////////////////////////// SERIOUSLY BEGINS HERE ////////////////////////////////
    // $content is a html fragment. We like to modify its content with DOMDocument, but it prefers complete
    // documents and completes them if necessary. This leads to strangeness. So we give it a complete document
    // with all required headers and tags, then strip those off in the end.
    $wrap_open='<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>';
    $wrap_close='</body></html>';
    $dom=new DOMDocument();
    $dom->preserveWhiteSpace = true;
    // collect libxml's complaints internally instead of turning each one into a PHP warning
    $libxml_prev=libxml_use_internal_errors(true);
    $dom->loadHTML($wrap_open.$content.$wrap_close,
      LIBXML_NONET|LIBXML_COMPACT|LIBXML_HTML_NODEFDTD|LIBXML_NOWARNING);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_prev);

    // collecting code nodes that have the string fragment $this->langprefix in their classes
    $xpath=new DOMXPath($dom);
    $query='//code[contains(concat(" ", @class, " "),"'. $this->langprefix .'")]'; //stackoverflow.com/a/1390680
    $nodes=$xpath->query($query);
    // ->length instead of count(): DOMNodeList is only countable since PHP 7.2
    $node_count=($nodes === FALSE) ? 0 : $nodes->length;
    if ($node_count === 0) {
      $this->__msg__('no code blocks with "'.$this->langprefix .'*" classes? Nothing to do, we\'re out!');
      return;
    }

    // static base for cached file's names - need to add code block specific language & checksum later.
    // Slashes are replaced as well, so that option strings can never point into another directory.
    $filename_base=str_replace(array('"','=',',',' ','/','\\'),'-',$this->formatteroptions .'-'. $this->lexeroptions);
    // assuming that pygments will always escape a literal < or > as "&lt;" or "&gt;",
    // a string like this should never show up in its output, and be a safe separator:
    $sep='<<<>>><<<>>><<<>>>';
    $script='';                  // Python code for all blocks that are not cached yet
    $pending=array();            // n-th Python result => index of the code node it belongs to
    $langs=array();              // sanitized language name per code node
    $new_item=array();           // highlighted markup per code node ('' = keep the original block)
    $new_item_filename=array();  // cache file name per code node
    for ($i=0;$i<$node_count;$i++) {
      $node=$nodes->item($i);
      $langclass=$node->getAttribute('class');
      $nv=$node->nodeValue;
      $lang=$this->extract_language($langclass);
      $langs[$i]=$lang;
      $this->__msg__("Language class: $langclass - Language: $lang");
      $this->__msg__("processing code node $i","nodeValue: $nv");
      $checksum=hash('crc32b',$nv); // a non-cryptographic, short'n'fast hash. md5($nv) maybe safer because longer, but slower?
      $new_item_filename[$i]=$checksum.'-'.$filename_base.'-'.$lang;
      $new_item[$i]='';
      if (isset($this->cache_dir)) {
        $cachefile=$this->cache_dir .'/'. $new_item_filename[$i];
        if (file_exists($cachefile)) {
          $cached=file_get_contents($cachefile);
          // an unreadable or empty cache file counts as "not cached" and is regenerated
          if ($cached !== FALSE && trim($cached) !== '') {
            $new_item[$i]=$cached;
            $this->__msg__("Got cached contents from ".$cachefile,"code node $i after: ".$cached);
            continue;
          }
        }
      }
      // not cached (yet): append this block to the Python code and remember which node the result is for
      $pending[]=$i;
      $script.=$this->python_snippet($nv,$lang,$sep);
    } // end iterating through the code nodes

    // the whole python-pygments block is conditional to this:
    if ($script !== '') {
      // add imports to beginning
      $script="from pygments import highlight\n"
        ."from pygments.lexers import get_lexer_by_name\n"
        ."from pygments.formatters import HtmlFormatter\n"
        .$script;
      $arg=escapeshellarg($script);
      $this->__msg__('Sending Python code to '.$this->python_cmd .' - formatteroptions: '.$this->formatteroptions .', lexeroptions: '.$this->lexeroptions,'-c argument: '.$arg);
      // execute that command, implode result into long string, then explode it into array again divided by $sep:
      $lines=array();
      exec($this->python_cmd .' -c '.$arg,$lines);
      $raw=implode("\n",$lines);
      $this->__msg__("","Raw result: $raw");
      $chunks=explode($sep,$raw);
      array_pop($chunks); // there's always a superfluous $sep at the end, creating an additional empty element
      $output_count=count($chunks);
      $this->__msg__("We have $output_count pygment output results");
      // ideally both numbers are identical
      if ($output_count !== count($pending)) $this->__msg__("Expected ".count($pending)." pygment output results, got $output_count");
      // the n-th result belongs to the n-th block that was sent to Python
      foreach ($pending as $j => $i) {
        if (!isset($chunks[$j])) break; // fewer results than blocks: the rest keeps its original markup
        $html=trim($chunks[$j]);
        if ($html === '') continue;     // highlighting failed for this block
        $new_item[$i]=$html;
        $this->__msg__("Assigned pygments output $j to code block $i","Content: $html");
        if (isset($this->cache_dir) && is_dir($this->cache_dir)) {
          $cachefile=$this->cache_dir .'/'. $new_item_filename[$i];
          $this->__msg__("Writing HTML to cache: ".$cachefile);
          file_put_contents($cachefile,$html);
        }
      }
    } // end if($script !== '')

    // replace the code nodes found above with the syntax-highlighted ones
    for ($i=0;$i<$node_count;$i++) {
      $this->__msg__("Replacing original code block $i (unless the new one is empty).","Content: ".$new_item[$i]);
      if (trim($new_item[$i]) === '') continue; // if it's empty, don't replace anything, use the original code block
      $item=$nodes->item($i);
      $classes=$this->cssclass .' '.$item->getAttribute('class'); // let's pull them together for easier application
      $parent=$item->parentNode;
      $in_pre=($parent instanceof DOMElement && $parent->tagName === 'pre');
      if ($in_pre && $this->label === TRUE) {
        // the whole <pre> is rebuilt: language label in a corner, followed by the highlighted code
        $markup='<div class="corner">'.htmlspecialchars($langs[$i],ENT_QUOTES,'UTF-8').'</div>'
          .'<code class="'.htmlspecialchars($classes,ENT_QUOTES,'UTF-8').'">'.$new_item[$i].'</code>';
        $fragment=$this->make_fragment($dom,$markup);
        if ($fragment === NULL) {
          $this->__msg__("Highlighted markup for code block $i is not well-formed, keeping the original.");
          continue;
        }
        $pre=$dom->createElement('pre');
        $pre->setAttribute("class",$this->cssclass);
        $pre->appendChild($fragment);
        $parent->parentNode->replaceChild($pre,$parent);
      }
      else {
        $fragment=$this->make_fragment($dom,$new_item[$i]);
        if ($fragment === NULL) {
          $this->__msg__("Highlighted markup for code block $i is not well-formed, keeping the original.");
          continue;
        }
        // if the code block is enclosed by a pre, set the cssclass on it
        if ($in_pre) $parent->setAttribute("class",$this->cssclass);
        $code=$dom->createElement('code');
        $code->setAttribute("class",$classes);
        $code->appendChild($fragment);
        $parent->replaceChild($code,$item);
      }
    } // end replace the code nodes

    // serialise and remove the wrapper we added at loadHTML: keep what is between <body> and the last </body>
    $html=$dom->saveHTML();
    $start=strpos($html,'<body>');
    $end=strrpos($html,'</body>');
    if ($start !== FALSE && $end !== FALSE && $end >= $start+6) $content=substr($html,$start+6,$end-$start-6);
    // fallback: cut by the wrapper's lengths (saveHTML() appends one newline after </html>)
    else $content=substr($html,strlen($wrap_open),-(strlen($wrap_close)+1));

    // Finally, adding stylesheet - but only if there is one: an empty href would make browsers request the page again
    if ($this->stylesheet !== '') {
      $content='<link rel="stylesheet" href="'. htmlspecialchars($this->stylesheet,ENT_QUOTES,'UTF-8') .'" type="text/css" />'.$content;
    }
  }

  /**
   * Extracts the language name from a code element's class attribute.
   * The result contains only characters that are harmless inside the generated
   * Python code and inside a cache file name (no quotes, slashes or whitespace).
   *
   * @param string $class the class attribute, e.g. "language-php"
   * @return string the language name, '' if none was found
   */
  private function extract_language($class)
  {
    // the prefix is not necessarily the first thing in the attribute
    $pos=strpos($class,$this->langprefix);
    if ($pos === FALSE) return '';
    $lang=substr($class,$pos+strlen($this->langprefix));
    // the language ends where the next class begins
    $lang=substr($lang,0,strcspn($lang," \t\n\r\f"));
    // sanitize the language name, because parsedown-extra doesn't (see README.md)
    $lang=rtrim(ltrim($lang,'{.#'),'}');
    return preg_replace('/[^A-Za-z0-9_+#.-]/','',$lang);
  }

  /**
   * Builds the Python statements that highlight one code block and print the result,
   * followed by the separator. If highlighting fails (e.g. unknown language) only the
   * separator is printed, which leaves the original code block untouched later on.
   *
   * @param string $code the code block's text
   * @param string $lang the (sanitized) language name
   * @param string $sep  separator printed after each result
   * @return string Python source, ending with a newline
   */
  private function python_snippet($code,$lang,$sep)
  {
    // escapes quotes and backslashes so the text survives inside the triple-quoted Python string
    $code=addslashes($code);
    return 'code="""'.$code.'"""'."\n"
      ."try:\n"
      .'    lexer=get_lexer_by_name("'.$lang.'", '.$this->lexeroptions ." )\n"
      ."    lexer.add_filter('tokenmerge')\n"
      .'    print(highlight(code, lexer, HtmlFormatter( '.$this->formatteroptions .' )),end = "")'."\n"
      ."except Exception:\n"
      ."    pass\n"
      ."print('".$sep."',end = \"\")\n";
  }

  /**
   * Parses markup into a document fragment belonging to $dom.
   *
   * @param DOMDocument $dom    the document the fragment is created for
   * @param string      $markup well-formed XML markup
   * @return DOMDocumentFragment|null the fragment, NULL if the markup could not be parsed or is empty
   */
  private function make_fragment($dom,$markup)
  {
    // keep parse errors from being raised as PHP warnings; the return value tells us enough
    $libxml_prev=libxml_use_internal_errors(true);
    $fragment=$dom->createDocumentFragment();
    $ok=$fragment->appendXML($markup);
    libxml_clear_errors();
    libxml_use_internal_errors($libxml_prev);
    return ($ok && $fragment->hasChildNodes()) ? $fragment : NULL;
  }
  235. }