bibtexParser.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. <?php
  2. //
  3. // +------------------------------------------------------------------------+
  4. // | phpBIB
  5. // | $Id: bibtexParser.php,v 2.0 2005/07/22 08:58:54 dfolio Exp $
  6. // | Creation Date : Tue Apr 28 2005
  7. // | LastUpdate $Date: 2005/07/22 08:58:54 $
  8. // +------------------------------------------------------------------------+
  9. // | Copyright (c) 2003-2005 David FOLIO, Antre Team
  10. // | Email dfolio@free.fr
  11. // | Web http://dfolio.free.fr
  12. // +------------------------------------------------------------------------+
  13. // | This source file is subject to BSD License, that is available
  14. // | at http://opensource.org/licenses/bsd-license.php
  15. // | or http://dfolio.free.fr/license.txt
  16. // +------------------------------------------------------------------------+
  17. //
  18. /** BibTeX file parser
  19. *
 * This script provides a BibTeX parser and several methods to render a
 * bibliography.
 *
 * It supports any category starting with '@', except for the special case
 * <var>@STRING</var>, which defines a BibTeX constant. Nevertheless, if a
 * category or any of its fields is not common in BibTeX, it is not rendered
 * when the bibliography is built...
  27. * The most common bibtex category :
  28. * - @ARTICLE
  29. * {@link Bibliography::buildArticle()}
  30. * - @BOOK, @INBOOK, @BOOKLET
  31. * {@link Bibliography::buildBook()}
  32. * - @PROCEEDINGS, @INCOLLECTION, @CONFERENCE, @INPROCEEDINGS
  33. * {@link Bibliography::buildInProceedings()}
  34. * - @MASTERSTHESIS, @PHDTHESIS
  35. * {@link Bibliography::buildThesis()}}
  36. * - @MANUAL, @AUDIOVISUAL, @TECHREPORT
  37. * {@link Bibliography::buildReport()}}
  38. * - @MISC, @WEBPAGE
  39. * - @UNPUBLISHED, and other: not yet supported...
  40. *
  41. *
  42. *
  43. * This script is provided "as is" for free. No support is available. I am
  44. * not responsible for any loss due to the use of this program.
  45. * You may modify the program for your own purpose. If you think your
  46. * modification may benefit others, please send me a copy to:
  47. * {@link mailto:dfolio@free.fr dfolio@free.fr}.
  48. * Thank you!
  49. *
  50. * If you think it is helpful, please kindly add a link pointing to:
  51. * {@link http://dfolio.free.fr}
  52. *
  53. * @version $Revision: 2.0 $
  54. * @author David FOLIO <dfolio@free.fr>
  55. * @copyright Copyright &copy; 2003-2005, Antre Team
  56. * @license http://dfolio.free.fr/license.txt
  57. *
  58. * @package phpBIB
  59. *
  60. * @todo define some other index such as author, year...
  61. */
  62. if (!defined("BIB_PARSE_DEBUG")) define("BIB_PARSE_DEBUG",false);
  63. /** An array which contains field to exclude (must be in lower case)*/
  64. if (!isset($BIB_EXCLUDES_FIELD)) $BIB_EXCLUDES_FIELD=array();
  65. /** Class BibTexParser: BibTeX file parser
  66. *
  67. * This class provide a bibTeX file parser and several methods for bibliography
  68. * management.
  69. *
  70. * This class was designed to be called statically. In fact you don't need to
  71. * define an object...
  72. *
  73. * @author David FOLIO
  74. * @version 1.0
  75. * @package phpBIB
  76. */
  77. class BibTexParser{
  78. /** The bibliography data
  79. *
  80. * This is an array which contains bibliography entry
  81. * @var Array
  82. */
  83. var $files;
  84. var $bibArr=array();
  85. /** define if cache is enabled
  86. * @var Boolean
  87. * @access private
  88. */
  89. var $_useCache=false;
  90. /** The constructor
  91. * @param $bibfiles a list of bibTeX file to load
  92. * @param $useCache (dis)enable the cache management
  93. */
  94. function BibTexParser($bibfiles,$useCache=false){
  95. $this->bibArr=array();
  96. $this->files=$bibfiles;
  97. $this->_useCache=$useCache;
  98. $this->load($this->files,$useCache);
  99. }
  100. /** Define the biblio data entry
  101. * @param Array the biblio data entry, which is build from BibtexParser
  102. * @see {@link BibtexParser}
  103. */
  104. function setBibData($biblio){
  105. if (!is_array($biblio)){
  106. trigger_error("BibTexParser::setBiblio > Bad parameter biblio=".
  107. print_r($biblio,true).
  108. " which must be an array of biblio data entry\n",
  109. E_USER_ERROR);
  110. return false;
  111. }
  112. $this->bibArr=$biblio;
  113. }
  114. /** Get the biblio data entry
  115. * @return Array the biblio data entry array
  116. */
  117. function getBibData(){return $this->bibArr;}
  118. /** Load and parse bibfiles
  119. * @param Mixed could be a single bibfile, or an array of bibfiles.
  120. * @param Boolean (dis)enable the cache management
  121. */
  122. function load($bibfiles,$useCache=false){
  123. $this->_useCache=$useCache;
  124. if ((!$this->_useCache) &&(isset($_SESSION["PHPBIB_BIBLIO"]))){
  125. unset($_SESSION["PHPBIB_BIBLIO"]);
  126. }elseif ($this->_useCache&& isset($_SESSION["PHPBIB_BIBLIO"])){
  127. if (BIB_PARSE_DEBUG) echo "<!-- load bib from cache-->";
  128. $this->bibArr=$_SESSION["PHPBIB_BIBLIO"];
  129. return true;
  130. }
  131. if (is_string($bibfiles)) $bibfiles=array($bibfiles);
  132. if (!is_array($bibfiles)){
  133. trigger_error("BibTexParser::load > Bad parameter bibfiles=".
  134. print_r($bibfiles,true).
  135. " which must be an array of string bibfile name or just a string bibfile name\n",
  136. E_USER_ERROR);
  137. return false;
  138. }
  139. $this->bibArr=BibTexParser::parse($bibfiles);
  140. if ($this->_useCache) {$_SESSION["PHPBIB_BIBLIO"]=$this->bibArr; }
  141. return (true);
  142. }
  143. /** Save the biblio to a file
  144. * @param String $filename the file where to store the given biblio
  145. * @param Array the biblio data entry to save
  146. */
  147. static function write($filename,$biblio=null){
  148. return @file_put_contents($filename, serialize($biblio));
  149. }
  150. /** Read the biblio to a file
  151. * @param String $filename the file where to store the given biblio
  152. * @param Array the biblio data entry to save
  153. */
  154. static function read($filename){
  155. return @unserialize(file_get_contents($filename));
  156. }
  157. /** Delete the current biblio.
  158. * If there the cache is enable, also remove the biblio from the cache.
  159. *
  160. * @param Boolean (dis)enable the cache management
  161. */
  162. function destroy($useCache=false){
  163. if ($useCache&&isset($_SESSION["PHPBIB_BIBLIO"])) unset($_SESSION["PHPBIB_BIBLIO"]);
  164. unset($this->bibArr);
  165. //$this=NULL;
  166. return true;
  167. }
  168. /** Parse bibfiles
  169. *
  170. * Here is the main parser method. This function can parse a single
  171. * bibTeX file or several defined in an array.
  172. * This function return the parser result in an array like:
  173. * <pre>
  174. * array(
  175. * 'category'=>array('bibkey'=>'the corresponding category',...),
  176. * 'a field'=>array('bibkey'=>'the corresponding field',...),
  177. * ...
  178. * );
  179. * </pre>
  180. *
  181. * All field (excepted the specified excluded one) are added in the resulting
  182. * array.
  183. *
  184. * If an error occur during the parse (like open bibfile fail) the function
  185. * return <var>FALSE</var>.
  186. * @param Mixed bibfiles.
  187. * An array (or a string for single) of bibfile (with their path!)
  188. * @return Mixed
  189. *
  190. */
  191. function parse($bibfiles,$excludeFields=null){
  192. global $BIB_EXCLUDES_FIELD;
  193. if (!isset($BIB_EXCLUDES_FIELD)) $BIB_EXCLUDES_FIELD=array();
  194. if (empty($excludeFields)) $excludeFields=$BIB_EXCLUDES_FIELD;
  195. if (!is_array($bibfiles)) $bibfiles=array($bibfiles);
  196. //init
  197. $count=-1;$cst_count=0;$bibkey=false;$unclose_field=false;$current_fieldname=$current_fieldval=false;
  198. $arFields=$item=$fieldvalue=$fieldname=$cst_reg=$cval_reg=array();
  199. foreach($bibfiles as $bibfile){
  200. if(!(file_exists($bibfile))){
  201. trigger_error("[".__CLASS__."::parse] bibfile $bibfile does not exists\n",E_USER_ERROR);
  202. return false;
  203. }
  204. //get file lines
  205. $lines = file ($bibfile);
  206. $currentFields="";//if no empty: multi-lines fields
  207. $fieldVal="";
  208. foreach ($lines as $lineindex => $line) {
  209. $seg=trim($line);$beginField=false;$fieldVal="";
  210. if (empty($seg)||($seg=='\0')||($seg=='\n')||$seg[0]=='%') continue;
  211. $segupper=strtoupper($seg);
  212. //constant
  213. if (strpos($segupper,'@STRING')!==false) {
  214. list($cst,$cval)=BibtexParser::bibstring($seg);
  215. $cst_reg[$cst_count]='/'.$cst.'\s*#?\s*(.*)/';
  216. $cval_reg[$cst_count]=''.trim($cval).' $1';$cst_count++;
  217. continue;
  218. }elseif (preg_match('/@(.*) { (.*)/x', $seg,$matches)>0){/*get category*/
  219. $entry=trim($matches[1]);$rest=trim($matches[2]);
  220. if (empty($rest)||($rest=='\0')||($rest=='\n')||$rest=='%') {$bibkey=false;continue;}
  221. $bibkey=trim($rest,"\t\r\n\x0B\x20 ,");
  222. $item["category"][$bibkey]=strtoupper($entry);
  223. $count++;$fieldcount=-1;
  224. continue;
  225. } // #of item increase
  226. elseif ((strpos($seg,'='))!==false ){ // one field begins
  227. $beginField=true;
  228. if (empty($bibkey)){
  229. trigger_error("[".__CLASS__."::parse] Malformed bibfile no bibkey in used\n".
  230. "at line $lineindex in $bibfile\n Segment:".$seg."\n",E_USER_ERROR);
  231. return false;
  232. }
  233. if (preg_match('/(url|pdf|ps)\s*=\s*(.*)/', $seg,$matches)){
  234. //print_r($matches);
  235. $currentFields=strtolower(trim($matches[1]));
  236. if (!empty($excludeFields)&&in_array($currentFields,$excludeFields)) continue;
  237. $fieldVal=trim($matches[2],"\t\r\n\x0B\x20 ,;\"{}");
  238. $item[$currentFields][$bibkey]=$fieldVal;
  239. }else{
  240. if (preg_match('/(.*)\s*=\s*(.*)/', $seg,$matches)<1){
  241. if (BIB_PARSE_DEBUG) echo "<p>At line $lineindex in $bibfile\n Segment:".$seg."</p>";
  242. continue;
  243. }else{
  244. $currentFields=strtolower(trim($matches[1]));
  245. if (!empty($excludeFields)&&in_array($currentFields,$excludeFields)) continue;
  246. $fieldVal=preg_replace($cst_reg,$cval_reg,trim($matches[2],"\t\r\n\x0B\x20 ,;\"{}"));
  247. $haveConst=(trim($fieldVal)!==trim($matches[2]));
  248. $item[$currentFields][$bibkey]=trim($fieldVal,"\t\r\n\x0B\x20 ,;\"{}");
  249. }
  250. }
  251. }
  252. //multi-lines fields
  253. if ((!$beginField)&&!empty($currentFields)&&(!empty($bibkey))&&isset($item[$currentFields][$bibkey])){
  254. $fieldVal=trim($seg,"\t\r\n\x0B\x20 \"{}");
  255. if (!empty($fieldVal)) $item[$currentFields][$bibkey].= " ".$fieldVal;
  256. if (preg_match('/("|});?/',$seg,$matches)){$item[$currentFields][$bibkey]=trim($item[$currentFields][$bibkey],"\t\r\n\x0B\x20 ,;\"{}"); $currentFields="";}
  257. }else if (isset($item[$currentFields][$bibkey]))
  258. $item[$currentFields][$bibkey]=trim($item[$currentFields][$bibkey],"\t\r\n\x0B\x20 ,\"{}");
  259. }//end foreach lines
  260. }//end foreach bibfiles
  261. foreach ($item as $fn=>$fields) {
  262. foreach ($fields as $key=>$val) {
  263. /*if (strpos($val,'$')!=false){
  264. $mvArr= preg_split('/\$/', $val);$val='';
  265. for($i=1; $i<count($mvArr);$i+=2){
  266. $val.=$mvArr[$i-1].' '.BibtexParser::bibMath($mvArr[$i]);
  267. }
  268. }*/
  269. $val=trim($val,"\t\r\n\x0B\x20 ,\"{}");
  270. $item[$fn][$key]=BibtexParser::strtr(str_replace(array('"','{','}','$'),'',$val));
  271. //echo "<!--[$fn] [$key]: ".$item[$fn][$key]." // $val -->\n";
  272. }
  273. }
  274. /*
  275. elseif (isset($item[$currentFields][$bibkey])){
  276. */
  277. return $item;
  278. }
  279. /** Parse a <var>@STRING</var> (bibTeX constant defintion)
  280. *
  281. * This function parse a bibTeX constant <var>@STRING</var>, and then provide
  282. * an array with the constant name and the constant value:
  283. * <pre>arrray(const_name, const_value)</pre>.
  284. * If the specified string does not match a bibTeX constant
  285. * <var>@STRING</var>, the function return <var>FALSE</var>.
  286. * @param (String) the string to parse
  287. * @return (Mixed)
  288. */
  289. function bibstring($str){
  290. if(preg_match('/@string{(.*)=(.*)}/i',$str,$matches))
  291. if (count($matches)>2) return array(trim($matches[1]),trim($matches[2]));
  292. return false;
  293. }
  294. /** Simple math translation*/
  295. function bibMath($str){
  296. if (empty($str)) return '';
  297. if(strpos($str,'_')!==false) $str=preg_replace('/\$(.*)_(.*)\$/','<sub>$1</sub>',$str);
  298. if(strpos($str,'^')!==false) $str=preg_replace('/^(.*)\s+/','<sup>$1</sup>',$str);
  299. return str_replace('$','|',$str);
  300. }
  301. /** Translate (la)TeX character to HTML entities
  302. *
  303. * This function returns a copy of str, translating all occurrences of each
  304. * character according a <var>$map</var> array.
  305. * The map array it's an array like:
  306. * <pre>
  307. * array("from latex"=>"to HTML markup",...)
  308. * </pre>
  309. *
  310. * This function is useful to translate user-supplied (la)Tex command to HTML
  311. * markup. But beacrefull, this function replace only (la)TeX command without
  312. * arguments!
  313. * @param (String) the string to translate.
  314. * @param (Array) additionnal <var>$map</var>. Use '-1' to desactivate the
  315. * call to {@link getLatexTranslationTable} and to not use
  316. * {@link get_html_translation_table}
  317. * @return (String)
  318. */
  319. function strtr($str,$map=array()){
  320. if ($map!=-1){
  321. if (!is_array($map)) $map=array();
  322. $map=array_merge(BibTexParser::getLatexTranslationTable(),$map);
  323. $map= array_merge(get_html_translation_table(HTML_ENTITIES),$map);
  324. }
  325. if (is_array($map)) return @strtr($str, $map);
  326. return $str;
  327. }
  328. /**Returns the translation table used by {@link strtr}
  329. * @return (Array)
  330. */
  331. function getLatexTranslationTable(){
  332. $map=array();
  333. $map['=']=''; $map['{']=''; $map['}']=''; $map['\"']=' ';//clean bibkey
  334. $map['~']='&nbsp;'; $map['\\~']='~';
  335. $map['\\`a']='&agrave;'; $map['\\`A']='&Agrave;';
  336. $map['\\`e']='&egrave;'; $map['\\`E']='&Egrave;';
  337. $map['\\`\\i']='&igrave;'; $map['\\`\\I']='&Igrave;';
  338. $map['\\`o']='&ograve;'; $map['\\`O']='&Ograve;';
  339. $map['\\`u']='&ugrave;'; $map['\\`U']='&Ugrave;';
  340. $map['\\\'a']='&aacute;'; $map['\\\'A']='&Aacute;';
  341. $map['\\\'e']='&eacute;'; $map['\\\'E']='&Eacute;';
  342. $map['\\\'\\i']='&iacute;';$map['\\\'\\I']='&Iacute;';
  343. $map['\\\'o']='&oacute;'; $map['\\\'O']='&Oacute;';
  344. $map['\\\'u']='&uacute;'; $map['\\\'U']='&Uacute;';
  345. $map['\\^a']='&acirc;'; $map['\\^A']='&Acirc;';
  346. $map['\\^e']='&ecirc;'; $map['\\^E']='&Ecirc;';
  347. $map['\\^\\i']='&icirc;'; $map['\\^\\I']='&Icirc;';
  348. $map['\\^o']='&ocirc;'; $map['\\^O']='&Ocirc;';
  349. $map['\\^u']='&ucirc;'; $map['\\^U']='&Ucirc;';
  350. $map['\\:a']='&auml;'; $map['\\:A']='&Auml;';
  351. $map['\\:e']='&euml;'; $map['\\:E']='&Euml;';
  352. $map['\\:i']='&iuml;'; $map['\\:I']='&Iuml;';
  353. $map['\\:o']='&ouml;'; $map['\\:O']='&Ouml;';
  354. $map['\\:u']='&uuml;'; $map['\\:U']='&Uuml;';
  355. $map['\\oe']='&oelig;'; $map['\\OE']='&OElig;';
  356. //bind some (la)Tex command ...
  357. $map['\\TeX']='TeX';$map['\\LaTeX']='LaTeX';
  358. $map['\\dots']='&hellip;'; $map['\\ldots']='...';
  359. $map['--']='&ndash;';$map['---']='&mdash;';
  360. //bind some mathematical command
  361. $map['\\infty']='&infin;';
  362. return $map;
  363. }
  364. }
  365. ?>