fz.php 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. <?php
  2. /*
  3. # Compressing / Decompressing articles using Zipf law
  4. #
  5. # This program is free software: you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation, either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses>.
  17. */
  18. /*
  19. Usage: fz --{compress|decompress} [--dictionary dictionary] filename
  20. dictionary is a 2 column csv file. See matrix.csv
  21. Work to come:
  22. a) Compression: input a text file and output a fzt one according to usage
  23. b) Decompression: decompress a file to its original state (read first 5 characters
  24. and extract the text language)
  25. c) fz should be used as a pipe
  26. d) Localize to specific languages by providing an appropriate matrix.csv dictionary
  27. e) translate it to other, more appropriate programming languages such as C/C++ for better performance
  28. */
  /**
   * Read a whole file into a string.
   *
   * @param string $path File to read.
   * @return string The file contents.
   * @throws RuntimeException When the file is missing or unreadable.
   */
  function fz_read_file($path)
  {
      // is_readable() first, so a missing file raises one clear exception
      // instead of a PHP warning followed by processing of `false`.
      $contents = is_readable($path) ? file_get_contents($path) : false;
      if ($contents === false) {
          throw new RuntimeException("fz: cannot read '{$path}'");
      }

      return $contents;
  }

  /**
   * Turn the dictionary CSV text into ready-to-use whole-word patterns.
   *
   * The word is taken from the first column of each row; the key of each
   * returned pattern is the zero-based row number, which is the value later
   * written as the one-byte code for that word.
   *
   * @param string $csv Raw contents of the dictionary CSV file.
   * @return array<int, string> Row number => PCRE pattern.
   */
  function fz_load_patterns($csv)
  {
      $patterns = [];
      foreach (explode("\n", $csv) as $index => $line) {
          // A code is a single byte produced by chr(), which wraps modulo 256:
          // rows past 255 would reuse the codes of earlier rows, so they are
          // left out and their words are written verbatim.
          if ($index > 255) {
              break;
          }

          // Explicit delimiter/enclosure/escape (the historical defaults).
          $row = str_getcsv($line, ',', '"', '\\');

          // Tokens never contain whitespace, so surrounding blanks in the
          // dictionary word can only prevent a match.
          $word = isset($row[0]) ? trim((string) $row[0]) : '';

          // Blank rows (e.g. after a trailing newline) would otherwise give
          // the pattern /\b\b/u, which matches every word.
          if ($word === '') {
              continue;
          }

          // preg_quote() keeps regex metacharacters in the word literal.
          $patterns[$index] = '/\b' . preg_quote($word, '/') . '\b/u';
      }

      return $patterns;
  }

  /**
   * Encode one token: the first dictionary word found in it is replaced by
   * "#" followed by the byte whose value is the word's dictionary row number.
   * Characters of the token around the word (punctuation etc.) are kept.
   *
   * @param string             $token    A whitespace/comma-free piece of text.
   * @param array<int, string> $patterns Row number => PCRE pattern.
   * @return string The encoded token, or the token unchanged when no word matches.
   */
  function fz_compress_token($token, $patterns)
  {
      foreach ($patterns as $index => $pattern) {
          $code = '#' . chr($index);
          $count = 0;

          // A callback is used so that code bytes such as "$" or "\" are never
          // interpreted as back-references in a replacement string.
          $encoded = preg_replace_callback(
              $pattern,
              function () use ($code) {
                  return $code;
              },
              $token,
              -1,
              $count
          );

          // null signals a PCRE failure (e.g. invalid UTF-8 with /u): treat it
          // as "no match" for this word rather than emitting a bogus code.
          if ($encoded !== null && $count > 0) {
              return $encoded;
          }
      }

      return $token;
  }

  // Compress article.txt into article.fzt using the words listed in matrix.csv.
  // NOTE(review): tokens are split on whitespace and commas, so commas and the
  // exact spacing of the input are not kept in the output; a literal "#" in
  // the text is also not escaped - confirm against the planned decompressor.
  $language = "en";
  $patterns = fz_load_patterns(fz_read_file("matrix.csv"));
  $article = fz_read_file("article.txt");

  // Five-character header: "FZ", the language code, then "T".
  $outp = "FZ{$language}T";
  foreach (explode("\n", $article) as $line) {
      foreach (preg_split('/[\s,]+/', $line) as $token) {
          $outp .= fz_compress_token($token, $patterns) . " ";
      }
      $outp .= "\n";
  }

  $outfile = "article.fzt";
  if (file_put_contents($outfile, $outp) === false) {
      throw new RuntimeException("fz: cannot write '{$outfile}'");
  }
  57. ?>