phpwiki-search.pl 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283
  1. #! /usr/bin/perl
  2. # Copyright (C) 2004 Alex Schroeder <alex@emacswiki.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the
  16. # Free Software Foundation, Inc.
  17. # 59 Temple Place, Suite 330
  18. # Boston, MA 02111-1307 USA
  19. use CGI qw/:standard/;
  20. use CGI::Carp qw(fatalsToBrowser);
  21. use LWP::UserAgent;
  22. use Encode;
  23. if (not param('url')) {
  24. print header(-charset=>'utf-8'),
  25. start_html('PHP Wiki Search RSS 3.0'),
  26. h1('PHP Wiki Search RSS 3.0'),
  27. p('Translates a PHP Wiki Search result into RSS 3.0 usable by Oddmuse.'),
  28. start_form(-method=>'GET'),
  29. p('Search URL: ', textfield('url', '', 40), checkbox('latin-1'), submit()),
  30. end_form(),
  31. end_html();
  32. exit;
  33. }
  34. print header(-type=>'text/plain; charset=UTF-8');
  35. my $url = param('url');
  36. if (param('latin-1')) {
  37. $url =~ s/%([0-9a-f][0-9a-f])/chr(hex($1))/ige;
  38. $url = encode('latin-1', decode('utf-8', $url));
  39. my @letters = split(//, $url);
  40. my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')',
  41. ':', '/', '?', ';', '&', '=');
  42. foreach my $letter (@letters) {
  43. my $pattern = quotemeta($letter);
  44. if (not grep(/$pattern/, @safe)) {
  45. $letter = uc(sprintf("%%%02x", ord($letter)));
  46. }
  47. }
  48. $url = join('', @letters);
  49. }
  50. my $ua = new LWP::UserAgent;
  51. my $request = HTTP::Request->new('GET', $url);
  52. my $response = $ua->request($request);
  53. my $data = $response->content;
  54. $data = encode('utf-8', decode('latin-1', $data)) if param('latin-1');
  55. $data =~ /\<title\>([^<]*)/i;
  56. print "title: $1\n" if $1;
  57. print "link: " . param(url) . "\n";
  58. print "debug: $url\n"; # FIXME
  59. print "\n";
  60. while ($data =~ m|<dt>.*?<a href="([^"]*)".*\n((<dd>.*</dd>\n)*)|g) {
  61. my ($title, $desc) = ($1, $2);
  62. $title =~ s/%([0-9a-f][0-9a-f])/chr(hex($1))/ige;
  63. $title = encode('utf-8', decode('latin-1', $title)) if param('latin-1');
  64. print "title: $title\n";
  65. $_ = $desc;
  66. s|<dd>||g;
  67. s|<small[^>]*>||g;
  68. s|<strong[^>]*>||g;
  69. s|</strong>||g;
  70. s|</small>||g;
  71. s|</dd>||g;
  72. s|\n+$||g;
  73. s|\n|\n\t|g;
  74. print "description: $_\n";
  75. print "\n";
  76. }