countrycodes.pl 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #!/usr/bin/perl
  2. use CGI qw(escape);
  3. use Encode qw(_utf8_on);
  4. use LWP::Simple;
  5. use strict;
  6. my $data = get('http://en.wikipedia.org/w/index.php?title=ISO_3166-1_alpha-2&action=raw');
  7. _utf8_on($data); # mess with Perl's internals... data is in UTF8, but we need to tell Perl that!
  8. my @lines = split /\r?\n/, $data;
  9. my $longest_name = 0;
  10. my $longest_link = 0;
  11. print "DROP TABLE Countries;\n";
  12. print "CREATE TABLE Countries (country varchar(2) PRIMARY KEY, country_name varchar(200), wikipedia_en varchar(120));\n";
  13. foreach (@lines)
  14. {
  15. # toothpicks stood on end
  16. if (/^ \s* \| \s* # Initial pipe symbol
  17. id=\"([A-Z]{2})\" # ID attribute
  18. \s* \| \s* # Another pipe symbol
  19. <tt>..<\/tt> # Teletype font
  20. \s* \|\| \s* # Double pipe symbol
  21. \[\[ # Double opening brackets
  22. ([^\]\|]+) # Link
  23. (\|([^\]]+))? # Link text (optional)
  24. \]\] # Double closing bracket
  25. /xi)
  26. {
  27. my $code = $1;
  28. my $link = escape($2);
  29. my $name = $4 || $2;
  30. $link =~ s/\%20/_/g;
  31. $name =~ s/\'/\'\'/g;
  32. $longest_link = length($link) if length($link) > $longest_link;
  33. $longest_name = length($name) if length($name) > $longest_name;
  34. print "INSERT INTO Countries VALUES ('$code', '$name', 'http://en.wikipedia.org/wiki/$link');\n";
  35. }
  36. }
  37. $longest_link += length('http://en.wikipedia.org/wiki/');
  38. print "-- Longest name: $longest_name\n";
  39. print "-- Longest link: $longest_link\n";
  40. print "\n";
  41. print "-- This needs to be piped into the postgresql command line client.\n";
  42. print "-- e.g. ./countrycodes.pl | psql librefm\n";