#!/usr/bin/env php
<?php
/*
 * StatusNet - a distributed open-source microblogging tool
 * Copyright (C) 2008, 2009, StatusNet, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

// Root of the StatusNet installation (this script lives in scripts/).
define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));

// Options parsed by commandline.inc: -y / --yes skips the confirmation prompt.
$shortoptions = 'y';
$longoptions = array('yes');

// Usage text printed by commandline.inc for --help.
$helptext = <<<END_OF_HELP
remove_duplicate_file_urls.php [options]
Remove duplicate URL entries in the file and file_redirection tables because they for some reason were not unique.
-y --yes do not wait for confirmation
END_OF_HELP;

// Boots the CLI environment: option parsing, DB config, class autoloading.
require_once INSTALLDIR.'/scripts/commandline.inc';
  29. if (!have_option('y', 'yes')) {
  30. print "About to remove duplicate URL entries in file and file_redirection tables. Are you sure? [y/N] ";
  31. $response = fgets(STDIN);
  32. if (strtolower(trim($response)) != 'y') {
  33. print "Aborting.\n";
  34. exit(0);
  35. }
  36. }
  37. $file = new File();
  38. $file->query('SELECT id, url, COUNT(*) AS c FROM file GROUP BY url HAVING c > 1');
  39. print "\nFound {$file->N} URLs with duplicate entries in file table";
  40. while ($file->fetch()) {
  41. // We've got a URL that is duplicated in the file table
  42. $dupfile = new File();
  43. $dupfile->url = $file->url;
  44. if ($dupfile->find(true)) {
  45. print "\nDeleting duplicate entries in file table for URL: {$file->url} [";
  46. // Leave one of the URLs in the database by using ->find(true)
  47. // and only deleting starting with this fetch.
  48. while($dupfile->fetch()) {
  49. print ".";
  50. $dupfile->delete();
  51. }
  52. print "]\n";
  53. } else {
  54. print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
  55. }
  56. }
  57. $file = new File_redirection();
  58. $file->query('SELECT file_id, url, COUNT(*) AS c FROM file_redirection GROUP BY url HAVING c > 1');
  59. print "\nFound {$file->N} URLs with duplicate entries in file_redirection table";
  60. while ($file->fetch()) {
  61. // We've got a URL that is duplicated in the file_redirection table
  62. $dupfile = new File_redirection();
  63. $dupfile->url = $file->url;
  64. if ($dupfile->find(true)) {
  65. print "\nDeleting duplicate entries in file table for URL: {$file->url} [";
  66. // Leave one of the URLs in the database by using ->find(true)
  67. // and only deleting starting with this fetch.
  68. while($dupfile->fetch()) {
  69. print ".";
  70. $dupfile->delete();
  71. }
  72. print "]\n";
  73. } else {
  74. print "\nWarning! URL suddenly disappeared from database: {$file->url}\n";
  75. }
  76. }
  77. print "\nDONE.\n";