sub_renamer.pl 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. #!/usr/bin/perl
  2. # Author: Daniel "Trizen" Șuteu
  3. # Date: 1st December 2014
  4. # License: GPLv3
  5. # https://github.com/trizen
  6. use utf8;
  7. use 5.014;
  8. use strict;
  9. use warnings;
  10. use Encode qw(decode_utf8);
  11. use File::Find qw(find);
  12. use Getopt::Long qw(GetOptions);
  13. binmode(STDOUT, ':utf8');
  14. my $rename = 0;
  15. my $single_file = 0;
  16. my $min_percentage = 50;
  17. sub help {
  18. my ($code) = @_;
  19. print <<"HELP";
  20. Rename subtitles to match the video files
  21. usage: $0 /my/videos [...]
  22. options:
  23. -r --rename : rename the file names (default: $rename)
  24. -s --single-file : one video and one subtitle in a dir (default: $single_file)
  25. -p --percentage=i : minimum percentage of approximation (default: $min_percentage)
  26. Match subtitles to video names across directories and rename them accordingly.
  27. The match is done heuristically, using an approximation comparison algorithm.
  28. When there are more subtitles and more videos inside a directory, the script
  29. makes decisions based on the filename approximations and rename the file
  30. if they are at least 50% similar. (this percent is customizable)
  31. The script has, also, several special cases for serials (S00E00)
  32. and for single video files with one subtitle in the same directory.
  33. Usage example:
  34. $0 -s -p=75 ~/Videos
  35. Copyright (C) 2014 Daniel "Trizen" Șuteu <trizenx\@gmail\.com>
  36. License: GPLv3 or later, at your choice. See <https://www.gnu.org/licenses/gpl>
  37. HELP
  38. exit($code // 0);
  39. }
  40. GetOptions(
  41. 'p|percentage=i' => \$min_percentage,
  42. 'r|rename!' => \$rename,
  43. 's|single-file!' => \$single_file,
  44. 'h|help' => sub { help() },
  45. )
  46. or die("Error in command line arguments");
  47. my @dirs = grep { -d } @ARGV;
  48. @dirs || help(2);
  49. # Source: https://en.wikipedia.org/wiki/Video_file_format
  50. my @video_formats = qw(
  51. avi
  52. mp4
  53. wmv
  54. mkv
  55. webm
  56. flv
  57. ogv
  58. ogg
  59. drc
  60. mng
  61. mov
  62. qt
  63. rm
  64. rmvb
  65. asf
  66. m4p
  67. m4v
  68. mpg
  69. mp2
  70. mpeg
  71. mpe
  72. mpv
  73. m4v
  74. 3gp
  75. 3g2
  76. mxf
  77. roq
  78. nsv
  79. yuv
  80. );
  81. # Source: https://en.wikipedia.org/wiki/Subtitle_%28captioning%29#Subtitle_formats
  82. my @subtitle_formats = qw(
  83. aqt
  84. gsub
  85. jss
  86. sub
  87. ttxt
  88. pjs
  89. psb
  90. rt
  91. smi
  92. stl
  93. ssf
  94. srt
  95. ssa
  96. ass
  97. usf
  98. );
  99. sub acmp {
  100. my ($name1, $name2, $percentage) = @_;
  101. my ($len1, $len2) = (length($name1), length($name2));
  102. if ($len1 > $len2) {
  103. ($name2, $len2, $name1, $len1) = ($name1, $len1, $name2, $len2);
  104. }
  105. return -1
  106. if (my $min = int($len2 * $percentage / 100)) > $len1;
  107. my $diff = $len1 - $min;
  108. foreach my $i (0 .. $diff) {
  109. foreach my $j ($i .. $diff) {
  110. if (index($name2, substr($name1, $i, $min + $j - $i)) != -1) {
  111. return 0;
  112. }
  113. }
  114. }
  115. return 1;
  116. }
  117. my $videos_re = do {
  118. local $" = '|';
  119. qr/\.(?:@video_formats)\z/i;
  120. };
  121. my $subs_re = do {
  122. local $" = '|';
  123. qr/\.(?:@subtitle_formats)\z/i;
  124. };
  125. my $serial_re = qr/S([0-9]{2,})E([0-9]{2,})/;
  126. if (not $rename) {
  127. warn "\n[!] To actually rename the files, execute me with option '-r'.\n\n";
  128. }
  129. my %content;
  130. find {
  131. no_chdir => 0,
  132. wanted => sub {
  133. if (/$videos_re/) {
  134. my $name = decode_utf8($_) =~ s/$videos_re//r;
  135. push @{$content{$File::Find::dir}{videos}{$name}}, decode_utf8($File::Find::name);
  136. }
  137. elsif (/$subs_re/) {
  138. my $name = decode_utf8($_) =~ s/$subs_re//r;
  139. push @{$content{$File::Find::dir}{subs}{$name}}, decode_utf8($File::Find::name);
  140. }
  141. },
  142. } => @dirs;
  143. sub ilc {
  144. my ($string) = @_;
  145. $string =~ s/[[:punct:]]+/ /g;
  146. $string = join(' ', split(' ', $string));
  147. lc($string);
  148. }
  149. foreach my $dir (sort keys %content) {
  150. my $subs = $content{$dir}{subs} // next;
  151. my $videos = $content{$dir}{videos} // next;
  152. # Make a table with scores and rename the subtitles
  153. # accordingly to each video it belongs (using heuristics)
  154. my (%table, %seen, %subs_taken);
  155. my @subs = sort keys %{$subs};
  156. my @videos = sort keys %{$videos};
  157. my %memo;
  158. foreach my $sub (@subs) {
  159. foreach my $video (@videos) {
  160. PERCENT: for (my $i = 100 ; $i >= $min_percentage ; $i--) {
  161. # Break if subtitle has the same name as video
  162. # and mark it as already taken.
  163. if ($sub eq $video) {
  164. $subs_taken{$sub}++;
  165. last;
  166. }
  167. if (acmp($memo{$sub} //= ilc($sub), $memo{$video} //= ilc($video), $i) == 0) {
  168. # A subtitle can't be shared with more videos
  169. if (exists $seen{$sub}) {
  170. foreach my $key (@{$seen{$sub}}) {
  171. if (@{$table{$key}}) {
  172. if ($i > $table{$key}[-1][1]) {
  173. pop @{$table{$key}};
  174. }
  175. else {
  176. last PERCENT;
  177. }
  178. }
  179. }
  180. }
  181. push @{$table{$video}}, [$sub, $i];
  182. push @{$seen{$sub}}, $video;
  183. last;
  184. }
  185. }
  186. }
  187. }
  188. if (@subs == 1 and @videos == 1 and not keys %table) {
  189. my ($sub, $video) = (@subs, @videos);
  190. next if $sub eq $video;
  191. $table{$video} = [[$sub, 0]];
  192. }
  193. # Rename the files
  194. foreach my $video (sort keys %table) {
  195. @{$table{$video}} || next;
  196. my ($sub, $percentage) = @{(sort { $b->[1] <=> $a->[1] } @{$table{$video}})[0]};
  197. next if exists $subs_taken{$sub};
  198. foreach my $subfile (@{$subs->{$sub}}) {
  199. # If it is a serial (SxxExx)
  200. # skip if subtitle contains a serial number
  201. # that is different from that of the video.
  202. if ($video =~ /$serial_re/) {
  203. my ($vs, $ve) = ($1, $2);
  204. if ($sub =~ /$serial_re/) {
  205. my ($ss, $se) = ($1, $2);
  206. if ($vs ne $ss or $ve ne $se) {
  207. next;
  208. }
  209. }
  210. }
  211. my $new_name = $subfile =~ s/\Q$sub\E(?=$subs_re)/$video/r;
  212. say "** Renaming: $subfile -> $new_name ($percentage%)";
  213. # Skip file if the current percentage is lower than the minimum percentage
  214. if ($percentage < $min_percentage) {
  215. if (@subs == 1 and @videos == 1) {
  216. if (not $single_file) {
  217. warn "\t[!] I will rename this if you execute me with option '-s'.\n";
  218. next;
  219. }
  220. }
  221. else { # this will not happen
  222. warn "\t[!] Percentage is lower than $min_percentage%. Skipping file...\n";
  223. next;
  224. }
  225. }
  226. # Rename the file (if rename is enabled)
  227. if ($rename) {
  228. if (-e $new_name) {
  229. warn "\t[!] File already exists... Skipping...\n";
  230. next;
  231. }
  232. rename($subfile, $new_name)
  233. || warn "\t[!] Can't rename file: $!\n";
  234. }
  235. }
  236. }
  237. }