hls-fetch 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318
  1. #!/usr/bin/perl -w
  2. #
  3. # hls-fetch - Download and decrypt HTTP Live Streaming videos.
  4. # Copyright (C) 2012 Oskar Liljeblad
  5. # Copyright (C) 2015, 2016 Desktopd Project
  6. #
  7. # This program is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, either version 3 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License
  18. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  19. #
  20. use strict;
  21. use Getopt::Long;
  22. use HTML::Parser;
  23. use LWP::UserAgent;
  24. use JSON;
  25. use File::Temp qw(tempfile);
  26. use URI::URL;
  27. use constant READ_SIZE => 1024;
  28. my %opt = ('bandwidth' => 'max', 'user-agent' => 'Mozilla/5.0 (compatible; hls-fetch/1.0)');
  29. Getopt::Long::GetOptions(\%opt, 'embedded', 'svtplay', 'playlist', 'output|o=s', 'bandwidth|b=s', 'quiet|q', 'force|f', 'verbose|v', 'no-decrypt', 'retry', 'user-agent=s', 'resume-at=i', 'version', 'help') || exit 1;
  30. if ($opt{'version'}) {
  31. print "hls-fetch 0.2\n";
  32. print "Copyright (C) 2012 Oskar Liljeblad\n";
  33. print "Copyright (C) 2015-2016 Desktopd Project\n";
  34. print "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.\n";
  35. print "This is free software: you are free to change and redistribute it.\n";
  36. print "There is NO WARRANTY, to the extent permitted by law.\n\n";
  37. print "Written by Oskar Liljeblad.\n";
  38. exit 0;
  39. }
  40. if ($opt{'help'}) {
  41. print "Usage: hls-fetch [OPTION]... URL\n";
  42. print "Download and decrypt videos served by the HTTP Live Streaming (HLS) protocol.\n\n";
  43. print " --embedded URL refers to a page with <video> tag (default)\n";
  44. print " --playlist URL refers to an M3U (m3u8) playlist\n";
  45. print " --svtplay URL refers to an SVT Play page (svtplay.se)\n";
  46. print " -o, --output=FILE save video to FILE rather than \"video.ts\"\n";
  47. print " -f, --force force overwriting existing output file\n";
  48. print " -b, --bandwidth=SPEC pick video with specified bandwidth (bits/s),\n";
  49. print " lowest (\"min\") or highest (\"max\") (default max)\n";
  50. print " -v, --verbose explain what is being done\n";
  51. print " -q, --quiet no output other than errors\n";
  52. print " --no-decrypt skip decryption even if stream should be decrypted\n";
  53. print " --retry retry connection if a network problem is detected\n";
  54. print " --user-agent=UA use this string as HTTP user agent\n";
  55. print " --resume-at=N resume at the Nth segment\n";
  56. print " --help display this help and exit\n";
  57. print " --version output version information and exit\n";
  58. print "\nDecryption requires openssl.\n";
  59. print "\nReport bugs to Oskar Liljeblad <oskar\@osk.mine.nu>.\n";
  60. exit 0;
  61. }
  62. die "--embedded, --playlist and --svtplay are mutually exclusive\n" if (scalar grep { defined } @opt{'svtplay', 'playlist', 'embedded'}) > 1;
  63. $opt{'embedded'} = 1 if !$opt{'playlist'} && !$opt{'svtplay'};
  64. die "--verbose and --quiet are mutually exclusive\n" if $opt{'verbose'} && $opt{'quiet'};
  65. die "non-numeric --bandwidth specified\n" if $opt{'bandwidth'} !~ /^(min|max|\d+)$/;
  66. die "missing URL operand\n" if !@ARGV;
  67. if (!exists $opt{'output'}) {
  68. $opt{'output'} = 'video.ts';
  69. warn "no output file specified, assuming video.ts\n" if !$opt{'quiet'};
  70. }
  71. # switch statement
  72. # Mimicking iOS magically allows successful downloads in many cases
  73. while (1) {
  74. if ("ipad" eq $opt{'user-agent'}) {
  75. $opt{'user-agent'} = 'Mozilla/5.0 (iPad; CPU OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
  76. last;
  77. }
  78. if ("iphone" eq $opt{'user-agent'}) {
  79. $opt{'user-agent'} = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
  80. last;
  81. }
  82. last;
  83. }
  84. my ($url) = @ARGV;
  85. my $browser = LWP::UserAgent->new;
  86. $browser->cookie_jar({});
  87. $browser->agent($opt{'user-agent'});
  88. my $video_file = $opt{'output'};
  89. die "$video_file: file exists, not overwriting without -f/--force\n" if !$opt{'force'} && -e $video_file;
  90. open(my $video_fh, '>', $video_file) || die "$video_file: cannot open file: $!\n";
  91. if ($opt{'svtplay'}) {
  92. my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
  93. my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_svtplay_tag, 'tagname,@attr']);
  94. my ($json_path, $json_title);
  95. sub handle_svtplay_tag {
  96. my ($tag, %attr) = @_;
  97. if ($tag eq 'a' && exists $attr{'id'} && $attr{'id'} =~ /^player($|_)/ && exists $attr{'data-json-href'} && exists $attr{'data-title'}) {
  98. $json_path = $attr{'data-json-href'};
  99. $json_title = $attr{'data-title'};
  100. }
  101. }
  102. $parser->parse($data);
  103. die "$url: cannot find any video on page\n" if !defined $json_path;
  104. print "Title: $json_title\n" if $opt{'verbose'};
  105. my $json_url = url($json_path, $url)->abs;
  106. $json_url->query('output=json');
  107. $json_url = $json_url->as_string();
  108. print "URL (JSON): $json_url\n" if $opt{'verbose'};
  109. $data = eval { fetch_url($json_url) }; die "$json_url: cannot fetch JSON data: $@" if $@;
  110. my $json = decode_json($data) // die "$json_url: cannot parse JSON data\n";
  111. die "$json_url: invalid JSON data\n" if !exists $json->{'video'}->{'videoReferences'}->[0];
  112. ($url) = map { $_->{'url'} } grep { $_->{'playerType'} eq 'ios' } @{$json->{'video'}->{'videoReferences'}};
  113. die "$json_url: missing video URL for 'ios' type in JSON data\n" if !defined $url;
  114. print "URL (master): $url\n" if $opt{'verbose'};
  115. }
  116. elsif ($opt{'embedded'}) {
  117. print "URL (embedded): $url\n" if $opt{'verbose'};
  118. my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
  119. my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_playlist_tag, 'tagname,@attr']);
  120. my $index_url;
  121. sub handle_playlist_tag {
  122. my ($tag, %attr) = @_;
  123. if ($tag eq 'video') {
  124. $index_url = $attr{'src'} if exists $attr{'src'};
  125. } elsif ($tag eq 'source') {
  126. $index_url = $attr{'src'} if exists $attr{'src'} && exists $attr{'type'} && $attr{'type'} eq 'application/vnd.apple.mpegurl';
  127. }
  128. }
  129. $parser->parse($data);
  130. if (((!defined $index_url) || "" eq $index_url) && $data =~ /(https?:.+\.m3u8)/) {
  131. $index_url = $1;
  132. }
  133. die "$url: cannot find any video on page\n" if !defined $index_url;
  134. $url = url($index_url, $url)->abs()->as_string();
  135. print "URL (master): $url\n" if $opt{'verbose'};
  136. }
  137. my $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
  138. my @lines = split(/\r*\n|\r\n*/, $data);
  139. die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';
  140. if (!grep { /^#EXTINF:/ } @lines) {
  141. my (@streams, $last_stream);
  142. foreach my $line (@lines) {
  143. if ($line =~ /^#EXT-X-STREAM-INF:(.*)$/) {
  144. $last_stream = { parse_m3u_attribs($url, $1) };
  145. push @streams, $last_stream;
  146. } elsif ($line !~ /^#EXT/) {
  147. die "$url: missing #EXT-X-STREAM-INF for URL: $line\n" if !defined $last_stream;
  148. $last_stream->{'URL'} = $line;
  149. $last_stream = undef;
  150. }
  151. }
  152. die "$url: no streams found in playlist\n" if !@streams;
  153. warn "$url: non-numeric bandwidth in playlist\n" if grep { $_->{'BANDWIDTH'} =~ /\D/ } @streams;
  154. my @bandwidths = sort { $a <=> $b } grep { /^\d+$/ } map { $_->{'BANDWIDTH'} } @streams;
  155. print "Bandwidths: ", join(', ', @bandwidths), "\n" if $opt{'verbose'};
  156. my $stream;
  157. if ($opt{'bandwidth'} eq 'min') {
  158. ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[0] } @streams;
  159. } elsif ($opt{'bandwidth'} eq 'max') {
  160. ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[-1] } @streams;
  161. } else {
  162. ($stream) = grep { $opt{'bandwidth'} == $_->{'BANDWIDTH'} } @streams;
  163. die "$url: no streams with bandwidth $opt{'bandwidth'} in playlist\n" if !defined $stream;
  164. }
  165. print "Bandwidth (selected): $stream->{'BANDWIDTH'}\n" if $opt{'verbose'};
  166. $url = url($stream->{'URL'}, $url)->abs()->as_string();
  167. print "URL (index): $url\n" if $opt{'verbose'};
  168. $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
  169. @lines = split(/\r?\n/, $data);
  170. die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';
  171. }
  172. my $sequence = 0;
  173. my (%segments, $cryptkey_url);
  174. foreach my $line (@lines) {
  175. if ($line =~ /^#EXT-X-MEDIA-SEQUENCE:(\d+)$/) {
  176. $sequence = $1;
  177. print "First sequence number: $sequence\n" if $opt{'verbose'};
  178. } elsif ($line =~ /^#EXT-X-KEY:(.*)$/) {
  179. my %attr = parse_m3u_attribs($url, $1);
  180. die "$url: unsupported encryption method $attr{'METHOD'} in playlist\n" if exists $attr{'METHOD'} && $attr{'METHOD'} ne 'AES-128';
  181. $cryptkey_url = $attr{'URI'};
  182. die "$url: missing encryption key URI in playlist\n" if !defined $cryptkey_url;
  183. } elsif ($line !~ /^#EXT/) {
  184. $segments{$sequence} = { 'url' => $line, 'cryptkey_url' => $cryptkey_url };
  185. $sequence++;
  186. }
  187. }
  188. die "$url: no segments in playlist\n" if !scalar keys %segments;
  189. my %cryptkeys;
  190. #my $cryptkey;
  191. #if (defined $cryptkey_url) {
  192. # print "URL (key): $cryptkey_url\n" if $opt{'verbose'};
  193. # $cryptkey = eval { fetch_url($cryptkey_url) }; die "$cryptkey_url: cannot fetch encryption key: $@" if $@;
  194. # $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
  195. # print "Key: $cryptkey\n" if $opt{'verbose'};
  196. #}
  197. print "Segments: ", scalar keys %segments, "\n" if $opt{'verbose'};
  198. $| = 1;
  199. foreach my $sequence (sort { $a <=> $b } keys %segments) {
  200. if ($opt{'resume-at'} && $sequence < $opt{'resume-at'}) {
  201. next;
  202. }
  203. my $segment = $segments{$sequence};
  204. my $segment_url = url($segment->{'url'}, $url)->abs()->as_string();
  205. print "URL (segment $sequence/", scalar keys %segments, "): $segment_url\n" if $opt{'verbose'};
  206. printf "\r%d/%d", $sequence, scalar keys %segments if !$opt{'quiet'} && !$opt{'verbose'};
  207. if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'} && !exists $cryptkeys{$segment->{'cryptkey_url'}}) {
  208. print "URL (key): ", $segment->{'cryptkey_url'}, "\n" if $opt{'verbose'};
  209. my $cryptkey = eval { fetch_url($segment->{'cryptkey_url'}) };
  210. die "$segment->{'cryptkey_url'}: cannot fetch encryption key: $@" if $@;
  211. $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
  212. print "Key: $cryptkey\n" if $opt{'verbose'};
  213. $cryptkeys{$segment->{'cryptkey_url'}} = $cryptkey;
  214. }
  215. my ($segment_fh, $segment_file) = tempfile();
  216. close $segment_fh;
  217. eval {
  218. eval { fetch_url($segment_url, $segment_file) }; die "$segment_url: cannot not fetch segment: $@" if $@;
  219. if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'}) {
  220. my ($decrypt_fh, $decrypt_file) = tempfile();
  221. close $decrypt_fh;
  222. my $iv = sprintf('%032x', $sequence);
  223. my @cmd = ('openssl', 'aes-128-cbc', '-d', '-in', $segment_file, '-out', $decrypt_file, '-K', $cryptkeys{$segment->{'cryptkey_url'}}, '-iv', $iv);
  224. system @cmd;
  225. unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
  226. $segment_file = $decrypt_file;
  227. die "$segment_file: openssl failed (status $?)\n" if $? != 0;
  228. }
  229. open ($segment_fh, '<', $segment_file) || die "$segment_file: cannot open file: $!\n";
  230. for (;;) {
  231. my $size = sysread($segment_fh, $data, READ_SIZE);
  232. die "$segment_file: cannot read from file: $!\n" if !defined $size;
  233. last if $size == 0;
  234. die "$video_file: cannot write to file: $!\n" if !defined syswrite($video_fh, $data);
  235. }
  236. close $segment_fh;
  237. };
  238. unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
  239. die $@ if $@;
  240. }
  241. close $video_fh;
  242. sub parse_m3u_attribs {
  243. my ($url, $attr_str) = @_;
  244. my %attr;
  245. for (my $as = $attr_str; $as ne ''; ) {
  246. $as =~ s/^?([^=]*)=([^,"]*|"[^"]*")\s*(,\s*|$)// or die "$url: invalid attributes in playlist: $attr_str\n";
  247. my ($key, $val) = ($1, $2);
  248. $val =~ s/^"(.*)"$/$1/;
  249. $attr{$key} = $val;
  250. }
  251. return %attr;
  252. }
  253. sub fetch_url {
  254. my ($url, $filename) = @_;
  255. my $max_retries = 10;
  256. my $retry_delay = 2;
  257. my $retry_delay_factor = 2;
  258. my $count = 0;
  259. while (1) {
  260. my $response;
  261. if (defined $filename) {
  262. $response = $browser->get($url, ":content_file" => $filename);
  263. #die $response->status_line(), "\n" if !$response->is_success;
  264. return undef if $response->is_success;
  265. } else {
  266. $response = $browser->get($url);
  267. #die $response->status_line(), "\n" if !$response->is_success;
  268. return $response->decoded_content() if $response->is_success;
  269. }
  270. if (500 != $response->code && 503 != $response->code || $response->message =~ /Server Error/i || (!$opt{'retry'})) {
  271. # Server explicitly failed, should not blindly retry
  272. die $response->status_line(), "\n";
  273. }
  274. print $response->status_line(), "\n" if !$opt{'quiet'};
  275. # Nasty connection problems, should retry
  276. if ($count < $max_retries) {
  277. print "Sleeping for $retry_delay secs before retry\n" if $opt{'verbose'};
  278. sleep $retry_delay;
  279. $retry_delay = $retry_delay * $retry_delay_factor;
  280. $count++;
  281. print "Retrying HTTP request: $url\n" if $opt{'verbose'};
  282. next;
  283. }
  284. die "Maximum number of retries reached: is the resource available at the specified URI?\n";
  285. }
  286. }