123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318 |
- #!/usr/bin/perl -w
- #
- # hls-fetch - Download and decrypt HTTP Live Streaming videos.
- # Copyright (C) 2012 Oskar Liljeblad
- # Copyright (C) 2015, 2016 Desktopd Project
- #
- # This program is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # This program is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
- #
- use strict;
- use Getopt::Long;
- use HTML::Parser;
- use LWP::UserAgent;
- use JSON;
- use File::Temp qw(tempfile);
- use URI::URL;
- use constant READ_SIZE => 1024;
- my %opt = ('bandwidth' => 'max', 'user-agent' => 'Mozilla/5.0 (compatible; hls-fetch/1.0)');
- Getopt::Long::GetOptions(\%opt, 'embedded', 'svtplay', 'playlist', 'output|o=s', 'bandwidth|b=s', 'quiet|q', 'force|f', 'verbose|v', 'no-decrypt', 'retry', 'user-agent=s', 'resume-at=i', 'version', 'help') || exit 1;
- if ($opt{'version'}) {
- print "hls-fetch 0.2\n";
- print "Copyright (C) 2012 Oskar Liljeblad\n";
- print "Copyright (C) 2015-2016 Desktopd Project\n";
- print "License GPLv3+: GNU GPL version 3 or later <https://gnu.org/licenses/gpl.html>.\n";
- print "This is free software: you are free to change and redistribute it.\n";
- print "There is NO WARRANTY, to the extent permitted by law.\n\n";
- print "Written by Oskar Liljeblad.\n";
- exit 0;
- }
- if ($opt{'help'}) {
- print "Usage: hls-fetch [OPTION]... URL\n";
- print "Download and decrypt videos served by the HTTP Live Streaming (HLS) protocol.\n\n";
- print " --embedded URL refers to a page with <video> tag (default)\n";
- print " --playlist URL refers to an M3U (m3u8) playlist\n";
- print " --svtplay URL refers to an SVT Play page (svtplay.se)\n";
- print " -o, --output=FILE save video to FILE rather than \"video.ts\"\n";
- print " -f, --force force overwriting existing output file\n";
- print " -b, --bandwidth=SPEC pick video with specified bandwidth (bits/s),\n";
- print " lowest (\"min\") or highest (\"max\") (default max)\n";
- print " -v, --verbose explain what is being done\n";
- print " -q, --quiet no output other than errors\n";
- print " --no-decrypt skip decryption even if stream should be decrypted\n";
- print " --retry retry connection if a network problem is detected\n";
- print " --user-agent=UA use this string as HTTP user agent\n";
- print " --resume-at=N resume at the Nth segment\n";
- print " --help display this help and exit\n";
- print " --version output version information and exit\n";
- print "\nDecryption requires openssl.\n";
- print "\nReport bugs to Oskar Liljeblad <oskar\@osk.mine.nu>.\n";
- exit 0;
- }
- die "--embedded, --playlist and --svtplay are mutually exclusive\n" if (scalar grep { defined } @opt{'svtplay', 'playlist', 'embedded'}) > 1;
- $opt{'embedded'} = 1 if !$opt{'playlist'} && !$opt{'svtplay'};
- die "--verbose and --quiet are mutually exclusive\n" if $opt{'verbose'} && $opt{'quiet'};
- die "non-numeric --bandwidth specified\n" if $opt{'bandwidth'} !~ /^(min|max|\d+)$/;
- die "missing URL operand\n" if !@ARGV;
- if (!exists $opt{'output'}) {
- $opt{'output'} = 'video.ts';
- warn "no output file specified, assuming video.ts\n" if !$opt{'quiet'};
- }
- # switch statement
- # Mimicking iOS magically allows successful downloads in many cases
- while (1) {
- if ("ipad" eq $opt{'user-agent'}) {
- $opt{'user-agent'} = 'Mozilla/5.0 (iPad; CPU OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
- last;
- }
-
- if ("iphone" eq $opt{'user-agent'}) {
- $opt{'user-agent'} = 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_3_2 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13F69 Safari/601.1';
- last;
- }
-
- last;
- }
- my ($url) = @ARGV;
- my $browser = LWP::UserAgent->new;
- $browser->cookie_jar({});
- $browser->agent($opt{'user-agent'});
- my $video_file = $opt{'output'};
- die "$video_file: file exists, not overwriting without -f/--force\n" if !$opt{'force'} && -e $video_file;
- open(my $video_fh, '>', $video_file) || die "$video_file: cannot open file: $!\n";
- if ($opt{'svtplay'}) {
- my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
- my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_svtplay_tag, 'tagname,@attr']);
- my ($json_path, $json_title);
- sub handle_svtplay_tag {
- my ($tag, %attr) = @_;
- if ($tag eq 'a' && exists $attr{'id'} && $attr{'id'} =~ /^player($|_)/ && exists $attr{'data-json-href'} && exists $attr{'data-title'}) {
- $json_path = $attr{'data-json-href'};
- $json_title = $attr{'data-title'};
- }
- }
- $parser->parse($data);
- die "$url: cannot find any video on page\n" if !defined $json_path;
- print "Title: $json_title\n" if $opt{'verbose'};
- my $json_url = url($json_path, $url)->abs;
- $json_url->query('output=json');
- $json_url = $json_url->as_string();
- print "URL (JSON): $json_url\n" if $opt{'verbose'};
- $data = eval { fetch_url($json_url) }; die "$json_url: cannot fetch JSON data: $@" if $@;
- my $json = decode_json($data) // die "$json_url: cannot parse JSON data\n";
- die "$json_url: invalid JSON data\n" if !exists $json->{'video'}->{'videoReferences'}->[0];
- ($url) = map { $_->{'url'} } grep { $_->{'playerType'} eq 'ios' } @{$json->{'video'}->{'videoReferences'}};
- die "$json_url: missing video URL for 'ios' type in JSON data\n" if !defined $url;
- print "URL (master): $url\n" if $opt{'verbose'};
- }
- elsif ($opt{'embedded'}) {
- print "URL (embedded): $url\n" if $opt{'verbose'};
- my $data = eval { fetch_url($url) }; die "$url: cannot fetch page: $@" if $@;
- my $parser = HTML::Parser->new(api_version => 3, start_h => [\&handle_playlist_tag, 'tagname,@attr']);
- my $index_url;
- sub handle_playlist_tag {
- my ($tag, %attr) = @_;
- if ($tag eq 'video') {
- $index_url = $attr{'src'} if exists $attr{'src'};
- } elsif ($tag eq 'source') {
- $index_url = $attr{'src'} if exists $attr{'src'} && exists $attr{'type'} && $attr{'type'} eq 'application/vnd.apple.mpegurl';
- }
- }
- $parser->parse($data);
- if (((!defined $index_url) || "" eq $index_url) && $data =~ /(https?:.+\.m3u8)/) {
- $index_url = $1;
- }
- die "$url: cannot find any video on page\n" if !defined $index_url;
- $url = url($index_url, $url)->abs()->as_string();
- print "URL (master): $url\n" if $opt{'verbose'};
- }
- my $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
- my @lines = split(/\r*\n|\r\n*/, $data);
- die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';
- if (!grep { /^#EXTINF:/ } @lines) {
- my (@streams, $last_stream);
- foreach my $line (@lines) {
- if ($line =~ /^#EXT-X-STREAM-INF:(.*)$/) {
- $last_stream = { parse_m3u_attribs($url, $1) };
- push @streams, $last_stream;
- } elsif ($line !~ /^#EXT/) {
- die "$url: missing #EXT-X-STREAM-INF for URL: $line\n" if !defined $last_stream;
- $last_stream->{'URL'} = $line;
- $last_stream = undef;
- }
- }
- die "$url: no streams found in playlist\n" if !@streams;
- warn "$url: non-numeric bandwidth in playlist\n" if grep { $_->{'BANDWIDTH'} =~ /\D/ } @streams;
- my @bandwidths = sort { $a <=> $b } grep { /^\d+$/ } map { $_->{'BANDWIDTH'} } @streams;
- print "Bandwidths: ", join(', ', @bandwidths), "\n" if $opt{'verbose'};
- my $stream;
- if ($opt{'bandwidth'} eq 'min') {
- ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[0] } @streams;
- } elsif ($opt{'bandwidth'} eq 'max') {
- ($stream) = grep { $_->{'BANDWIDTH'} == $bandwidths[-1] } @streams;
- } else {
- ($stream) = grep { $opt{'bandwidth'} == $_->{'BANDWIDTH'} } @streams;
- die "$url: no streams with bandwidth $opt{'bandwidth'} in playlist\n" if !defined $stream;
- }
- print "Bandwidth (selected): $stream->{'BANDWIDTH'}\n" if $opt{'verbose'};
- $url = url($stream->{'URL'}, $url)->abs()->as_string();
- print "URL (index): $url\n" if $opt{'verbose'};
- $data = eval { fetch_url($url) }; die "$url: cannot fetch playlist: $@" if $@;
- @lines = split(/\r?\n/, $data);
- die "$url: invalid playlist, no header\n" if @lines < 1 || $lines[0] ne '#EXTM3U';
- }
- my $sequence = 0;
- my (%segments, $cryptkey_url);
- foreach my $line (@lines) {
- if ($line =~ /^#EXT-X-MEDIA-SEQUENCE:(\d+)$/) {
- $sequence = $1;
- print "First sequence number: $sequence\n" if $opt{'verbose'};
- } elsif ($line =~ /^#EXT-X-KEY:(.*)$/) {
- my %attr = parse_m3u_attribs($url, $1);
- die "$url: unsupported encryption method $attr{'METHOD'} in playlist\n" if exists $attr{'METHOD'} && $attr{'METHOD'} ne 'AES-128';
- $cryptkey_url = $attr{'URI'};
- die "$url: missing encryption key URI in playlist\n" if !defined $cryptkey_url;
- } elsif ($line !~ /^#EXT/) {
- $segments{$sequence} = { 'url' => $line, 'cryptkey_url' => $cryptkey_url };
- $sequence++;
- }
- }
- die "$url: no segments in playlist\n" if !scalar keys %segments;
- my %cryptkeys;
- #my $cryptkey;
- #if (defined $cryptkey_url) {
- # print "URL (key): $cryptkey_url\n" if $opt{'verbose'};
- # $cryptkey = eval { fetch_url($cryptkey_url) }; die "$cryptkey_url: cannot fetch encryption key: $@" if $@;
- # $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
- # print "Key: $cryptkey\n" if $opt{'verbose'};
- #}
- print "Segments: ", scalar keys %segments, "\n" if $opt{'verbose'};
- $| = 1;
- foreach my $sequence (sort { $a <=> $b } keys %segments) {
- if ($opt{'resume-at'} && $sequence < $opt{'resume-at'}) {
- next;
- }
-
- my $segment = $segments{$sequence};
- my $segment_url = url($segment->{'url'}, $url)->abs()->as_string();
- print "URL (segment $sequence/", scalar keys %segments, "): $segment_url\n" if $opt{'verbose'};
- printf "\r%d/%d", $sequence, scalar keys %segments if !$opt{'quiet'} && !$opt{'verbose'};
- if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'} && !exists $cryptkeys{$segment->{'cryptkey_url'}}) {
- print "URL (key): ", $segment->{'cryptkey_url'}, "\n" if $opt{'verbose'};
- my $cryptkey = eval { fetch_url($segment->{'cryptkey_url'}) };
- die "$segment->{'cryptkey_url'}: cannot fetch encryption key: $@" if $@;
- $cryptkey = join('', map { sprintf('%02x', ord) } split(//, $cryptkey));
- print "Key: $cryptkey\n" if $opt{'verbose'};
- $cryptkeys{$segment->{'cryptkey_url'}} = $cryptkey;
- }
- my ($segment_fh, $segment_file) = tempfile();
- close $segment_fh;
- eval {
- eval { fetch_url($segment_url, $segment_file) }; die "$segment_url: cannot not fetch segment: $@" if $@;
- if (!$opt{'no-decrypt'} && defined $segment->{'cryptkey_url'}) {
- my ($decrypt_fh, $decrypt_file) = tempfile();
- close $decrypt_fh;
- my $iv = sprintf('%032x', $sequence);
- my @cmd = ('openssl', 'aes-128-cbc', '-d', '-in', $segment_file, '-out', $decrypt_file, '-K', $cryptkeys{$segment->{'cryptkey_url'}}, '-iv', $iv);
- system @cmd;
- unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
- $segment_file = $decrypt_file;
- die "$segment_file: openssl failed (status $?)\n" if $? != 0;
- }
- open ($segment_fh, '<', $segment_file) || die "$segment_file: cannot open file: $!\n";
- for (;;) {
- my $size = sysread($segment_fh, $data, READ_SIZE);
- die "$segment_file: cannot read from file: $!\n" if !defined $size;
- last if $size == 0;
- die "$video_file: cannot write to file: $!\n" if !defined syswrite($video_fh, $data);
- }
- close $segment_fh;
- };
- unlink $segment_file || warn "$segment_file: cannot remove file: $!\n";
- die $@ if $@;
- }
- close $video_fh;
- sub parse_m3u_attribs {
- my ($url, $attr_str) = @_;
- my %attr;
- for (my $as = $attr_str; $as ne ''; ) {
- $as =~ s/^?([^=]*)=([^,"]*|"[^"]*")\s*(,\s*|$)// or die "$url: invalid attributes in playlist: $attr_str\n";
- my ($key, $val) = ($1, $2);
- $val =~ s/^"(.*)"$/$1/;
- $attr{$key} = $val;
- }
- return %attr;
- }
- sub fetch_url {
- my ($url, $filename) = @_;
- my $max_retries = 10;
- my $retry_delay = 2;
- my $retry_delay_factor = 2;
- my $count = 0;
- while (1) {
- my $response;
- if (defined $filename) {
- $response = $browser->get($url, ":content_file" => $filename);
- #die $response->status_line(), "\n" if !$response->is_success;
- return undef if $response->is_success;
- } else {
- $response = $browser->get($url);
- #die $response->status_line(), "\n" if !$response->is_success;
- return $response->decoded_content() if $response->is_success;
- }
-
- if (500 != $response->code && 503 != $response->code || $response->message =~ /Server Error/i || (!$opt{'retry'})) {
- # Server explicitly failed, should not blindly retry
- die $response->status_line(), "\n";
- }
-
- print $response->status_line(), "\n" if !$opt{'quiet'};
- # Nasty connection problems, should retry
- if ($count < $max_retries) {
- print "Sleeping for $retry_delay secs before retry\n" if $opt{'verbose'};
- sleep $retry_delay;
- $retry_delay = $retry_delay * $retry_delay_factor;
- $count++;
- print "Retrying HTTP request: $url\n" if $opt{'verbose'};
- next;
- }
-
- die "Maximum number of retries reached: is the resource available at the specified URI?\n";
- }
- }
|