123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- #!/usr/bin/perl
- # Daniel "Trizen" Șuteu
- # License: GPLv3
- # Date: 04 February 2013
- # Latest edit on: 16 July 2015
- # https://github.com/trizen
- # Perl source code extractor.
- use utf8;
- use 5.018;
- use strict;
- use warnings;
- use open IO => ':utf8', ':std';
- #use lib qw(../lib);
- use Perl::Tokenizer qw(perl_tokens);
- use List::Util qw(any);
- use Getopt::Std qw(getopts);
- use Term::ANSIColor qw(color);
- my %opts;
- getopts('hnlpcNb:a:t', \%opts);
- sub usage {
- my ($code) = @_;
- print <<"HELP";
- usage: $0 [options] [types] [files]
- options:
- -b [i] : before characters
- -a [i] : after characters
- -l : print the full line
- -c : highlight the token (with -l)
- -p : print the name and position
- -n : print non-matching tokens
- -t : print the token names only
- -N : don't print a newline after the token
- types:
- Types are regular expressions.
- Example: ^operator
- ^keyword
- ^heredoc
- ^comment
- ^format
- ^backtick
- usage example: $0 -l -c regex /perl/script.pl
- $0 -l -c -n -p /perl/script.pl
- uncomment and unpod a perl script:
- $0 -N -n '^(?:pod|comment)\$' script.pl > clean_script.pl
- HELP
- exit $code;
- }
- usage(0) if $opts{h};
- my @types;
- while (@ARGV and not -f $ARGV[0]) {
- push @types, map { qr{$_} } shift @ARGV;
- }
- my $code = (
- do { local $/; <> }
- // die "usage: $0 [file]\n"
- );
- my $reset_color = color('reset');
- my $color = color('bold red on_black');
- perl_tokens {
- my ($token, $from, $to) = @_;
- if ($opts{t}) {
- say $token;
- return;
- }
- my $matches = any { $token =~ $_ } @types;
- if ($opts{n} ? !$matches : $matches) {
- if ($opts{p}) {
- print "[$token] pos($from, $to): ";
- }
- if ($opts{l} and not $token eq 'vertical_space') {
- my $beg = rindex($code, "\n", $from) + 1;
- my $end = index($code, "\n", $to);
- my $line = substr($code, $beg, ($end - $beg));
- if ($opts{c}) {
- substr($line, ($from - $beg), 0, $color);
- substr($line, ($from - $beg) + ($to - $from) + length($color), 0, $reset_color);
- }
- print $line;
- }
- else {
- if ($opts{b}) {
- print substr($code, $from - $opts{b}, $opts{b});
- }
- print substr($code, $from, ($to - $from));
- if ($opts{a}) {
- print substr($code, $to, $opts{a});
- }
- }
- print "\n" unless $opts{N};
- }
- }
- $code;
- =encoding utf8
- =head1 NAME
- pfilter - a simple token extractor.
- =head1 SYNOPSIS
- pfilter [options] [types] < [script.pl]
- Options:
- -b [i] : before characters
- -a [i] : after characters
- -l : print the full line
- -c : highlight the token (with -l)
- -p : print the name and position
- -n : print non-matching tokens
- -t : print the token names only
- -N : don't print a newline after the token
- Types:
- Types are regular expressions.
- Example: ^operator
- ^keyword
- ^heredoc
- ^comment
- ^format
- ^backtick
- For more types, see: C<perldoc Perl::Tokenizer>
- Example:
- # uncomment and unpod a Perl script:
- pfilter -N -n '^(?:pod|comment)\z' script.pl > clean_script.pl
- =head1 DESCRIPTION
- pfilter extracts tokens from a Perl script that match a given regular expression.
- =head1 AUTHOR
- Daniel "Trizen" Șuteu, E<lt>trizen@protonmail.comE<gt>
- =head1 COPYRIGHT AND LICENSE
- Copyright (C) 2015
- This library is free software; you can redistribute it and/or modify
- it under the same terms as Perl itself, either Perl version 5.22.0 or,
- at your option, any later version of Perl 5 you may have available.
- =cut
|