trizen
/
perl-scripts


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
							#!/usr/bin/perl

# Daniel "Trizen" Șuteu
# License: GPLv3
# Date: 17 April 2012
# https://github.com/trizen

# Group and list words from a wordlist that have similar ending chars

use strict;
use warnings;

use open IO => ':utf8', ':std';
use Getopt::Long qw(GetOptions);

my $min = 4;
my $max = 15;

my $min_words = 2;
my $max_words = 'inf';

my $unique = 0;

GetOptions(
           'end-min|end_min=i'     => \$min,
           'end-max|end_max=i'     => \$max,
           'group-min|group_min=i' => \$min_words,
           'group-max|group_max=i' => \$max_words,
           'unique!'               => \$unique,
          )
  or die "Error in command-line arguments!";

@ARGV or die <<"HELP";
usage: $0 [options] wordlists

options:
       --end-min=i   : minimum number of similar characters (default: $min)
       --end-max=i   : maximum number of similar characters (default: $max)
       --group-min=i : minimum number of words per group (default: $min_words)
       --group-max=i : maximum number of words per group (default: $max_words)
       --unique!     : don't use the same word in different groups (default: $unique)
HELP

--$min;    # starting with zero

foreach my $file (grep { -f } @ARGV) {
    my %table;
    open my $fh, '<', $file or do { warn "$0: can't open file $file: $!"; next };
    while (defined(my $line = <$fh>)) {
        chomp $line;

        next if (my $length = length($line)) <= $min;
        --$length;    # same as $#chars

        my @chars = split //, $line;
        for (my $i = $length - $min ; $i >= 0 ; --$i) {
            push @{$table{join q{}, @chars[$i .. $length]}}, $line;
        }
    }
    close $fh;

    my %data;
    my %seen;
    {
        local $, = "\n";
        local $\ = "\n";
        foreach my $key (
                         map  { $_->[1] }
                         sort { $b->[0] <=> $a->[0] }
                         map  { [scalar @{$table{$_}} => $_] } keys %table
          ) {
            next if length($key) > $max;
            @{$table{$key}} = do {
                my %s;
                grep { !$s{$_}++ } @{$table{$key}};
            };
            my $items = @{$table{$key}};
            next if $items < $min_words;
            next if $items > $max_words;

            if ($unique) {
                @{$table{$key}} = grep { not exists $seen{$_} } @{$table{$key}};
                @{$table{$key}} or next;
                @seen{@{$table{$key}}} = ();
            }

            #print "\e[1;46m$key\e[0m";
            print "\t\t\t==$key==";
            print @{$table{$key}};
        }
    }
}