pgimeno
/
esofiles


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
							#!/usr/bin/perl -w

# Sortle interpreter - Implements the esoteric programming language "Sortle"
# Written in 2012 by Scott Feeney aka Graue
# Last modified Mar. 12, 2012 - Optimize regex matching for groups of just dots
#
# http://esolangs.org/wiki/Sortle
# http://esolangs.org/wiki/User:Graue
#
# To the extent possible under law, the author(s) have dedicated all copyright
# and related and neighboring rights to this software to the public domain
# worldwide. This software is distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain Dedication along
# with this software. If not, see:
# http://creativecommons.org/publicdomain/zero/1.0/
#
# If this implementation disagrees with something explicitly stated in the
# language spec, it is a bug in the implementation, not the spec. Please let
# me know of any such bugs you find! You can email me via my user page on the
# wiki (linked above). Thanks.

use strict;

#STILL TO DO:
# maybe use bignums for integer calculations and conversions(?)
# test some more
# clean up debug msgs

# $verbose can be:
#  0: no debug info - only print the final expression's name (if program halts)
#  1: show state after each evaluation - useful for non-halting programs
#  2: show state AND show progress of stack during evals
#  3: all that AND show progress of regex matches
#  1000: all that AND random debugging crap
my $verbose = 0;

# Leave strings untouched, while converting integers to strings,
# with the odd requirement that in Sortle, the integer 0
# becomes the null string.
#
# All input strings should be prefixed with '"'.
# Output string is also prefixed with '"'.
#
sub sortlestring($) {
	my $s = $_[0];

	return $s if substr($s, 0, 1) eq '"';
	return '"' if $s == 0;
	return "\"$s";
}

# Convert to integer without any annoying warning about the argument
# not being numeric (just use 0 then).
# If input is a string, and it is prefixed with ", this marker will be removed
# prior to conversion.
sub toint($) {
	my $s = "$_[0]";
	$s = substr($s, 1) if substr($s, 0, 1) eq '"';
	my $x;
	if (($x) = $s =~ /^([+-]?\d+)/) {
		$x = int $x;
	} else {
		$x = 0;
	}
	return $x;
}

# return 1 if both strings are equal, 0 otherwise,
# considering the character "." in $a to be equivalent to any character in $b.
sub stringequaldot($$) {
	my ($a, $b) = @_;
	return 0 if length $a != length $b;
	for (my $x = 0; $x < length $a; $x++) {
		my $ac = substr $a, $x, 1;
		next if $ac eq ".";
		my $bc = substr $b, $x, 1;
		return 0 if $ac ne $bc;
	}
	return 1;
}

sub matchregex($$;$);

# match a verified well-formed regex against a specific string
# (no substrings)
# return: string containing captures, or undef if no match
sub matchregex($$;$) {
	my ($regex, $s, $captureall) = @_;

	if ($verbose >= 3) {
		print "    ";
		# print - if non-recursive
		print (defined($captureall) ? " " : "-");

		print "matchregex \"$regex\" \"$s\" ";
		print "capt:$captureall" if defined $captureall;
		print "\n";
	}

	# $captureall is only set when this is called recursively.
	# if not set, turn it off if regex contains () and on otherwise.
	if (!defined $captureall) {
		$captureall = $regex !~ /\(/ || 0;
	}

	# a null string is matched only by an empty regex,
	# which captures the empty string
	if ($s eq "") {
		if ($regex eq "") { return ""; }
		else { return undef; }
	}

	# otherwise, if regex is empty it's a non-match
	return undef if $regex eq "";

	# separate out any non-grouped characters at beginning of regex
	# not followed by a modifier (@ or !)
	my ($reghead) = $regex =~ /^([^([@!]+)(?![@!])/;
	if (defined $reghead) {
		my $stringhead = substr $s, 0, length $reghead;
		print "     (found reghead: $reghead vs. $stringhead)\n" if $verbose >= 1000;
		return undef if !stringequaldot($reghead, $stringhead);
		print "     (heads match)\n" if $verbose >= 1000;

		# head matches, see if the rest does
		my $tailmatch = matchregex((substr $regex, length $reghead),
			(substr $s, length $reghead), $captureall);
		return undef if !defined $tailmatch;

		# so it did match and captured $tailmatch. If capturing all,
		# prepend our part-match to that, otherwise leave it alone
		return ($captureall ? $stringhead . $tailmatch : $tailmatch);
	}

	# at the beginning of the regex,
	# we have either a group, or a single character with modifier
	# (which we can treat as a [] group with modifier).
	# first, get the group.
	my ($group) = $regex =~ /^(\[[^[]*\])/;
	($group) = $regex =~ /^(\([^(]*\))/ if !defined $group;
	$group = "[" . substr($regex, 0, 1) . "]" if !defined $group;

	# now get its modifier, if any.

	my $mod;
	my $tailidx = length $group; # string offset in regex after group

	# if group was a single character, and we added [] around it,
	# account for that.
	print "     group is >>>$group<<<\n" if $verbose >= 1000;
	print "     length of group before adjusting is $tailidx\n" if $verbose >= 1000;
	$tailidx -= 2 if index("([", substr($regex, 0, 1)) == -1;

	print "     length of group is $tailidx and length of regex is "
		. length($regex) . "\n" if $verbose >= 1000;
	if ($tailidx == length $regex) { $mod = ""; }
	elsif (substr($regex, $tailidx, 1) eq "@") {
		$mod = "@";
		$tailidx++;
	} elsif (substr($regex, $tailidx, 1) eq "!") {
		$mod = "!";
		$tailidx++;
	} else { $mod = ""; }

	# min and max number of times the group must repeat
	my $minrepeat = (($mod eq "@") ? 0 : 1);
	my $maxrepeat = (($mod eq "!") ? -999 : 1);

	# content of group:
	# if $group is "(foo)" or "[foo]", $content is "foo"
	my $content = substr $group, 1, -1;

	# match group with fewest number of repetitions that allows
	# rest of string to match (minimal munch)
	my $tailmatch = undef; # set to partial capture when tail matched

	my $matchcount; # num chars matched in the string
	if ($content =~ /^\.+$/ && $maxrepeat < 0) {
		# optimize: group is entirely dots, like the common (.)! pattern
		# with no maximum repetition count
		my $dotstep = length $content; # step size, e.g. (..)! can only
		                               # match an even number of chars
		my $dots;
		for ($dots = $minrepeat*$dotstep; $dots <= length $s; $dots += $dotstep) {

			# guaranteed the dots match... does the tail match?
			$tailmatch = matchregex(substr($regex, $tailidx),
				substr($s, $dots), $captureall);
			last if defined $tailmatch;
		}
		$matchcount = $dots if defined $tailmatch;
	} else {
		# general case
		my ($acc, $reps);
		for ($acc = "", $reps = 0;
		  $reps-1 != $maxrepeat;
		  $acc .= $content, $reps += 1) {

			print "     ACC is >>>$acc<<< REPS is $reps NEED [$minrepeat, $maxrepeat]\n" if $verbose >= 1000;
			# do we have enough repetitions yet?
			next if $reps < $minrepeat; # nope, build up $acc first

			# does the repeated group match?
			last if !stringequaldot($acc, substr($s, 0, length $acc));

			# does the tail match?
			$tailmatch = matchregex(substr($regex, $tailidx),
				substr($s, length $acc), $captureall);
			last if defined $tailmatch;
		}
		$matchcount = length $acc if defined $tailmatch;
	}

	# if tail not matched for any allowed number of repetitions,
	# then the whole regex doesn't match
	return undef if !defined $tailmatch;

	# 1. If capturing all, return what we matched here, plus the tail.
	if ($captureall) {
		return substr($s, 0, $matchcount) . $tailmatch;
	}
	# 2. Otherwise, if this group was (), return what we matched here.
	elsif (substr($group, 0, 1) eq "(") {
		return substr($s, 0, $matchcount);
	}
	# 3. Group [] and not capturing all, so return (possibly empty) tail
	#    only.
	else { return $tailmatch; }
}

# evaluate a regex (against $s OR all other expression names)
# and return string result: capture if matched, "" otherwise
# if () are not used in regex, the capture is the whole string
sub evalregex($$$$) {
	my ($regex, $s, $ip, $exprref) = @_;

	# check regex for nested groups
	die "Nested groups not allowed in regex \"$regex\""
		if $regex =~ /\([^)]*[([]/
		|| $regex =~ /\[[^\]]*[([]/;

	# or unclosed []s
	my $opencount = 0;
	$opencount++ while $regex =~ /\[/g;
	$opencount-- while $regex =~ /\]/g;
	die "Unclosed [] group in regex \"$regex\"" if $opencount != 0;

	# or unclosed or multiple ()s
	$opencount = 0;
	$opencount++ while $regex =~ /\(/g;
	die "Multiple () groups not allowed in regex \"$regex\""
		if $opencount > 1;
	$opencount-- while $regex =~ /\)/g;
	die "Unclosed () group in regex \"$regex\"" if $opencount != 0;

	if ($s ne "") {
		# Test every substring of $s, starting with one-byte
		# substrings from left to right.
		my $match = undef; # set to captured string when matched
		for (my $len = 1; $len <= length $s && !defined $match;
		  $len++) {
			for (my $start = 0; length $s - $len >= $start
			  && !defined $match; $start++) {
				$match = matchregex($regex,
					(substr $s, $start, $len));
			}
		}
		return $match || "";
	}

	# op1 is "", so search all expression names other than the current
	# expression ($ip).
	# The .pdf spec doesn't specify an order, but the earlier sortle.txt
	# spec says expression names are searched in reverse order, starting
	# with the one before the current expression.
	# So we'll do that. To start, find the index of the current ip.
	my @expnames = reverse sort keys %{ $exprref };
	my $idx;
	for ($idx = 0; $expnames[$idx] ne $ip; $idx++) {
		die "Internal error" if $idx == $#expnames;
	}

	$idx = ($idx + 1) % (scalar @expnames);
	my $match = undef; # set to captured string when matched
	while ($expnames[$idx] ne $ip && !defined $match) {
		$match = matchregex($regex, $expnames[$idx]);
		$idx = ($idx + 1) % (scalar @expnames);
	}
	return $match || "";
}

# evaluate an expression (array reference)
# and return string result. 2nd arg - ip; 3rd - reference to exprs hash
sub evalexpr($$$) {
	my @expr = @{ $_[0] };
	my $ip = $_[1];
	my $exprref = $_[2];
	my @stack = ();

	foreach my $tok (@expr) {
		if ($verbose >= 2) {
			# print stack and token
			print "  -stack:";
			foreach my $elmt (@stack) {
				print " [$elmt]";
			}
			print "\n";
			print "   token: $tok\n";
		}

		# literal string
		# Note: This preserves the " at the beginning (not end)
		# when pushing on stack. This is done because there is
		# seemingly no way to distinguish string "0" from number 0
		# in Perl (it's too eager to convert one to the other).
		# We need this because in Sortle, the number 0 coerced to a
		# string becomes not "0" but "".
		# Thus, on the stack, we would store "0" as ["0]
		# and 0 as [0], without the [].
		# toint() will remove the leading " if needed
		# (but does not require it), and sortlestring() adds the "
		# if not present.
		if (my ($s) = $tok =~ /^(".*)"$/) {

			# handle escape sequences: \ab => character 0xab
			$s =~ s/\\([[:xdigit:]]{2})/chr hex $1/ge;

			# (To be pedantic, we could check for uses of \
			#  outside a valid escape sequence, and report an
			#  error - but I'm too lazy, so that can just be
			#  considered undefined behavior.)

			push @stack, $s;
			next;
		}

		# literal number
		if ($tok =~ /^[+-]?\d+$/) {
			push @stack, toint($tok);
			next;
		}

		# must be an operator... all of which take 2 operands
		die "Stack empty" if 1+$#stack < 2;
		my $op1 = pop @stack;
		my $op2 = pop @stack;

		if (index("+*/%", $tok) != -1) {
			# operator requires and produces numbers
			$op1 = toint($op1);
			$op2 = toint($op2);

			push(@stack, $op1 + $op2) if $tok eq "+";
			push(@stack, $op1 * $op2) if $tok eq "*";
			push(@stack, int($op1 / $op2)) if $tok eq "/";
			push(@stack, $op1 % $op2) if $tok eq "%";
		} else {
			# operator requires and produces strings
			$op1 = substr sortlestring($op1), 1;
			$op2 = substr sortlestring($op2), 1;

			push(@stack, '"' . $op2 . $op1) if $tok eq "~";

			if ($tok eq "^" || $tok eq "\$") {
				if ($op1 gt $op2) { push(@stack, '"' . $op1); }
				else { push(@stack, '"' . $op2); }
			}

			push(@stack, '"' . evalregex($op2, $op1, $ip, $exprref))
				if $tok eq "?";
		}
	}

	# Need exactly one expression on stack
	if (scalar @stack != 1) {
		die "Expression left " . (scalar @stack) . " values on stack";
	}
	print "  -final stack: [$stack[0]]\n\n" if $verbose >= 2;

	# Convert the one expression to a string, if needed
	my $final = $stack[0];
	$final = sortlestring($final) if $final !~ /^"/;

	# Remove the " prefix when returning the string
	return substr($final, 1);
}

# print current state
# usage: printstate($ip, \%exprs)
sub printstate($$) {
	my $ip = $_[0];
	my %exprs = %{$_[1]};
	foreach my $key (sort keys %exprs) {
		print "*" if $ip eq $key; # expression to evaluate next
		print "$key :=";
		foreach my $x (@{ $exprs{$key} }) {
			print " $x";
		}
		print "\n";
	}
}

# run one step of the program
# usage: advancestate(\$ip, \%exprs)
sub advancestate($$) {
	my $ipref = $_[0];
	my $oldname = $$ipref; # needed in case the expression suicides
	my $exprref = $_[1];
	my $newname = evalexpr([@{ $exprref->{$$ipref} }], $$ipref,
		$_[1]);

	# store expression under new name, unless new name is blank (suicide)
	$exprref->{$newname} = $exprref->{$$ipref}
		unless $newname eq "";

	# delete old name, unless it's the same as the new name
	delete $exprref->{$$ipref}
		unless $$ipref eq $newname;

	# find next expression in sorted order, wrapping, after $newname.
	# exception: if $newname is blank (expression has committed suicide),
	# we want the one after where the suicided expression WAS.
	my $isnext = 0;
	undef $$ipref;
	foreach my $x (sort keys %$exprref) {
		if ($newname eq "" && $x gt $oldname) {
			# expression suicided, but would have been before
			# this one.
			$$ipref = $x;
			last;
		}
		if ($x eq $newname) {
			$isnext = 1;
			next;
		}
		if ($isnext == 1) {
			$$ipref = $x;
			$isnext = 0;
			last;
		}
	}
	if (!defined $$ipref) { # means we need to wrap around
		$$ipref = (sort keys %$exprref)[0];
	}
}


# Top-level interpreter

my %exprs = (); # code expressions

# for concatenating multiple lines when a line ends with a \
my $partialline;

# read source code from stdin or named input files
while (my $line = <>) {
	chomp $line;
	if (defined $partialline) {
		$line = $partialline . " " . $line;
		undef $partialline;
	}
	next if $line =~ /^\s*#/ || $line =~ /^\s*$/; # skip blank lines

	# if the line ends with a \, but the \ is NOT within a comment,
	# delete the \ and prepend this line to what's on the next line
	# ($tmp stores $line with quoted strings removed, to test if a
	#  comment is present)
	my $tmp = $line;
	$tmp =~ s/"[^"]*"//g; # remove closed quoted strings from $tmp
	$tmp =~ s/"[^"]*$//; # remove an unclosed string, if present
	if ($tmp !~ /#/ && $line =~ /\\$/) { # no comment, but a \ on end
		$partialline = substr $line, 0, -1;
		next;
		# rest of line will be read, and line parsed, on next loop
	}

	# split expression name and value
	my ($name, $contents) = $line =~
		/^\s*([A-Za-z][A-Za-z0-9]*)\s*:=(.+)$/;
	die "Sortle syntax error" if !defined $name || !defined $contents;
	$contents =~ s/^(\s*)//;
	$contents =~ s/(\s*)$//;

	# split tokens at whitespace (except inside quoted strings)
	# Note: this code will allow strings to contain escaped double-quotes
	#       e.g "this is a \"string\"", which technically isn't how Sortle
	#       works. But it is undefined behavior because backslashes are
	#       only supposed to be used in Sortle strings when followed by
	#       two hex digits, e.g. \0a for newline. Backslashes should be
	#       escaped in the same manner, i.e. "this is a \5c\22string\22\5c"
	my @tokens = $contents =~ m/\s* ("(?:(?!(?<!\\)").)*" | '(?:(?!(?<!\\)').)*' | \S+)/gx;
	die "Empty expression" if (scalar @tokens) <= 0;

	# check that tokens are valid, and remove comments
	my @terms = ();
	foreach my $tok (@tokens) {
		last if $tok =~ /^#/; # comment: ignore further tokens on line
		push @terms, $tok;

		next if $tok =~ /^[+-]?\d+$/; # number
		next if $tok =~ /^".*"$/; # string
		next if length $tok == 1
			&& index("+*/%^~?\$", $tok) != -1; # operator
		die "Invalid token: $tok";
	}
	@{ $exprs{$name} } = @terms;
}
die "Last line cannot end with \\" if defined $partialline;
die "No expressions defined" if scalar keys %exprs == 0;

my $ip = (sort keys %exprs)[0];

my $evalcount = 0;
while (scalar keys %exprs > 1) {
	if ($verbose >= 1) {
		printstate($ip, \%exprs);
		print "$evalcount expressions evaluated\n\n";
	}

	advancestate(\$ip, \%exprs);
	++$evalcount;
}

if ($verbose >= 1) {
	printstate($ip, \%exprs);
	print "$evalcount expressions evaluated\n\n";
}

print((keys %exprs)[0] . "\n");