959 lines
32 KiB
Perl
959 lines
32 KiB
Perl
# -*- mode: perl; perl-indent-level: 2; -*-
|
|
# vim: ts=8 sw=2 sts=2 noexpandtab
|
|
|
|
# Memoize.pm
|
|
#
|
|
# Copyright 1998, 1999, 2000, 2001, 2012 M. J. Dominus.
|
|
# You may copy and distribute this program under the
|
|
# same terms as Perl itself.
|
|
|
|
use strict; use warnings;
|
|
|
|
package Memoize;
|
|
our $VERSION = '1.16';
|
|
|
|
use Carp;
|
|
use Scalar::Util 1.11 (); # for set_prototype
|
|
|
|
BEGIN { require Exporter; *import = \&Exporter::import }
|
|
our @EXPORT = qw(memoize);
|
|
our @EXPORT_OK = qw(unmemoize flush_cache);
|
|
|
|
my %memotable;
|
|
|
|
sub CLONE {
|
|
my @info = values %memotable;
|
|
%memotable = map +($_->{WRAPPER} => $_), @info;
|
|
}
|
|
|
|
sub memoize {
|
|
my $fn = shift;
|
|
my %options = @_;
|
|
|
|
unless (defined($fn) &&
|
|
(ref $fn eq 'CODE' || ref $fn eq '')) {
|
|
croak "Usage: memoize 'functionname'|coderef {OPTIONS}";
|
|
}
|
|
|
|
my $uppack = caller; # TCL me Elmo!
|
|
my $name = (ref $fn ? undef : $fn);
|
|
my $cref = _make_cref($fn, $uppack);
|
|
|
|
my $normalizer = $options{NORMALIZER};
|
|
if (defined $normalizer && ! ref $normalizer) {
|
|
$normalizer = _make_cref($normalizer, $uppack);
|
|
}
|
|
|
|
my $install_name = exists $options{INSTALL}
|
|
? $options{INSTALL} # use given name (or, if undef: do not install)
|
|
: $name; # no INSTALL option provided: default to original name if possible
|
|
|
|
if (defined $install_name) {
|
|
$install_name = $uppack . '::' . $install_name
|
|
unless $install_name =~ /::/;
|
|
}
|
|
|
|
# convert LIST_CACHE => MERGE to SCALAR_CACHE => MERGE
|
|
# to ensure TIE/HASH will always be checked by _check_suitable
|
|
if (($options{LIST_CACHE} || '') eq 'MERGE') {
|
|
$options{LIST_CACHE} = $options{SCALAR_CACHE};
|
|
$options{SCALAR_CACHE} = 'MERGE';
|
|
}
|
|
|
|
# These will be the caches
|
|
my %caches;
|
|
for my $context (qw(LIST SCALAR)) { # SCALAR_CACHE must be last, to process MERGE
|
|
my $fullopt = $options{"${context}_CACHE"} ||= 'MEMORY';
|
|
my ($cache_opt, @cache_opt_args) = ref $fullopt ? @$fullopt : $fullopt;
|
|
if ($cache_opt eq 'FAULT') { # no cache
|
|
$caches{$context} = undef;
|
|
} elsif ($cache_opt eq 'HASH') { # user-supplied hash
|
|
my $cache = $cache_opt_args[0];
|
|
_check_suitable($context, ref tied %$cache);
|
|
$caches{$context} = $cache;
|
|
} elsif ($cache_opt eq 'TIE') {
|
|
carp("TIE option to memoize() is deprecated; use HASH instead")
|
|
if warnings::enabled('all');
|
|
my $module = shift(@cache_opt_args) || '';
|
|
_check_suitable($context, $module);
|
|
my $hash = $caches{$context} = {};
|
|
(my $modulefile = $module . '.pm') =~ s{::}{/}g;
|
|
require $modulefile;
|
|
tie(%$hash, $module, @cache_opt_args)
|
|
or croak "Couldn't tie memoize hash to `$module': $!";
|
|
} elsif ($cache_opt eq 'MEMORY') {
|
|
$caches{$context} = {};
|
|
} elsif ($cache_opt eq 'MERGE' and not ref $fullopt) { # ['MERGE'] was never supported
|
|
die "cannot MERGE $context\_CACHE" if $context ne 'SCALAR'; # should never happen
|
|
die 'bad cache setup order' if not exists $caches{LIST}; # should never happen
|
|
$options{MERGED} = 1;
|
|
$caches{SCALAR} = $caches{LIST};
|
|
} else {
|
|
croak "Unrecognized option to `${context}_CACHE': `$cache_opt' should be one of (MERGE TIE MEMORY FAULT HASH)";
|
|
}
|
|
}
|
|
|
|
my $wrapper = _wrap($install_name, $cref, $normalizer, $options{MERGED}, \%caches);
|
|
|
|
if (defined $install_name) {
|
|
no strict;
|
|
no warnings 'redefine';
|
|
*{$install_name} = $wrapper;
|
|
}
|
|
|
|
$memotable{$wrapper} = {
|
|
L => $caches{LIST},
|
|
S => $caches{SCALAR},
|
|
U => $cref,
|
|
NAME => $install_name,
|
|
WRAPPER => $wrapper,
|
|
};
|
|
|
|
$wrapper # Return just memoized version
|
|
}
|
|
|
|
sub flush_cache {
|
|
my $func = _make_cref($_[0], scalar caller);
|
|
my $info = $memotable{$func};
|
|
die "$func not memoized" unless defined $info;
|
|
for my $context (qw(S L)) {
|
|
my $cache = $info->{$context};
|
|
if (tied %$cache && ! (tied %$cache)->can('CLEAR')) {
|
|
my $funcname = defined($info->{NAME}) ?
|
|
"function $info->{NAME}" : "anonymous function $func";
|
|
my $context = {S => 'scalar', L => 'list'}->{$context};
|
|
croak "Tied cache hash for $context-context $funcname does not support flushing";
|
|
} else {
|
|
%$cache = ();
|
|
}
|
|
}
|
|
}
|
|
|
|
sub _wrap {
|
|
my ($name, $orig, $normalizer, $merged, $caches) = @_;
|
|
my ($cache_L, $cache_S) = @$caches{qw(LIST SCALAR)};
|
|
undef $caches; # keep the pad from keeping the hash alive forever
|
|
Scalar::Util::set_prototype(sub {
|
|
my $argstr = do {
|
|
no warnings 'uninitialized';
|
|
defined $normalizer
|
|
? ( wantarray ? ( $normalizer->( @_ ) )[0] : $normalizer->( @_ ) )
|
|
. '' # coerce undef to string while the warning is off
|
|
: join chr(28), @_;
|
|
};
|
|
|
|
if (wantarray) {
|
|
_crap_out($name, 'list') unless $cache_L;
|
|
exists $cache_L->{$argstr} ? (
|
|
@{$cache_L->{$argstr}}
|
|
) : do {
|
|
my @q = do { no warnings 'recursion'; &$orig };
|
|
$cache_L->{$argstr} = \@q;
|
|
@q;
|
|
};
|
|
} else {
|
|
_crap_out($name, 'scalar') unless $cache_S;
|
|
exists $cache_S->{$argstr} ? (
|
|
$merged ? $cache_S->{$argstr}[0] : $cache_S->{$argstr}
|
|
) : do {
|
|
my $val = do { no warnings 'recursion'; &$orig };
|
|
$cache_S->{$argstr} = $merged ? [$val] : $val;
|
|
$val;
|
|
};
|
|
}
|
|
}, prototype $orig);
|
|
}
|
|
|
|
sub unmemoize {
|
|
my $f = shift;
|
|
my $uppack = caller;
|
|
my $cref = _make_cref($f, $uppack);
|
|
|
|
unless (exists $memotable{$cref}) {
|
|
croak "Could not unmemoize function `$f', because it was not memoized to begin with";
|
|
}
|
|
|
|
my $tabent = $memotable{$cref};
|
|
unless (defined $tabent) {
|
|
croak "Could not figure out how to unmemoize function `$f'";
|
|
}
|
|
my $name = $tabent->{NAME};
|
|
if (defined $name) {
|
|
no strict;
|
|
no warnings 'redefine';
|
|
*{$name} = $tabent->{U}; # Replace with original function
|
|
}
|
|
delete $memotable{$cref};
|
|
|
|
$tabent->{U};
|
|
}
|
|
|
|
sub _make_cref {
|
|
my $fn = shift;
|
|
my $uppack = shift;
|
|
my $cref;
|
|
my $name;
|
|
|
|
if (ref $fn eq 'CODE') {
|
|
$cref = $fn;
|
|
} elsif (! ref $fn) {
|
|
if ($fn =~ /::/) {
|
|
$name = $fn;
|
|
} else {
|
|
$name = $uppack . '::' . $fn;
|
|
}
|
|
no strict;
|
|
if (defined $name and !defined(&$name)) {
|
|
croak "Cannot operate on nonexistent function `$fn'";
|
|
}
|
|
# $cref = \&$name;
|
|
$cref = *{$name}{CODE};
|
|
} else {
|
|
my $parent = (caller(1))[3]; # Function that called _make_cref
|
|
croak "Usage: argument 1 to `$parent' must be a function name or reference.\n";
|
|
}
|
|
our $DEBUG and warn "${name}($fn) => $cref in _make_cref\n";
|
|
$cref;
|
|
}
|
|
|
|
sub _crap_out {
|
|
my ($funcname, $context) = @_;
|
|
if (defined $funcname) {
|
|
croak "Function `$funcname' called in forbidden $context context; faulting";
|
|
} else {
|
|
croak "Anonymous function called in forbidden $context context; faulting";
|
|
}
|
|
}
|
|
|
|
# Raise an error if the user tries to specify one of these packages as a
|
|
# tie for LIST_CACHE
|
|
my %scalar_only = map {($_ => 1)} qw(DB_File GDBM_File SDBM_File ODBM_File), map +($_, "Memoize::$_"), qw(AnyDBM_File NDBM_File);
|
|
sub _check_suitable {
|
|
my ($context, $package) = @_;
|
|
croak "You can't use $package for LIST_CACHE because it can only store scalars"
|
|
if $context eq 'LIST' and $scalar_only{$package};
|
|
}
|
|
|
|
1;
|
|
|
|
__END__
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
Memoize - Make functions faster by trading space for time
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use Memoize;
|
|
memoize('slow_function');
|
|
slow_function(arguments); # Is faster than it was before
|
|
|
|
|
|
This is normally all you need to know. However, many options are available:
|
|
|
|
memoize(function, options...);
|
|
|
|
Options include:
|
|
|
|
NORMALIZER => function
|
|
INSTALL => new_name
|
|
|
|
SCALAR_CACHE => 'MEMORY'
|
|
SCALAR_CACHE => ['HASH', \%cache_hash ]
|
|
SCALAR_CACHE => 'FAULT'
|
|
SCALAR_CACHE => 'MERGE'
|
|
|
|
LIST_CACHE => 'MEMORY'
|
|
LIST_CACHE => ['HASH', \%cache_hash ]
|
|
LIST_CACHE => 'FAULT'
|
|
LIST_CACHE => 'MERGE'
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
I<Memoizing> a function makes it faster by trading space for time. It
|
|
does this by caching the return values of the function in a table.
|
|
If you call the function again with the same arguments, C<memoize>
|
|
jumps in and gives you the value out of the table, instead of letting
|
|
the function compute the value all over again.
|
|
|
|
=head1 EXAMPLE
|
|
|
|
Here is an extreme example. Consider the Fibonacci sequence, defined
|
|
by the following function:
|
|
|
|
# Compute Fibonacci numbers
|
|
sub fib {
|
|
my $n = shift;
|
|
return $n if $n < 2;
|
|
fib($n-1) + fib($n-2);
|
|
}
|
|
|
|
This function is very slow. Why? To compute fib(14), it first wants
|
|
to compute fib(13) and fib(12), and add the results. But to compute
|
|
fib(13), it first has to compute fib(12) and fib(11), and then it
|
|
comes back and computes fib(12) all over again even though the answer
|
|
is the same. And both of the times that it wants to compute fib(12),
|
|
it has to compute fib(11) from scratch, and then it has to do it
|
|
again each time it wants to compute fib(13). This function does so
|
|
much recomputing of old results that it takes a really long time to
|
|
run---fib(14) makes 1,200 extra recursive calls to itself, to compute
|
|
and recompute things that it already computed.
|
|
|
|
This function is a good candidate for memoization. If you memoize the
|
|
C<fib> function above, it will compute fib(14) exactly once, the first
|
|
time it needs to, and then save the result in a table. Then if you
|
|
ask for fib(14) again, it gives you the result out of the table.
|
|
While computing fib(14), instead of computing fib(12) twice, it does
|
|
it once; the second time it needs the value it gets it from the table.
|
|
It doesn't compute fib(11) four times; it computes it once, getting it
|
|
from the table the next three times. Instead of making 1,200
|
|
recursive calls to C<fib>, it makes 15. This makes the function about
|
|
150 times faster.
|
|
|
|
You could do the memoization yourself, by rewriting the function, like
|
|
this:
|
|
|
|
# Compute Fibonacci numbers, memoized version
|
|
{ my @fib;
|
|
sub fib {
|
|
my $n = shift;
|
|
return $fib[$n] if defined $fib[$n];
|
|
return $fib[$n] = $n if $n < 2;
|
|
$fib[$n] = fib($n-1) + fib($n-2);
|
|
}
|
|
}
|
|
|
|
Or you could use this module, like this:
|
|
|
|
use Memoize;
|
|
memoize('fib');
|
|
|
|
# Rest of the fib function just like the original version.
|
|
|
|
This makes it easy to turn memoizing on and off.
|
|
|
|
Here's an even simpler example: I wrote a simple ray tracer; the
|
|
program would look in a certain direction, figure out what it was
|
|
looking at, and then convert the C<color> value (typically a string
|
|
like C<red>) of that object to a red, green, and blue pixel value, like
|
|
this:
|
|
|
|
for ($direction = 0; $direction < 300; $direction++) {
|
|
# Figure out which object is in direction $direction
|
|
$color = $object->{color};
|
|
($r, $g, $b) = @{&ColorToRGB($color)};
|
|
...
|
|
}
|
|
|
|
Since there are relatively few objects in a picture, there are only a
|
|
few colors, which get looked up over and over again. Memoizing
|
|
C<ColorToRGB> sped up the program by several percent.
|
|
|
|
=head1 DETAILS
|
|
|
|
This module exports exactly one function, C<memoize>. The rest of the
|
|
functions in this package are None of Your Business.
|
|
|
|
You should say
|
|
|
|
memoize(function)
|
|
|
|
where C<function> is the name of the function you want to memoize, or
|
|
a reference to it. C<memoize> returns a reference to the new,
|
|
memoized version of the function, or C<undef> on a non-fatal error.
|
|
At present, there are no non-fatal errors, but there might be some in
|
|
the future.
|
|
|
|
If C<function> was the name of a function, then C<memoize> hides the
|
|
old version and installs the new memoized version under the old name,
|
|
so that C<&function(...)> actually invokes the memoized version.
|
|
|
|
=head1 OPTIONS
|
|
|
|
There are some optional options you can pass to C<memoize> to change
|
|
the way it behaves a little. To supply options, invoke C<memoize>
|
|
like this:
|
|
|
|
memoize(function, NORMALIZER => function,
|
|
INSTALL => newname,
|
|
SCALAR_CACHE => option,
|
|
LIST_CACHE => option
|
|
);
|
|
|
|
Each of these options is optional; you can include some, all, or none
|
|
of them.
|
|
|
|
=head2 INSTALL
|
|
|
|
If you supply a function name with C<INSTALL>, memoize will install
|
|
the new, memoized version of the function under the name you give.
|
|
For example,
|
|
|
|
memoize('fib', INSTALL => 'fastfib')
|
|
|
|
installs the memoized version of C<fib> as C<fastfib>; without the
|
|
C<INSTALL> option it would have replaced the old C<fib> with the
|
|
memoized version.
|
|
|
|
To prevent C<memoize> from installing the memoized version anywhere, use
|
|
C<INSTALL =E<gt> undef>.
|
|
|
|
=head2 NORMALIZER
|
|
|
|
Suppose your function looks like this:
|
|
|
|
# Typical call: f('aha!', A => 11, B => 12);
|
|
sub f {
|
|
my $a = shift;
|
|
my %hash = @_;
|
|
$hash{B} ||= 2; # B defaults to 2
|
|
$hash{C} ||= 7; # C defaults to 7
|
|
|
|
# Do something with $a, %hash
|
|
}
|
|
|
|
Now, the following calls to your function are all completely equivalent:
|
|
|
|
f(OUCH);
|
|
f(OUCH, B => 2);
|
|
f(OUCH, C => 7);
|
|
f(OUCH, B => 2, C => 7);
|
|
f(OUCH, C => 7, B => 2);
|
|
(etc.)
|
|
|
|
However, unless you tell C<Memoize> that these calls are equivalent,
|
|
it will not know that, and it will compute the values for these
|
|
invocations of your function separately, and store them separately.
|
|
|
|
To prevent this, supply a C<NORMALIZER> function that turns the
|
|
program arguments into a string in a way that equivalent arguments
|
|
turn into the same string. A C<NORMALIZER> function for C<f> above
|
|
might look like this:
|
|
|
|
sub normalize_f {
|
|
my $a = shift;
|
|
my %hash = @_;
|
|
$hash{B} ||= 2;
|
|
$hash{C} ||= 7;
|
|
|
|
join(',', $a, map ($_ => $hash{$_}) sort keys %hash);
|
|
}
|
|
|
|
Each of the argument lists above comes out of the C<normalize_f>
|
|
function looking exactly the same, like this:
|
|
|
|
OUCH,B,2,C,7
|
|
|
|
You would tell C<Memoize> to use this normalizer this way:
|
|
|
|
memoize('f', NORMALIZER => 'normalize_f');
|
|
|
|
C<memoize> knows that if the normalized version of the arguments is
|
|
the same for two argument lists, then it can safely look up the value
|
|
that it computed for one argument list and return it as the result of
|
|
calling the function with the other argument list, even if the
|
|
argument lists look different.
|
|
|
|
The default normalizer just concatenates the arguments with character
|
|
28 in between. (In ASCII, this is called FS or control-\.) This
|
|
always works correctly for functions with only one string argument,
|
|
and also when the arguments never contain character 28. However, it
|
|
can confuse certain argument lists:
|
|
|
|
normalizer("a\034", "b")
|
|
normalizer("a", "\034b")
|
|
normalizer("a\034\034b")
|
|
|
|
for example.
|
|
|
|
Since hash keys are strings, the default normalizer will not
|
|
distinguish between C<undef> and the empty string. It also won't work
|
|
when the function's arguments are references. For example, consider a
|
|
function C<g> which gets two arguments: A number, and a reference to
|
|
an array of numbers:
|
|
|
|
g(13, [1,2,3,4,5,6,7]);
|
|
|
|
The default normalizer will turn this into something like
|
|
C<"13\034ARRAY(0x436c1f)">. That would be all right, except that a
|
|
subsequent array of numbers might be stored at a different location
|
|
even though it contains the same data. If this happens, C<Memoize>
|
|
will think that the arguments are different, even though they are
|
|
equivalent. In this case, a normalizer like this is appropriate:
|
|
|
|
sub normalize { join ' ', $_[0], @{$_[1]} }
|
|
|
|
For the example above, this produces the key "13 1 2 3 4 5 6 7".
|
|
|
|
Another use for normalizers is when the function depends on data other
|
|
than those in its arguments. Suppose you have a function which
|
|
returns a value which depends on the current hour of the day:
|
|
|
|
sub on_duty {
|
|
my ($problem_type) = @_;
|
|
my $hour = (localtime)[2];
|
|
open my $fh, "$DIR/$problem_type" or die...;
|
|
my $line;
|
|
while ($hour-- > 0)
|
|
$line = <$fh>;
|
|
}
|
|
return $line;
|
|
}
|
|
|
|
At 10:23, this function generates the 10th line of a data file; at
|
|
3:45 PM it generates the 15th line instead. By default, C<Memoize>
|
|
will only see the $problem_type argument. To fix this, include the
|
|
current hour in the normalizer:
|
|
|
|
sub normalize { join ' ', (localtime)[2], @_ }
|
|
|
|
The calling context of the function (scalar or list context) is
|
|
propagated to the normalizer. This means that if the memoized
|
|
function will treat its arguments differently in list context than it
|
|
would in scalar context, you can have the normalizer function select
|
|
its behavior based on the results of C<wantarray>. Even if called in
|
|
a list context, a normalizer should still return a single string.
|
|
|
|
=head2 C<SCALAR_CACHE>, C<LIST_CACHE>
|
|
|
|
Normally, C<Memoize> caches your function's return values into an
|
|
ordinary Perl hash variable. However, you might like to have the
|
|
values cached on the disk, so that they persist from one run of your
|
|
program to the next, or you might like to associate some other
|
|
interesting semantics with the cached values.
|
|
|
|
There's a slight complication under the hood of C<Memoize>: There are
|
|
actually I<two> caches, one for scalar values and one for list values.
|
|
When your function is called in scalar context, its return value is
|
|
cached in one hash, and when your function is called in list context,
|
|
its value is cached in the other hash. You can control the caching
|
|
behavior of both contexts independently with these options.
|
|
|
|
The argument to C<LIST_CACHE> or C<SCALAR_CACHE> must either be one of
|
|
the following four strings:
|
|
|
|
MEMORY
|
|
FAULT
|
|
MERGE
|
|
HASH
|
|
|
|
or else it must be a reference to an array whose first element is one of
|
|
these four strings, such as C<[HASH, arguments...]>.
|
|
|
|
=over 4
|
|
|
|
=item C<MEMORY>
|
|
|
|
C<MEMORY> means that return values from the function will be cached in
|
|
an ordinary Perl hash variable. The hash variable will not persist
|
|
after the program exits. This is the default.
|
|
|
|
=item C<HASH>
|
|
|
|
C<HASH> allows you to specify that a particular hash that you supply
|
|
will be used as the cache. You can tie this hash beforehand to give
|
|
it any behavior you want.
|
|
|
|
A tied hash can have any semantics at all. It is typically tied to an
|
|
on-disk database, so that cached values are stored in the database and
|
|
retrieved from it again when needed, and the disk file typically
|
|
persists after your program has exited. See C<perltie> for more
|
|
complete details about C<tie>.
|
|
|
|
A typical example is:
|
|
|
|
use DB_File;
|
|
tie my %cache => 'DB_File', $filename, O_RDWR|O_CREAT, 0666;
|
|
memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
|
|
|
This has the effect of storing the cache in a C<DB_File> database
|
|
whose name is in C<$filename>. The cache will persist after the
|
|
program has exited. Next time the program runs, it will find the
|
|
cache already populated from the previous run of the program. Or you
|
|
can forcibly populate the cache by constructing a batch program that
|
|
runs in the background and populates the cache file. Then when you
|
|
come to run your real program the memoized function will be fast
|
|
because all its results have been precomputed.
|
|
|
|
Another reason to use C<HASH> is to provide your own hash variable.
|
|
You can then inspect or modify the contents of the hash to gain finer
|
|
control over the cache management.
|
|
|
|
=item C<TIE>
|
|
|
|
This option is no longer supported. It is still documented only to
|
|
aid in the debugging of old programs that use it. Old programs should
|
|
be converted to use the C<HASH> option instead.
|
|
|
|
memoize ... ['TIE', PACKAGE, ARGS...]
|
|
|
|
is merely a shortcut for
|
|
|
|
require PACKAGE;
|
|
{ tie my %cache, PACKAGE, ARGS...;
|
|
memoize ... [HASH => \%cache];
|
|
}
|
|
|
|
=item C<FAULT>
|
|
|
|
C<FAULT> means that you never expect to call the function in scalar
|
|
(or list) context, and that if C<Memoize> detects such a call, it
|
|
should abort the program. The error message is one of
|
|
|
|
`foo' function called in forbidden list context at line ...
|
|
`foo' function called in forbidden scalar context at line ...
|
|
|
|
=item C<MERGE>
|
|
|
|
C<MERGE> normally means that the memoized function does not
|
|
distinguish between list and scalar context, and that return values in
|
|
both contexts should be stored together. Both C<LIST_CACHE =E<gt>
|
|
MERGE> and C<SCALAR_CACHE =E<gt> MERGE> mean the same thing.
|
|
|
|
Consider this function:
|
|
|
|
sub complicated {
|
|
# ... time-consuming calculation of $result
|
|
return $result;
|
|
}
|
|
|
|
The C<complicated> function will return the same numeric C<$result>
|
|
regardless of whether it is called in list or in scalar context.
|
|
|
|
Normally, the following code will result in two calls to C<complicated>, even
|
|
if C<complicated> is memoized:
|
|
|
|
$x = complicated(142);
|
|
($y) = complicated(142);
|
|
$z = complicated(142);
|
|
|
|
The first call will cache the result, say 37, in the scalar cache; the
|
|
second will cache the list C<(37)> in the list cache. The third call
|
|
doesn't call the real C<complicated> function; it gets the value 37
|
|
from the scalar cache.
|
|
|
|
Obviously, the second call to C<complicated> is a waste of time, and
|
|
storing its return value is a waste of space. Specifying C<LIST_CACHE
|
|
=E<gt> MERGE> will make C<memoize> use the same cache for scalar and
|
|
list context return values, so that the second call uses the scalar
|
|
cache that was populated by the first call. C<complicated> ends up
|
|
being called only once, and both subsequent calls return C<37> from the
|
|
cache, regardless of the calling context.
|
|
|
|
=back
|
|
|
|
=head3 List values in scalar context
|
|
|
|
Consider this function:
|
|
|
|
sub iota { return reverse (1..$_[0]) }
|
|
|
|
This function normally returns a list. Suppose you memoize it and
|
|
merge the caches:
|
|
|
|
memoize 'iota', SCALAR_CACHE => 'MERGE';
|
|
|
|
@i7 = iota(7);
|
|
$i7 = iota(7);
|
|
|
|
Here the first call caches the list (1,2,3,4,5,6,7). The second call
|
|
does not really make sense. C<Memoize> cannot guess what behavior
|
|
C<iota> should have in scalar context without actually calling it in
|
|
scalar context. Normally C<Memoize> I<would> call C<iota> in scalar
|
|
context and cache the result, but the C<SCALAR_CACHE =E<gt> 'MERGE'>
|
|
option says not to do that, but to use the cache list-context value
|
|
instead. But it cannot return a list of seven elements in a scalar
|
|
context. In this case C<$i7> will receive the B<first element> of the
|
|
cached list value, namely 7.
|
|
|
|
=head3 Merged disk caches
|
|
|
|
Another use for C<MERGE> is when you want both kinds of return values
|
|
stored in the same disk file; this saves you from having to deal with
|
|
two disk files instead of one. You can use a normalizer function to
|
|
keep the two sets of return values separate. For example:
|
|
|
|
local $MLDBM::UseDB = 'DB_File';
|
|
tie my %cache => 'MLDBM', $filename, ...;
|
|
|
|
memoize 'myfunc',
|
|
NORMALIZER => 'n',
|
|
SCALAR_CACHE => [HASH => \%cache],
|
|
LIST_CACHE => 'MERGE',
|
|
;
|
|
|
|
sub n {
|
|
my $context = wantarray() ? 'L' : 'S';
|
|
# ... now compute the hash key from the arguments ...
|
|
$hashkey = "$context:$hashkey";
|
|
}
|
|
|
|
This normalizer function will store scalar context return values in
|
|
the disk file under keys that begin with C<S:>, and list context
|
|
return values under keys that begin with C<L:>.
|
|
|
|
=head1 OTHER FACILITIES
|
|
|
|
=head2 C<unmemoize>
|
|
|
|
There's an C<unmemoize> function that you can import if you want to.
|
|
Why would you want to? Here's an example: Suppose you have your cache
|
|
tied to a DBM file, and you want to make sure that the cache is
|
|
written out to disk if someone interrupts the program. If the program
|
|
exits normally, this will happen anyway, but if someone types
|
|
control-C or something then the program will terminate immediately
|
|
without synchronizing the database. So what you can do instead is
|
|
|
|
$SIG{INT} = sub { unmemoize 'function' };
|
|
|
|
C<unmemoize> accepts a reference to, or the name of a previously
|
|
memoized function, and undoes whatever it did to provide the memoized
|
|
version in the first place, including making the name refer to the
|
|
unmemoized version if appropriate. It returns a reference to the
|
|
unmemoized version of the function.
|
|
|
|
If you ask it to unmemoize a function that was never memoized, it
|
|
croaks.
|
|
|
|
=head2 C<flush_cache>
|
|
|
|
C<flush_cache(function)> will flush out the caches, discarding I<all>
|
|
the cached data. The argument may be a function name or a reference
|
|
to a function. For finer control over when data is discarded or
|
|
expired, see the documentation for C<Memoize::Expire>, included in
|
|
this package.
|
|
|
|
Note that if the cache is a tied hash, C<flush_cache> will attempt to
|
|
invoke the C<CLEAR> method on the hash. If there is no C<CLEAR>
|
|
method, this will cause a run-time error.
|
|
|
|
An alternative approach to cache flushing is to use the C<HASH> option
|
|
(see above) to request that C<Memoize> use a particular hash variable
|
|
as its cache. Then you can examine or modify the hash at any time in
|
|
any way you desire. You may flush the cache by using C<%hash = ()>.
|
|
|
|
=head1 CAVEATS
|
|
|
|
Memoization is not a cure-all:
|
|
|
|
=over 4
|
|
|
|
=item *
|
|
|
|
Do not memoize a function whose behavior depends on program
|
|
state other than its own arguments, such as global variables, the time
|
|
of day, or file input. These functions will not produce correct
|
|
results when memoized. For a particularly easy example:
|
|
|
|
sub f {
|
|
time;
|
|
}
|
|
|
|
This function takes no arguments, and as far as C<Memoize> is
|
|
concerned, it always returns the same result. C<Memoize> is wrong, of
|
|
course, and the memoized version of this function will call C<time> once
|
|
to get the current time, and it will return that same time
|
|
every time you call it after that.
|
|
|
|
=item *
|
|
|
|
Do not memoize a function with side effects.
|
|
|
|
sub f {
|
|
my ($a, $b) = @_;
|
|
my $s = $a + $b;
|
|
print "$a + $b = $s.\n";
|
|
}
|
|
|
|
This function accepts two arguments, adds them, and prints their sum.
|
|
Its return value is the number of characters it printed, but you
|
|
probably didn't care about that. But C<Memoize> doesn't understand
|
|
that. If you memoize this function, you will get the result you
|
|
expect the first time you ask it to print the sum of 2 and 3, but
|
|
subsequent calls will return 1 (the return value of
|
|
C<print>) without actually printing anything.
|
|
|
|
=item *
|
|
|
|
Do not memoize a function that returns a data structure that is
|
|
modified by its caller.
|
|
|
|
Consider these functions: C<getusers> returns a list of users somehow,
|
|
and then C<main> throws away the first user on the list and prints the
|
|
rest:
|
|
|
|
sub main {
|
|
my $userlist = getusers();
|
|
shift @$userlist;
|
|
foreach $u (@$userlist) {
|
|
print "User $u\n";
|
|
}
|
|
}
|
|
|
|
sub getusers {
|
|
my @users;
|
|
# Do something to get a list of users;
|
|
\@users; # Return reference to list.
|
|
}
|
|
|
|
If you memoize C<getusers> here, it will work right exactly once. The
|
|
reference to the users list will be stored in the memo table. C<main>
|
|
will discard the first element from the referenced list. The next
|
|
time you invoke C<main>, C<Memoize> will not call C<getusers>; it will
|
|
just return the same reference to the same list it got last time. But
|
|
this time the list has already had its head removed; C<main> will
|
|
erroneously remove another element from it. The list will get shorter
|
|
and shorter every time you call C<main>.
|
|
|
|
Similarly, this:
|
|
|
|
$u1 = getusers();
|
|
$u2 = getusers();
|
|
pop @$u1;
|
|
|
|
will modify $u2 as well as $u1, because both variables are references
|
|
to the same array. Had C<getusers> not been memoized, $u1 and $u2
|
|
would have referred to different arrays.
|
|
|
|
=item *
|
|
|
|
Do not memoize a very simple function.
|
|
|
|
Recently someone mentioned to me that the Memoize module made his
|
|
program run slower instead of faster. It turned out that he was
|
|
memoizing the following function:
|
|
|
|
sub square {
|
|
$_[0] * $_[0];
|
|
}
|
|
|
|
I pointed out that C<Memoize> uses a hash, and that looking up a
|
|
number in the hash is necessarily going to take a lot longer than a
|
|
single multiplication. There really is no way to speed up the
|
|
C<square> function.
|
|
|
|
Memoization is not magical.
|
|
|
|
=back
|
|
|
|
=head1 PERSISTENT CACHE SUPPORT
|
|
|
|
You can tie the cache tables to any sort of tied hash that you want
|
|
to, as long as it supports C<TIEHASH>, C<FETCH>, C<STORE>, and
|
|
C<EXISTS>. For example,
|
|
|
|
tie my %cache => 'GDBM_File', $filename, O_RDWR|O_CREAT, 0666;
|
|
memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
|
|
|
works just fine. For some storage methods, you need a little glue.
|
|
|
|
C<SDBM_File> doesn't supply an C<EXISTS> method, so included in this
|
|
package is a glue module called C<Memoize::SDBM_File> which does
|
|
provide one. Use this instead of plain C<SDBM_File> to store your
|
|
cache table on disk in an C<SDBM_File> database:
|
|
|
|
tie my %cache => 'Memoize::SDBM_File', $filename, O_RDWR|O_CREAT, 0666;
|
|
memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
|
|
|
C<NDBM_File> has the same problem and the same solution. (Use
|
|
C<Memoize::NDBM_File instead of plain NDBM_File.>)
|
|
|
|
C<Storable> isn't a tied hash class at all. You can use it to store a
|
|
hash to disk and retrieve it again, but you can't modify the hash while
|
|
it's on the disk. So if you want to store your cache table in a
|
|
C<Storable> database, use C<Memoize::Storable>, which puts a hashlike
|
|
front-end onto C<Storable>. The hash table is actually kept in
|
|
memory, and is loaded from your C<Storable> file at the time you
|
|
memoize the function, and stored back at the time you unmemoize the
|
|
function (or when your program exits):
|
|
|
|
tie my %cache => 'Memoize::Storable', $filename;
|
|
memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
|
|
|
tie my %cache => 'Memoize::Storable', $filename, 'nstore';
|
|
memoize 'function', SCALAR_CACHE => [HASH => \%cache];
|
|
|
|
Include the C<nstore> option to have the C<Storable> database written
|
|
in I<network order>. (See L<Storable> for more details about this.)
|
|
|
|
The C<flush_cache()> function will raise a run-time error unless the
|
|
tied package provides a C<CLEAR> method.
|
|
|
|
=head1 EXPIRATION SUPPORT
|
|
|
|
See Memoize::Expire, which is a plug-in module that adds expiration
|
|
functionality to Memoize. If you don't like the kinds of policies
|
|
that Memoize::Expire implements, it is easy to write your own plug-in
|
|
module to implement whatever policy you desire. Memoize comes with
|
|
several examples. An expiration manager that implements a LRU policy
|
|
is available on CPAN as Memoize::ExpireLRU.
|
|
|
|
=head1 BUGS
|
|
|
|
The test suite is much better, but always needs improvement.
|
|
|
|
There is some problem with the way C<goto &f> works under threaded
|
|
Perl, perhaps because of the lexical scoping of C<@_>. This is a bug
|
|
in Perl, and until it is resolved, memoized functions will see a
|
|
slightly different C<caller()> and will perform a little more slowly
|
|
on threaded perls than unthreaded perls.
|
|
|
|
Some versions of C<DB_File> won't let you store data under a key of
|
|
length 0. That means that if you have a function C<f> which you
|
|
memoized and the cache is in a C<DB_File> database, then the value of
|
|
C<f()> (C<f> called with no arguments) will not be memoized. If this
|
|
is a big problem, you can supply a normalizer function that prepends
|
|
C<"x"> to every key.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
At L<https://perl.plover.com/MiniMemoize/> there is an article about
|
|
memoization and about the internals of Memoize that appeared in The
|
|
Perl Journal, issue #13.
|
|
|
|
Mark-Jason Dominus's book I<Higher-Order Perl> (2005, ISBN 1558607013,
|
|
published
|
|
by Morgan Kaufmann) discusses memoization (and many other
|
|
topics) in tremendous detail. It is available on-line for free.
|
|
For more information, visit L<https://hop.perl.plover.com/>.
|
|
|
|
=head1 THANK YOU
|
|
|
|
Many thanks to Florian Ragwitz for administration and packaging
|
|
assistance, to John Tromp for bug reports, to Jonathan Roy for bug reports
|
|
and suggestions, to Michael Schwern for other bug reports and patches,
|
|
to Mike Cariaso for helping me to figure out the Right Thing to Do
|
|
About Expiration, to Joshua Gerth, Joshua Chamas, Jonathan Roy
|
|
(again), Mark D. Anderson, and Andrew Johnson for more suggestions
|
|
about expiration, to Brent Powers for the Memoize::ExpireLRU module,
|
|
to Ariel Scolnicov for delightful messages about the Fibonacci
|
|
function, to Dion Almaer for thought-provoking suggestions about the
|
|
default normalizer, to Walt Mankowski and Kurt Starsinic for much help
|
|
investigating problems under threaded Perl, to Alex Dudkevich for
|
|
reporting the bug in prototyped functions and for checking my patch,
|
|
to Tony Bass for many helpful suggestions, to Jonathan Roy (again) for
|
|
finding a use for C<unmemoize()>, to Philippe Verdret for enlightening
|
|
discussion of C<Hook::PrePostCall>, to Nat Torkington for advice I
|
|
ignored, to Chris Nandor for portability advice, to Randal Schwartz
|
|
for suggesting the 'C<flush_cache> function, and to Jenda Krynicky for
|
|
being a light in the world.
|
|
|
|
Special thanks to Jarkko Hietaniemi, the 5.8.0 pumpking, for including
|
|
this module in the core and for his patient and helpful guidance
|
|
during the integration process.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Mark Jason Dominus
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
This software is copyright (c) 2012 by Mark Jason Dominus.
|
|
|
|
This is free software; you can redistribute it and/or modify it under
|
|
the same terms as the Perl 5 programming language system itself.
|
|
|
|
=cut
|