861 lines
29 KiB
Perl
861 lines
29 KiB
Perl
package Hash::Util::FieldHash;
|
|
|
|
use strict;
|
|
use warnings;
|
|
no warnings 'experimental::builtin';
|
|
use builtin qw(reftype);
|
|
|
|
our $VERSION = '1.26';
|
|
|
|
use Exporter 'import';
|
|
our %EXPORT_TAGS = (
|
|
'all' => [ qw(
|
|
fieldhash
|
|
fieldhashes
|
|
idhash
|
|
idhashes
|
|
id
|
|
id_2obj
|
|
register
|
|
)],
|
|
);
|
|
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
|
|
|
|
{
|
|
require XSLoader;
|
|
my %ob_reg; # private object registry
|
|
sub _ob_reg { \ %ob_reg }
|
|
XSLoader::load();
|
|
}
|
|
|
|
sub fieldhash (\%) {
|
|
for ( shift ) {
|
|
return unless ref() && reftype( $_) eq 'HASH';
|
|
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
|
|
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 2) == 2;
|
|
return;
|
|
}
|
|
}
|
|
|
|
sub idhash (\%) {
|
|
for ( shift ) {
|
|
return unless ref() && reftype( $_) eq 'HASH';
|
|
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 0);
|
|
return $_ if Hash::Util::FieldHash::_fieldhash( $_, 1) == 1;
|
|
return;
|
|
}
|
|
}
|
|
|
|
sub fieldhashes { map &fieldhash( $_), @_ }
|
|
sub idhashes { map &idhash( $_), @_ }
|
|
|
|
1;
|
|
__END__
|
|
|
|
=head1 NAME
|
|
|
|
Hash::Util::FieldHash - Support for Inside-Out Classes
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
### Create fieldhashes
|
|
use Hash::Util qw(fieldhash fieldhashes);
|
|
|
|
# Create a single field hash
|
|
fieldhash my %foo;
|
|
|
|
# Create three at once...
|
|
fieldhashes \ my(%foo, %bar, %baz);
|
|
# ...or any number
|
|
fieldhashes @hashrefs;
|
|
|
|
### Create an idhash and register it for garbage collection
|
|
use Hash::Util::FieldHash qw(idhash register);
|
|
idhash my %name;
|
|
my $object = \ do { my $o };
|
|
# register the idhash for garbage collection with $object
|
|
register($object, \ %name);
|
|
# the following entry will be deleted when $object goes out of scope
|
|
$name{$object} = 'John Doe';
|
|
|
|
### Register an ordinary hash for garbage collection
|
|
use Hash::Util::FieldHash qw(id register);
|
|
my %name;
|
|
my $object = \ do { my $o };
|
|
# register the hash %name for garbage collection of $object's id
|
|
register $object, \ %name;
|
|
# the following entry will be deleted when $object goes out of scope
|
|
$name{id $object} = 'John Doe';
|
|
|
|
=head1 FUNCTIONS
|
|
|
|
C<Hash::Util::FieldHash> offers a number of functions in support of
|
|
L<The Inside-out Technique> of class construction.
|
|
|
|
=over
|
|
|
|
=item id
|
|
|
|
id($obj)
|
|
|
|
Returns the reference address of a reference $obj. If $obj is
|
|
not a reference, returns $obj.
|
|
|
|
This function is a stand-in replacement for
|
|
L<Scalar::Util::refaddr|Scalar::Util/refaddr>,
|
|
that is, it returns
|
|
the reference address of its argument as a numeric value. The only
|
|
difference is that C<refaddr()> returns C<undef> when given a
|
|
non-reference while C<id()> returns its argument unchanged.
|
|
|
|
C<id()> also uses a caching technique that makes it faster when
|
|
the id of an object is requested often, but slower if it is needed
|
|
only once or twice.
|
|
|
|
=item id_2obj
|
|
|
|
$obj = id_2obj($id)
|
|
|
|
If C<$id> is the id of a registered object (see L</register>), returns
|
|
the object, otherwise an undefined value. For registered objects this
|
|
is the inverse function of C<id()>.
|
|
|
|
=item register
|
|
|
|
register($obj)
|
|
register($obj, @hashrefs)
|
|
|
|
In the first form, registers an object to work with for the function
|
|
C<id_2obj()>. In the second form, it additionally marks the given
|
|
hashrefs down for garbage collection. This means that when the object
|
|
goes out of scope, any entries in the given hashes under the key of
|
|
C<id($obj)> will be deleted from the hashes.
|
|
|
|
It is a fatal error to register a non-reference $obj. Any non-hashrefs
|
|
among the following arguments are silently ignored.
|
|
|
|
It is I<not> an error to register the same object multiple times with
|
|
varying sets of hashrefs. Any hashrefs that are not registered yet
|
|
will be added, others ignored.
|
|
|
|
Registry also implies thread support. When a new thread is created,
|
|
all references are replaced with new ones, including all objects.
|
|
If a hash uses the reference address of an object as a key, that
|
|
connection would be broken. With a registered object, its id will
|
|
be updated in all hashes registered with it.
|
|
|
|
=item idhash
|
|
|
|
idhash my %hash
|
|
|
|
Makes an idhash from the argument, which must be a hash.
|
|
|
|
An I<idhash> works like a normal hash, except that it stringifies a
|
|
I<reference used as a key> differently. A reference is stringified
|
|
as if the C<id()> function had been invoked on it, that is, its
|
|
reference address in decimal is used as the key.
|
|
|
|
=item idhashes
|
|
|
|
idhashes \ my(%hash, %gnash, %trash)
|
|
idhashes \ @hashrefs
|
|
|
|
Creates many idhashes from its hashref arguments. Returns those
|
|
arguments that could be converted or their number in scalar context.
|
|
|
|
=item fieldhash
|
|
|
|
fieldhash %hash;
|
|
|
|
Creates a single fieldhash. The argument must be a hash. Returns
|
|
a reference to the given hash if successful, otherwise nothing.
|
|
|
|
A I<fieldhash> is, in short, an idhash with auto-registry. When an
|
|
object (or, indeed, any reference) is used as a fieldhash key, the
|
|
fieldhash is automatically registered for garbage collection with
|
|
the object, as if C<register $obj, \ %fieldhash> had been called.
|
|
|
|
=item fieldhashes
|
|
|
|
fieldhashes @hashrefs;
|
|
|
|
Creates any number of field hashes. Arguments must be hash references.
|
|
Returns the converted hashrefs in list context, their number in scalar
|
|
context.
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
A word on terminology: I shall use the term I<field> for a scalar
|
|
piece of data that a class associates with an object. Other terms that
|
|
have been used for this concept are "object variable", "(object) property",
|
|
"(object) attribute" and more. Especially "attribute" has some currency
|
|
among Perl programmer, but that clashes with the C<attributes> pragma. The
|
|
term "field" also has some currency in this sense and doesn't seem
|
|
to conflict with other Perl terminology.
|
|
|
|
In Perl, an object is a blessed reference. The standard way of associating
|
|
data with an object is to store the data inside the object's body, that is,
|
|
the piece of data pointed to by the reference.
|
|
|
|
In consequence, if two or more classes want to access an object they
|
|
I<must> agree on the type of reference and also on the organization of
|
|
data within the object body. Failure to agree on the type results in
|
|
immediate death when the wrong method tries to access an object. Failure
|
|
to agree on data organization may lead to one class trampling over the
|
|
data of another.
|
|
|
|
This object model leads to a tight coupling between subclasses.
|
|
If one class wants to inherit from another (and both classes access
|
|
object data), the classes must agree about implementation details.
|
|
Inheritance can only be used among classes that are maintained together,
|
|
in a single source or not.
|
|
|
|
In particular, it is not possible to write general-purpose classes
|
|
in this technique, classes that can advertise themselves as "Put me
|
|
on your @ISA list and use my methods". If the other class has different
|
|
ideas about how the object body is used, there is trouble.
|
|
|
|
For reference C<Name_hash> in L</Example 1> shows the standard implementation of
|
|
a simple class C<Name> in the well-known hash based way. It also demonstrates
|
|
the predictable failure to construct a common subclass C<NamedFile>
|
|
of C<Name> and the class C<IO::File> (whose objects I<must> be globrefs).
|
|
|
|
Thus, techniques are of interest that store object data I<not> in
|
|
the object body but some other place.
|
|
|
|
=head2 The Inside-out Technique
|
|
|
|
With I<inside-out> classes, each class declares a (typically lexical)
|
|
hash for each field it wants to use. The reference address of an
|
|
object is used as the hash key. By definition, the reference address
|
|
is unique to each object so this guarantees a place for each field that
|
|
is private to the class and unique to each object. See C<Name_id>
|
|
in L</Example 1> for a simple example.
|
|
|
|
In comparison to the standard implementation where the object is a
|
|
hash and the fields correspond to hash keys, here the fields correspond
|
|
to hashes, and the object determines the hash key. Thus the hashes
|
|
appear to be turned I<inside out>.
|
|
|
|
The body of an object is never examined by an inside-out class, only
|
|
its reference address is used. This allows for the body of an actual
|
|
object to be I<anything at all> while the object methods of the class
|
|
still work as designed. This is a key feature of inside-out classes.
|
|
|
|
=head2 Problems of Inside-out
|
|
|
|
Inside-out classes give us freedom of inheritance, but as usual there
|
|
is a price.
|
|
|
|
Most obviously, there is the necessity of retrieving the reference
|
|
address of an object for each data access. It's a minor inconvenience,
|
|
but it does clutter the code.
|
|
|
|
More important (and less obvious) is the necessity of garbage
|
|
collection. When a normal object dies, anything stored in the
|
|
object body is garbage-collected by perl. With inside-out objects,
|
|
Perl knows nothing about the data stored in field hashes by a class,
|
|
but these must be deleted when the object goes out of scope. Thus
|
|
the class must provide a C<DESTROY> method to take care of that.
|
|
|
|
In the presence of multiple classes it can be non-trivial
|
|
to make sure that every relevant destructor is called for
|
|
every object. Perl calls the first one it finds on the
|
|
inheritance tree (if any) and that's it.
|
|
|
|
A related issue is thread-safety. When a new thread is created,
|
|
the Perl interpreter is cloned, which implies that all reference
|
|
addresses in use will be replaced with new ones. Thus, if a class
|
|
tries to access a field of a cloned object its (cloned) data will
|
|
still be stored under the now invalid reference address of the
|
|
original in the parent thread. A general C<CLONE> method must
|
|
be provided to re-establish the association.
|
|
|
|
=head2 Solutions
|
|
|
|
C<Hash::Util::FieldHash> addresses these issues on several
|
|
levels.
|
|
|
|
The C<id()> function is provided in addition to the
|
|
existing C<Scalar::Util::refaddr()>. Besides its short name
|
|
it can be a little faster under some circumstances (and a
|
|
bit slower under others). Benchmark if it matters. The
|
|
working of C<id()> also allows the use of the class name
|
|
as a I<generic object> as described L<further down|/"The Generic Object">.
|
|
|
|
The C<id()> function is incorporated in I<id hashes> in the sense
|
|
that it is called automatically on every key that is used with
|
|
the hash. No explicit call is necessary.
|
|
|
|
The problems of garbage collection and thread safety are both
|
|
addressed by the function C<register()>. It registers an object
|
|
together with any number of hashes. Registry means that when the
|
|
object dies, an entry in any of the hashes under the reference
|
|
address of this object will be deleted. This guarantees garbage
|
|
collection in these hashes. It also means that on thread
|
|
cloning the object's entries in registered hashes will be
|
|
replaced with updated entries whose key is the cloned object's
|
|
reference address. Thus the object-data association becomes
|
|
thread-safe.
|
|
|
|
Object registry is best done when the object is initialized
|
|
for use with a class. That way, garbage collection and thread
|
|
safety are established for every object and every field that is
|
|
initialized.
|
|
|
|
Finally, I<field hashes> incorporate all these functions in one
|
|
package. Besides automatically calling the C<id()> function
|
|
on every object used as a key, the object is registered with
|
|
the field hash on first use. Classes based on field hashes
|
|
are fully garbage-collected and thread safe without further
|
|
measures.
|
|
|
|
=head2 More Problems
|
|
|
|
Another problem that occurs with inside-out classes is serialization.
|
|
Since the object data is not in its usual place, standard routines
|
|
like C<Storable::freeze()>, C<Storable::thaw()> and
|
|
C<Data::Dumper::Dumper()> can't deal with it on their own. Both
|
|
C<Data::Dumper> and C<Storable> provide the necessary hooks to
|
|
make things work, but the functions or methods used by the hooks
|
|
must be provided by each inside-out class.
|
|
|
|
A general solution to the serialization problem would require another
|
|
level of registry, one that associates I<classes> and fields.
|
|
So far, the functions of C<Hash::Util::FieldHash> are unaware of
|
|
any classes, which I consider a feature. Therefore C<Hash::Util::FieldHash>
|
|
doesn't address the serialization problems.
|
|
|
|
=head2 The Generic Object
|
|
|
|
Classes based on the C<id()> function (and hence classes based on
|
|
C<idhash()> and C<fieldhash()>) show a peculiar behavior in that
|
|
the class name can be used like an object. Specifically, methods
|
|
that set or read data associated with an object continue to work as
|
|
class methods, just as if the class name were an object, distinct from
|
|
all other objects, with its own data. This object may be called
|
|
the I<generic object> of the class.
|
|
|
|
This works because field hashes respond to keys that are not references
|
|
like a normal hash would and use the string offered as the hash key.
|
|
Thus, if a method is called as a class method, the field hash is presented
|
|
with the class name instead of an object and blithely uses it as a key.
|
|
Since the keys of real objects are decimal numbers, there is no
|
|
conflict and the slot in the field hash can be used like any other.
|
|
The C<id()> function behaves correspondingly with respect to non-reference
|
|
arguments.
|
|
|
|
Two possible uses (besides ignoring the property) come to mind.
|
|
A singleton class could be implemented this using the generic object.
|
|
If necessary, an C<init()> method could die or ignore calls with
|
|
actual objects (references), so only the generic object will ever exist.
|
|
|
|
Another use of the generic object would be as a template. It is
|
|
a convenient place to store class-specific defaults for various
|
|
fields to be used in actual object initialization.
|
|
|
|
Usually, the feature can be entirely ignored. Calling I<object
|
|
methods> as I<class methods> normally leads to an error and isn't used
|
|
routinely anywhere. It may be a problem that this error isn't
|
|
indicated by a class with a generic object.
|
|
|
|
=head2 How to use Field Hashes
|
|
|
|
Traditionally, the definition of an inside-out class contains a bare
|
|
block inside which a number of lexical hashes are declared and the
|
|
basic accessor methods defined, usually through C<Scalar::Util::refaddr>.
|
|
Further methods may be defined outside this block. There has to be
|
|
a DESTROY method and, for thread support, a CLONE method.
|
|
|
|
When field hashes are used, the basic structure remains the same.
|
|
Each lexical hash will be made a field hash. The call to C<refaddr>
|
|
can be omitted from the accessor methods. DESTROY and CLONE methods
|
|
are not necessary.
|
|
|
|
If you have an existing inside-out class, simply making all hashes
|
|
field hashes with no other change should make no difference. Through
|
|
the calls to C<refaddr> or equivalent, the field hashes never get to
|
|
see a reference and work like normal hashes. Your DESTROY (and
|
|
CLONE) methods are still needed.
|
|
|
|
To make the field hashes kick in, it is easiest to redefine C<refaddr>
|
|
as
|
|
|
|
sub refaddr { shift }
|
|
|
|
instead of importing it from C<Scalar::Util>. It should now be possible
|
|
to disable DESTROY and CLONE. Note that while it isn't disabled,
|
|
DESTROY will be called before the garbage collection of field hashes,
|
|
so it will be invoked with a functional object and will continue to
|
|
function.
|
|
|
|
It is not desirable to import the functions C<fieldhash> and/or
|
|
C<fieldhashes> into every class that is going to use them. They
|
|
are only used once to set up the class. When the class is up and running,
|
|
these functions serve no more purpose.
|
|
|
|
If there are only a few field hashes to declare, it is simplest to
|
|
|
|
use Hash::Util::FieldHash;
|
|
|
|
early and call the functions qualified:
|
|
|
|
Hash::Util::FieldHash::fieldhash my %foo;
|
|
|
|
Otherwise, import the functions into a convenient package like
|
|
C<HUF> or, more general, C<Aux>
|
|
|
|
{
|
|
package Aux;
|
|
use Hash::Util::FieldHash ':all';
|
|
}
|
|
|
|
and call
|
|
|
|
Aux::fieldhash my %foo;
|
|
|
|
as needed.
|
|
|
|
=head2 Garbage-Collected Hashes
|
|
|
|
Garbage collection in a field hash means that entries will "spontaneously"
|
|
disappear when the object that created them disappears. That must be
|
|
borne in mind, especially when looping over a field hash. If anything
|
|
you do inside the loop could cause an object to go out of scope, a
|
|
random key may be deleted from the hash you are looping over. That
|
|
can throw the loop iterator, so it's best to cache a consistent snapshot
|
|
of the keys and/or values and loop over that. You will still have to
|
|
check that a cached entry still exists when you get to it.
|
|
|
|
Garbage collection can be confusing when keys are created in a field hash
|
|
from normal scalars as well as references. Once a reference is I<used> with
|
|
a field hash, the entry will be collected, even if it was later overwritten
|
|
with a plain scalar key (every positive integer is a candidate). This
|
|
is true even if the original entry was deleted in the meantime. In fact,
|
|
deletion from a field hash, and also a test for existence constitute
|
|
I<use> in this sense and create a liability to delete the entry when
|
|
the reference goes out of scope. If you happen to create an entry
|
|
with an identical key from a string or integer, that will be collected
|
|
instead. Thus, mixed use of references and plain scalars as field hash
|
|
keys is not entirely supported.
|
|
|
|
=head1 EXAMPLES
|
|
|
|
The examples show a very simple class that implements a I<name>, consisting
|
|
of a first and last name (no middle initial). The name class has four
|
|
methods:
|
|
|
|
=over
|
|
|
|
=item * C<init()>
|
|
|
|
An object method that initializes the first and last name to its
|
|
two arguments. If called as a class method, C<init()> creates an
|
|
object in the given class and initializes that.
|
|
|
|
=item * C<first()>
|
|
|
|
Retrieve the first name
|
|
|
|
=item * C<last()>
|
|
|
|
Retrieve the last name
|
|
|
|
=item * C<name()>
|
|
|
|
Retrieve the full name, the first and last name joined by a blank.
|
|
|
|
=back
|
|
|
|
The examples show this class implemented with different levels of
|
|
support by C<Hash::Util::FieldHash>. All supported combinations
|
|
are shown. The difference between implementations is often quite
|
|
small. The implementations are:
|
|
|
|
=over
|
|
|
|
=item * C<Name_hash>
|
|
|
|
A conventional (not inside-out) implementation where an object is
|
|
a hash that stores the field values, without support by
|
|
C<Hash::Util::FieldHash>. This implementation doesn't allow
|
|
arbitrary inheritance.
|
|
|
|
=item * C<Name_id>
|
|
|
|
Inside-out implementation based on the C<id()> function. It needs
|
|
a C<DESTROY> method. For thread support a C<CLONE> method (not shown)
|
|
would also be needed. Instead of C<Hash::Util::FieldHash::id()> the
|
|
function C<Scalar::Util::refaddr> could be used with very little
|
|
functional difference. This is the basic pattern of an inside-out
|
|
class.
|
|
|
|
=item * C<Name_idhash>
|
|
|
|
Idhash-based inside-out implementation. Like C<Name_id> it needs
|
|
a C<DESTROY> method and would need C<CLONE> for thread support.
|
|
|
|
=item * C<Name_id_reg>
|
|
|
|
Inside-out implementation based on the C<id()> function with explicit
|
|
object registry. No destructor is needed and objects are thread safe.
|
|
|
|
=item * C<Name_idhash_reg>
|
|
|
|
Idhash-based inside-out implementation with explicit object registry.
|
|
No destructor is needed and objects are thread safe.
|
|
|
|
=item * C<Name_fieldhash>
|
|
|
|
FieldHash-based inside-out implementation. Object registry happens
|
|
automatically. No destructor is needed and objects are thread safe.
|
|
|
|
=back
|
|
|
|
These examples are realized in the code below, which could be copied
|
|
to a file F<Example.pm>.
|
|
|
|
=head2 Example 1
|
|
|
|
use strict; use warnings;
|
|
|
|
{
|
|
package Name_hash; # standard implementation: the
|
|
# object is a hash
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless {}, $obj unless ref $obj;
|
|
$obj->{ first} = $first;
|
|
$obj->{ last} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { shift()->{ first} }
|
|
sub last { shift()->{ last} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
|
|
}
|
|
|
|
{
|
|
package Name_id;
|
|
use Hash::Util::FieldHash qw(id);
|
|
|
|
my (%first, %last);
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless \ my $o, $obj unless ref $obj;
|
|
$first{ id $obj} = $first;
|
|
$last{ id $obj} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { $first{ id shift()} }
|
|
sub last { $last{ id shift()} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
|
|
sub DESTROY {
|
|
my $id = id shift;
|
|
delete $first{ $id};
|
|
delete $last{ $id};
|
|
}
|
|
|
|
}
|
|
|
|
{
|
|
package Name_idhash;
|
|
use Hash::Util::FieldHash;
|
|
|
|
Hash::Util::FieldHash::idhashes( \ my (%first, %last) );
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless \ my $o, $obj unless ref $obj;
|
|
$first{ $obj} = $first;
|
|
$last{ $obj} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { $first{ shift()} }
|
|
sub last { $last{ shift()} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
|
|
sub DESTROY {
|
|
my $n = shift;
|
|
delete $first{ $n};
|
|
delete $last{ $n};
|
|
}
|
|
|
|
}
|
|
|
|
{
|
|
package Name_id_reg;
|
|
use Hash::Util::FieldHash qw(id register);
|
|
|
|
my (%first, %last);
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless \ my $o, $obj unless ref $obj;
|
|
register( $obj, \ (%first, %last) );
|
|
$first{ id $obj} = $first;
|
|
$last{ id $obj} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { $first{ id shift()} }
|
|
sub last { $last{ id shift()} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
}
|
|
|
|
{
|
|
package Name_idhash_reg;
|
|
use Hash::Util::FieldHash qw(register);
|
|
|
|
Hash::Util::FieldHash::idhashes \ my (%first, %last);
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless \ my $o, $obj unless ref $obj;
|
|
register( $obj, \ (%first, %last) );
|
|
$first{ $obj} = $first;
|
|
$last{ $obj} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { $first{ shift()} }
|
|
sub last { $last{ shift()} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
}
|
|
|
|
{
|
|
package Name_fieldhash;
|
|
use Hash::Util::FieldHash;
|
|
|
|
Hash::Util::FieldHash::fieldhashes \ my (%first, %last);
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($first, $last) = @_;
|
|
# create an object if called as class method
|
|
$obj = bless \ my $o, $obj unless ref $obj;
|
|
$first{ $obj} = $first;
|
|
$last{ $obj} = $last;
|
|
$obj;
|
|
}
|
|
|
|
sub first { $first{ shift()} }
|
|
sub last { $last{ shift()} }
|
|
|
|
sub name {
|
|
my $n = shift;
|
|
join ' ' => $n->first, $n->last;
|
|
}
|
|
}
|
|
|
|
1;
|
|
|
|
To exercise the various implementations the script L<below|/"Example 2"> can
|
|
be used.
|
|
|
|
It sets up a class C<Name> that is a mirror of one of the implementation
|
|
classes C<Name_hash>, C<Name_id>, ..., C<Name_fieldhash>. That determines
|
|
which implementation is run.
|
|
|
|
The script first verifies the function of the C<Name> class.
|
|
|
|
In the second step, the free inheritability of the implementation
|
|
(or lack thereof) is demonstrated. For this purpose it constructs
|
|
a class called C<NamedFile> which is a common subclass of C<Name> and
|
|
the standard class C<IO::File>. This puts inheritability to the test
|
|
because objects of C<IO::File> I<must> be globrefs. Objects of C<NamedFile>
|
|
should behave like a file opened for reading and also support the C<name()>
|
|
method. This class juncture works with exception of the C<Name_hash>
|
|
implementation, where object initialization fails because of the
|
|
incompatibility of object bodies.
|
|
|
|
=head2 Example 2
|
|
|
|
use strict; use warnings; $| = 1;
|
|
|
|
use Example;
|
|
|
|
{
|
|
package Name;
|
|
use parent 'Name_id'; # define here which implementation to run
|
|
}
|
|
|
|
|
|
# Verify that the base package works
|
|
my $n = Name->init(qw(Albert Einstein));
|
|
print $n->name, "\n";
|
|
print "\n";
|
|
|
|
# Create a named file handle (See definition below)
|
|
my $nf = NamedFile->init(qw(/tmp/x Filomena File));
|
|
# use as a file handle...
|
|
for ( 1 .. 3 ) {
|
|
my $l = <$nf>;
|
|
print "line $_: $l";
|
|
}
|
|
# ...and as a Name object
|
|
print "...brought to you by ", $nf->name, "\n";
|
|
exit;
|
|
|
|
|
|
# Definition of NamedFile
|
|
package NamedFile;
|
|
use parent 'Name';
|
|
use parent 'IO::File';
|
|
|
|
sub init {
|
|
my $obj = shift;
|
|
my ($file, $first, $last) = @_;
|
|
$obj = $obj->IO::File::new() unless ref $obj;
|
|
$obj->open($file) or die "Can't read '$file': $!";
|
|
$obj->Name::init($first, $last);
|
|
}
|
|
__END__
|
|
|
|
|
|
=head1 GUTS
|
|
|
|
To make C<Hash::Util::FieldHash> work, there were two changes to
|
|
F<perl> itself. C<PERL_MAGIC_uvar> was made available for hashes,
|
|
and weak references now call uvar C<get> magic after a weakref has been
|
|
cleared. The first feature is used to make field hashes intercept
|
|
their keys upon access. The second one triggers garbage collection.
|
|
|
|
=head2 The C<PERL_MAGIC_uvar> interface for hashes
|
|
|
|
C<PERL_MAGIC_uvar> I<get> magic is called from C<hv_fetch_common> and
|
|
C<hv_delete_common> through the function C<hv_magic_uvar_xkey>, which
|
|
defines the interface. The call happens for hashes with "uvar" magic
|
|
if the C<ufuncs> structure has equal values in the C<uf_val> and C<uf_set>
|
|
fields. Hashes are unaffected if (and as long as) these fields
|
|
hold different values.
|
|
|
|
Upon the call, the C<mg_obj> field will hold the hash key to be accessed.
|
|
Upon return, the C<SV*> value in C<mg_obj> will be used in place of the
|
|
original key in the hash access. The integer index value in the first
|
|
parameter will be the C<action> value from C<hv_fetch_common>, or -1
|
|
if the call is from C<hv_delete_common>.
|
|
|
|
This is a template for a function suitable for the C<uf_val> field in
|
|
a C<ufuncs> structure for this call. The C<uf_set> and C<uf_index>
|
|
fields are irrelevant.
|
|
|
|
IV watch_key(pTHX_ IV action, SV* field) {
|
|
MAGIC* mg = mg_find(field, PERL_MAGIC_uvar);
|
|
SV* keysv = mg->mg_obj;
|
|
/* Do whatever you need to. If you decide to
|
|
supply a different key newkey, return it like this
|
|
*/
|
|
sv_2mortal(newkey);
|
|
mg->mg_obj = newkey;
|
|
return 0;
|
|
}
|
|
|
|
=head2 Weakrefs call uvar magic
|
|
|
|
When a weak reference is stored in an C<SV> that has "uvar" magic, C<set>
|
|
magic is called after the reference has gone stale. This hook can be
|
|
used to trigger further garbage-collection activities associated with
|
|
the referenced object.
|
|
|
|
=head2 How field hashes work
|
|
|
|
The three features of key hashes, I<key replacement>, I<thread support>,
|
|
and I<garbage collection> are supported by a data structure called
|
|
the I<object registry>. This is a private hash where every object
|
|
is stored. An "object" in this sense is any reference (blessed or
|
|
unblessed) that has been used as a field hash key.
|
|
|
|
The object registry keeps track of references that have been used as
|
|
field hash keys. The keys are generated from the reference address
|
|
like in a field hash (though the registry isn't a field hash). Each
|
|
value is a weak copy of the original reference, stored in an C<SV> that
|
|
is itself magical (C<PERL_MAGIC_uvar> again). The magical structure
|
|
holds a list (another hash, really) of field hashes that the reference
|
|
has been used with. When the weakref becomes stale, the magic is
|
|
activated and uses the list to delete the reference from all field
|
|
hashes it has been used with. After that, the entry is removed from
|
|
the object registry itself. Implicitly, that frees the magic structure
|
|
and the storage it has been using.
|
|
|
|
Whenever a reference is used as a field hash key, the object registry
|
|
is checked and a new entry is made if necessary. The field hash is
|
|
then added to the list of fields this reference has used.
|
|
|
|
The object registry is also used to repair a field hash after thread
|
|
cloning. Here, the entire object registry is processed. For every
|
|
reference found there, the field hashes it has used are visited and
|
|
the entry is updated.
|
|
|
|
=head2 Internal function Hash::Util::FieldHash::_fieldhash
|
|
|
|
# test if %hash is a field hash
|
|
my $result = _fieldhash \ %hash, 0;
|
|
|
|
# make %hash a field hash
|
|
my $result = _fieldhash \ %hash, 1;
|
|
|
|
C<_fieldhash> is the internal function used to create field hashes.
|
|
It takes two arguments, a hashref and a mode. If the mode is boolean
|
|
false, the hash is not changed but tested if it is a field hash. If
|
|
the hash isn't a field hash the return value is boolean false. If it
|
|
is, the return value indicates the mode of field hash. When called with
|
|
a boolean true mode, it turns the given hash into a field hash of this
|
|
mode, returning the mode of the created field hash. C<_fieldhash>
|
|
does not erase the given hash.
|
|
|
|
Currently there is only one type of field hash, and only the boolean
|
|
value of the mode makes a difference, but that may change.
|
|
|
|
=head1 AUTHOR
|
|
|
|
Anno Siegel (ANNO) wrote the xs code and the changes in perl proper
|
|
Jerry Hedden (JDHEDDEN) made it faster
|
|
|
|
=head1 COPYRIGHT AND LICENSE
|
|
|
|
Copyright (C) 2006-2007 by (Anno Siegel)
|
|
|
|
This library is free software; you can redistribute it and/or modify
|
|
it under the same terms as Perl itself, either Perl version 5.8.7 or,
|
|
at your option, any later version of Perl 5 you may have available.
|
|
|
|
=cut
|