package AI::GA;

# Author: Stefan Kamphausen <mail@skamphausen.de>
# Copyright 2001 Stefan Kamphausen.

# This implements a Simple Somewhat Generalized Genetic Algorithm

# See the bottom of this file for the POD documentation.  Search for the
# DOCS-Header.
# You can run this file through either pod2man or pod2html to produce pretty
# documentation in manual or html file format (these utilities are part of the
# Perl 5 distribution).

####################################################################
##                             LICENSE
####################################################################
# This program is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.


# Please Visit
#   http://www.skamphausen.de/software
# for recent versions, other (free) software written by this author
# and whatever else you might expect from such a page.

use strict;
#use Data::Dumper;

$GA::VERSION='0.6';

sub new {
  my ($class,$fitness,$tokenref) = @_;
  my $self = {};
  # An array which contains all the individuals as arrays of tokens
  $self->{pop} = ();
  # A Hash which contains all the fitness-values adressed by
  # the concatenated tokens
  $self->{fitvals} = ();
  # the 'alphabet' of allowed symbols
  $self->{tokens} = $tokenref;
  # user provided fitness function, gets array of tokens as arg
  $self->{fitness} = $fitness;
  # default value for mutation probability; may be overridden
  $self->{mut_prob} = 0.05;
  # a counter for the generation
  $self->{generation} = 0;
  bless $self, $class;
  return $self;
}

sub init_pop {
  my ($self,$size,$length) = @_;
  my ($i,$j);
  for ($i=0;$i<$size;$i++) {
	for ($j=0;$j<$length;$j++) {
	  my $rtok = $self->random_token();
	  #print "RTOK: $rtok\n";
	  push @{$self->{pop}[$i]}, $rtok;
	}
  }
  #print Dumper(@{$self->{pop}});
  $self->calculate_fitness();
  $self->sort_pop();
}

sub sort_pop {
  my ($self) = @_;
  @{$self->{pop}} = sort
	{$self->{fitvals}{join "",@{$a}} <=> $self->{fitvals}{join "",@{$b}}}
  @{$self->{pop}};
}

sub calculate_fitness {
  my ($self) = @_;
  my ($i,$f);
  %{$self->{fitvals}} = ();
  foreach $i (@{$self->{pop}}) {
	#print "CALC: Indiv:",@{$i},"\n";
	$f = &{$self->{fitness}}(@{$i});
	$self->{fitvals}{join "",@{$i}} = $f;
  }
}

### Return some values
sub best_fit {
  my $self = shift;
  return $self->{fitvals}{join("",@{$self->{pop}[0]})};
}
sub generation {
  my $self = shift;
  return $self->{generation};
}
sub random_token {
  my ($self) = @_;
  my $max = scalar(@{$self->{tokens}});
  
  my $ran = int rand $max;
  my $tok = @{$self->{tokens}}[$ran];
  #print "Ran: $ran, Max: $max ";
  #print "Token: $tok\n";
  return $tok;
}


### Dawn of The Next Generation 

# combines mutation and crossover
sub breed {
  # FIXME: optional mut_prob
  my $self = shift;
  my $opt_mutation_rate = shift;
  
  my @new_pop = ();
  my ($p1,$p2,$c,$i);
  # mutation
  my $p_mut = $opt_mutation_rate || $self->{mut_prob};
  # prepare for roulette wheel
  my $a_sum = 0;
  my @fit = ();
  foreach (@{$self->{pop}}) {
	#	my $f = $self->{fitvals}{join "",@{$_}};
	#	$a_sum += 1.0/($f+1);
	#	push @fit, $f;
	my $f = $self->{fitvals}{join "",@{$_}};
	my $f2 = 1.0/($f+1);
	$a_sum += $f2;
	push @fit, $f2;
  }
  @fit = sort {$b <=> $a} @fit;
  
  #print "BREED: Fitness\n",Dumper(@fit),"\n\n";
  my $length = scalar(@{$self->{pop}});
  # Golden Cage
  $new_pop[0] = @{$self->{pop}}[0];
  # Choose Parents
  for ($i=1;$i<$length;$i++) {
	$p1 = rwheel(\@fit,$a_sum);
	$p2 = rwheel(\@fit,$a_sum);
	#	print "P1: $p1 P2: $p2\n";
	push @new_pop, $self->crossover_mut($p1,$p2,$p_mut);
  }
  @{$self->{pop}} = @new_pop;
  $self->calculate_fitness();
  $self->sort_pop();
  return ++$self->{generation};
}

sub mutate {
  my ($self,$rate) = @_;
  my ($ran,$i,$t);
  my $the_rate = $rate || $self->{mut_prob};
  foreach $i (@{$self->{pop}}) {
	for ($t=0;$t<scalar(@{$i});$t++) {
	  $ran = rand();
	  if ($ran < $the_rate) {
		@{$i}[$t] = $self->random_token();
	  }
	}
  }
}

sub crossover_mut {
  my $self = shift;
  my $p1 = shift;
  my $p2 = shift;
  my $opt_mutation_rate = shift;
  
  my ($ran,$t,$new_size);
  $ran = rand();
  my $pmut = $opt_mutation_rate || $self->{mut_prob};
  my $pp1 = (1.0-$pmut)/2.0;
  
  # 50:50 for the size of the new one 
  my @new = ();
  if ($ran < 0.5) {
	$new_size = scalar(@{$self->{pop}[$p1]})
  } else {
	$new_size = scalar(@{$self->{pop}[$p2]})	
  }
  for($t=0;$t<$new_size;$t++) {
	$ran = rand();
	# 50:50 to take gene from p1 or p2 unless mutation
	if ($ran < $pmut) {
	  #print "M";
	  $new[$t] = $self->random_token();
	} elsif ($ran < $pp1) {
	  #print "1";
	  $new[$t] = @{@{$self->{pop}}[$p1]}[$t];	  
	} else {
	  #print "2";
	  $new[$t] = @{@{$self->{pop}}[$p2]}[$t];	  
	}
  }
  return \@new;
}

sub crossover {
  my $self = shift;
  my $p1 = shift;
  my $p2 = shift;
  
  my ($ran,$t,$new_size);
  $ran = rand();
  # 50:50 for the size of the new one 
  my @new = ();
  if ($ran < 0.5) {
	$new_size = scalar(@{$self->{pop}[$p1]})
  } else {
	$new_size = scalar(@{$self->{pop}[$p2]})	
  }
  for($t=0;$t<$new_size;$t++) {
	$ran = rand();
	# 50:50 to take gene from p1 or p2 unless mutation
	if ($ran < 0.5) {
	  $new[$t] = @{@{$self->{pop}}[$p1]}[$t];
	} else {
	  $new[$t] = @{@{$self->{pop}}[$p2]}[$t];
	}
  }
  return \@new;
}

### Print-Outs
sub dump_indivs {
  my $self = shift;
  my $i;
  my $len = scalar(@{$self->{pop}});
  for ($i=0;$i<$len;$i++) {
	my $s = join("",@{$self->{pop}[$i]});
	printf "%4d ",$i;
	print $s;
	printf "  {%5d}\n",$self->{fitvals}{$s};
  }
}
sub dump_best {
  my $self = shift;
  my $s = join("",@{$self->{pop}[0]});
  print $s;
  printf "  {%f}\n",$self->{fitvals}{$s};
}

### Random
sub rwheel {
  # random element of an array according to it's value
  # aka roulette wheel
  my ($a_ref,$a_sum) = @_;
  my @arr = @{$a_ref};
  my $sum = 0;
  my $i;
  #  print "RWHEEL: length = ",scalar(@arr),"\n";
  #  print "RWHEEL ARRAY: ",join(" ",@arr),"\n";
  my $ran = rand $a_sum;
  #  print "RWHEEL: RAN $ran < $a_sum\n";
  for ($i=0;$i<scalar(@arr);$i++) {
	$sum += $arr[$i];
	#	print "\tSUM: $sum \$arr[$i] = $arr[$i]\n";
	if ($sum > $ran ) {
	  return $i;
	}
  }
  die "ARGH! I never should have reached this point!\n";
}

1;

__END__

############################################################
#                           DOCS                           #
############################################################
=head1 NAME

AI::GA - a general genetic algorithm library

=head1 SYNOPSIS

    # This is a little example

    use AI::GA;

    # evolve a string that matches this target
    $target = "Hello_World";
    $len = length $target;

    # create an array of allowed tokens
    @token = ();
    for ('a'..'z') {
      push @token, $_;
    }
    for ('A'..'Z') {
      push @token, $_;
    }
    push @token, "_";

    # New GA object that sets the alphabet and the
    # fitness function
    $p = AI::GA->new(\&fitness_function,\@token);

    # initialise the population
    $p->init_pop(100,$len);

    do {
      # breed the next generation using crossover and mutation
      $gen = $p->breed();
      printf "[%5d] ", $gen;
      # built in data dumper
      $p->dump_best();
      # best_fit return the fitness of the best
    } while ($p->best_fit() > 0 && $gen < 2000);

    $p->dump_best();
    exit(0);

    # Now all we need is the fitness function that needs to understand
    # the representation of an individual

    sub fitness_function {
      my @indiv_tokens = @_ ;
      # Representation
      my $s1 = join "", @indiv_tokens;
      my $sum = 0;
      my $f;
      for($f=0;$f<$len;$f++) {
        my $z1=substr($s1,$f,1);
        my $z2=substr($target,$f,1);
        my $a=(ord($z1)-ord($z2))*(ord($z1)-ord($z2));
        $sum +=$a;
      }
      return $sum;
    }

=head1 DESCRIPTION

C<AI::GA> implements a (hopefully) generalized genetic algorithm.
It does this by using an array of allowed tokens as individuals.
The user has to provide a fitness function. There the actual
representation is implemented. If you got a string of chars it is
quite easy: simply join them. If you want to have real numbers you
should probably use a bitwise representation and calculate the
real values in your fitness function.

=head2 The Easy Way

The easy setup is pretty easy. With

   $p = AI::GA->new(\&fitness_function,\@token);

you create a new GA object which knows all the allowed tokens and
how to calculate the fitness of an individual.
Then use

   $p->init_pop($pop_size,$length_of_individual);

to initialise a random population of I<$pop_size> individuals, each
of length I<$length_of_individual>. I do not know how to make them
of variable length right now.

The main thing to do now is use the simplified C<breed()>-method

   $gen = $p->breed();

You can give an optional argument to the C<breed> method which will
be interpreted as the mutation probabiliy. This method combines
mutation and crossover (for each token there is a decision from
which parent to take the token) and returns the number of the
generation.

=head2 The Detailed Way

There are methods that provide mutation, crossover and other
functionality and can be called directly in case you do not want to
use the built in C<breed()> method. These and other methods will soon
be listed in alphabetical order. Right before that again the note that
you probably do not need this.

=over 4

=item best_fit()

Returns the fitness of the best individual of the whole population
if the population is sorted (actually returns the first element of
the internal population array).

=item calculate_fitness()

Updates the (internal) fitness values by calling the user provided
fitness function for each individual.

=item crossover($p1,$p2)

Does a simple crossover schema. All individuals are internally
represented as an array of tokens. This crossover needs the numbers of
two parents (I<$p1> and I<$p2>), usually drawn using the Roulette
Wheel technique. For each token of the offspring there is a
fifty:fifty decision whether to take from parent one or parent two.

=item crossover_mut($p1,$p2,$optional_mutation_prob)

Almost the same as C<crossover()> just that there is a little
probabiliy that a new random token is used instead of on of the
parents.

=item dump_best()

This prints the best individual to stdout in a somewhat reasonable
way.

=item dump_indivs()

Prints the whole population including their fitness values.

=item generation()

Returns the number of the current generation.

=item mutate($optional_mutation_prob)

Performs a mutation on the whole generation.

=item sort_pop()

Whenever a new population has been created and the fitness values have
been calculated it is necessary to sort the population. Some routines
rely on that.

=item random_token()

Return a random token from the user provided alphabet of allowed
tokens. 

=head1 AUTHORS

Stefan Kamphausen I<E<lt>mail@skamphausen.deE<gt>>
I<http://www.skamphausen.de/software>

=cut