Experimenting with Genetic Algorithms

Genetic algorithm trying to find a series of four mathematical operations (e.g. -3*4/7+9) that would result in the number 42.

I’m teaching a numerical methods class that’s partly an introduction to programming, and partly a survey of numerical solutions to different types of problems students might encounter in the wild. I thought I’d look into doing a session on genetic algorithms, which are an important precursor to things like networks that have been found to be useful in a wide variety of fields including image and character recognition, stock market prediction and medical diagnostics.

The ai-junkie, bare-essentials page on genetic algorithms seemed a reasonable place to start. The site is definitely readable and I was able to put together a code to try to solve its example problem: to figure out what series of four mathematical operations using only single digits (e.g. +5*3/2-7) would give target number (42 in this example).

The procedure is as follows:

Initialize: Generate several random sets of four operations,
Test for fitness: Check which ones come closest to the target number,
Select: Select the two best options (which is not quite what the ai-junkie says to do, but it worked better for me),
Mate: Combine the two best options semi-randomly (i.e. exchange some percentage of the operations) to produce a new set of operations
Mutate: swap out some small percentage of the operations randomly,
Repeat: Go back to the second step (and repeat until you hit the target).

And this is the code I came up with:

genetic_algorithm2.py

''' Write a program to combine the sequence of numbers 0123456789 and
    the operators */+- to get the target value (42 (as an integer))
'''

'''
Procedure:
    1. Randomly generate a few sequences (ns=10) where each sequence is 8
       charaters long (ng=8).
    2. Check how close the sequence's value is to the target value.
        The closer the sequence the higher the weight it will get so use:
            w = 1/(value - target)
    3. Chose two of the sequences in a way that gives preference to higher
       weights.
    4. Randomly combine the successful sequences to create new sequences (ns=10)
    5. Repeat until target is achieved.

'''
from visual import *
from visual.graph import *
from random import *
import operator

# MODEL PARAMETERS
ns = 100
target_val = 42 #the value the program is trying to achieve
sequence_length = 4  # the number of operators in the sequence
crossover_rate = 0.3
mutation_rate = 0.1
max_itterations = 400


class operation:
    def __init__(self, operator = None, number = None, nmin = 0, nmax = 9, type="int"):
        if operator == None:
            n = randrange(1,5)
            if n == 1:
                self.operator = "+"
            elif n == 2:
                self.operator = "-"
            elif n == 3:
                self.operator = "/"
            else:
                self.operator = "*"
        else:
            self.operator = operator
            
        if number == None:
            #generate random number from 0-9
            self.number = 0
            if self.operator == "/":
                while self.number == 0:
                    self.number = randrange(nmin, nmax)
            else:
                self.number = randrange(nmin, nmax)
        else:
            self.number = number
        self.number = float(self.number)

    def calc(self, val=0):
        # perform operation given the input value
        if self.operator == "+":
            val += self.number
        elif self.operator == "-":
            val -= self.number
        elif self.operator == "*":
            val *= self.number
        elif self.operator == "/":
            val /= self.number
        return val


class gene:

    def __init__(self, n_operations = 5, seq = None):
        #seq is a sequence of operations (see class above)
        #initalize
        self.n_operations = n_operations
        
        #generate sequence
        if seq == None:
            #print "Generating sequence"
            self.seq = []
            self.seq.append(operation(operator="+"))  # the default operation is + some number
            for i in range(n_operations-1):
                #generate random number
                self.seq.append(operation())

        else:
            self.seq = seq

        self.calc_seq()

        #print "Sequence: ", self.seq
    def stringify(self):
        seq = ""
        for i in self.seq:
            seq = seq + i.operator + str(i.number)
        return seq

    def calc_seq(self):
        self.val = 0
        for i in self.seq:
            #print i.calc(self.val)
            self.val = i.calc(self.val)
        return self.val

    def crossover(self, ingene, rate):
        # combine this gene with the ingene at the given rate (between 0 and 1)
        #  of mixing to create two new genes

        #print "In 1: ", self.stringify()
        #print "In 2: ", ingene.stringify()
        new_seq_a = []
        new_seq_b = []
        for i in range(len(self.seq)):
            if (random() < rate): # swap
                new_seq_a.append(ingene.seq[i])
                new_seq_b.append(self.seq[i])
            else:
                new_seq_b.append(ingene.seq[i])
                new_seq_a.append(self.seq[i])

        new_gene_a = gene(seq = new_seq_a)
        new_gene_b = gene(seq = new_seq_b)
                       
        #print "Out 1:", new_gene_a.stringify()
        #print "Out 2:", new_gene_b.stringify()

        return (new_gene_a, new_gene_b)

    def mutate(self, mutation_rate):
        for i in range(1, len(self.seq)):
            if random() < mutation_rate:
                self.seq[i] = operation()
                
            
            

def weight(target, val):
    if val <> None:
        #print abs(target - val)
        if abs(target - val) <> 0:
            w = (1. / abs(target - val))
        else:
            w = "Bingo"
            print "Bingo: target, val = ", target, val
    else:
        w = 0.
    return w

def pick_value(weights):
    #given a series of weights randomly pick one of the sequence accounting for
    # the values of the weights

    # sum all the weights (for normalization)
    total = 0
    for i in weights:
        total += i

    # make an array of the normalized cumulative totals of the weights.
    cum_wts = []
    ctot = 0.0
    cum_wts.append(ctot)
    for i in range(len(weights)):
        ctot += weights[i]/total
        cum_wts.append(ctot)
    #print cum_wts

    # get random number and find where it occurs in array
    n = random()
    index = randrange(0, len(weights)-1)
    for i in range(len(cum_wts)-1):
        #print i, cum_wts[i], n, cum_wts[i+1]
        if n >= cum_wts[i] and n < cum_wts[i+1]:
            
            index = i
            #print "Picked", i
            break
    return index

def pick_best(weights):
    # pick the top two values from the sequences
    i1 = -1
    i2 = -1
    max1 = 0.
    max2 = 0.
    for i in range(len(weights)):
        if weights[i] > max1:
            max2 = max1
            max1 = weights[i]
            i2 = i1
            i1 = i
        elif weights[i] > max2:
            max2 = weights[i]
            i2 = i

    return (i1, i2)
        
    
    
            


# Main loop
l_loop = True
loop_num = 0
best_gene = None

##test = gene()
##test.print_seq()
##print test.calc_seq()

# initialize
genes = []
for i in range(ns):
    genes.append(gene(n_operations=sequence_length))
    #print genes[-1].stringify(), genes[-1].val
    

f1 = gcurve(color=color.cyan)

while (l_loop and loop_num < max_itterations):
    loop_num += 1
    if (loop_num%10 == 0):
        print "Loop: ", loop_num

    # Calculate weights
    weights = []
    for i in range(ns):
        weights.append(weight(target_val, genes[i].val))
        # check for hit on target
        if weights[-1] == "Bingo":
            print "Bingo", genes[i].stringify(), genes[i].val
            l_loop = False
            best_gene = genes[i]
            break
    #print weights

    if l_loop:

        # indicate which was the best fit option (highest weight)
        max_w = 0.0
        max_i = -1
        for i in range(len(weights)):
            #print max_w, weights[i]
            if weights[i] > max_w:
                max_w = weights[i]
                max_i = i
        best_gene = genes[max_i]
##        print "Best operation:", max_i, genes[max_i].stringify(), \
##              genes[max_i].val, max_w
        f1.plot(pos=(loop_num, best_gene.val))


        # Pick parent gene sequences for next generation
        # pick first of the genes using weigths for preference    
##        index = pick_value(weights)
##        print "Picked operation:  ", index, genes[index].stringify(), \
##              genes[index].val, weights[index]
##
##        # pick second gene
##        index2 = index
##        while index2 == index:
##            index2 = pick_value(weights)
##        print "Picked operation 2:", index2, genes[index2].stringify(), \
##              genes[index2].val, weights[index2]
##

        (index, index2) = pick_best(weights)
        
        # Crossover: combine genes to get the new population
        new_genes = []
        for i in range(ns/2):
            (a,b) = genes[index].crossover(genes[index2], crossover_rate)
            new_genes.append(a)
            new_genes.append(b)

        # Mutate
        for i in new_genes:
            i.mutate(mutation_rate)
                

        # update genes array
        genes = []
        for i in new_genes:
            genes.append(i)


print
print "Best Gene:", best_gene.stringify(), best_gene.val
print "Number of iterations:", loop_num
##

When run, the code usually gets a valid answer, but does not always converge: The figure at the top of this post shows it finding a solution after 142 iterations (the solution it found was: +8.0 +8.0 *3.0 -6.0). The code is rough, but is all I have time for at the moment. However, it should be a reasonable starting point if I should decide to discuss these in class.

Leave a Reply