renamed ABS to REGRESS

pull/4/head
Kai Staats 2016-07-11 23:07:44 -06:00
parent 918187e1f8
commit 966a5dcf53
3 changed files with 168 additions and 174 deletions

View File

@ -2,7 +2,7 @@
# Define the methods and global variables used by Karoo GP
# by Kai Staats, MSc UCT / AIMS
# Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions
# version 0.9.1.4
# version 0.9.1.5
'''
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
@ -16,6 +16,7 @@ import os
import sys
import time
import argparse
import numpy as np
import pprocess as pp
import sklearn.metrics as skm
@ -40,7 +41,7 @@ class Base_GP(object):
'fx_eval_' Methods to Evaluate a Tree
'fx_fitness_' Methods to Evaluate Tree Fitness
'fx_evo_' Methods to Evolve a Population
'fx_test_' Methods to Test a Tree
'fx_test_' Methods to Validate a Tree
'fx_tree_' Methods to Append & Archive
There are no sub-classes at the time of this edit - 2015 09/21
@ -216,9 +217,9 @@ class Base_GP(object):
'''
### 1) load the data file associated with the user selected fitness kernel ###
data_dict = {'a':'files/data_ABS.csv', 'b':'files/data_BOOL.csv', 'c':'files/data_CLASSIFY.csv', 'm':'files/data_MATCH.csv', 'p':'files/data_PLAY.csv'}
func_dict = {'a':'files/functions_ABS.csv', 'b':'files/functions_BOOL.csv', 'c':'files/functions_CLASSIFY.csv', 'm':'files/functions_MATCH.csv', 'p':'files/functions_PLAY.csv'}
fitt_dict = {'a':'min', 'b':'max', 'c':'max', 'm':'max', 'p':''}
data_dict = {'b':'files/data_BOOL.csv', 'c':'files/data_CLASSIFY.csv', 'r':'files/data_REGRESS.csv', 'm':'files/data_MATCH.csv', 'p':'files/data_PLAY.csv'}
func_dict = {'b':'files/functions_BOOL.csv', 'c':'files/functions_CLASSIFY.csv', 'r':'files/functions_REGRESS.csv', 'm':'files/functions_MATCH.csv', 'p':'files/functions_PLAY.csv'}
fitt_dict = {'b':'max', 'c':'max', 'r':'min', 'm':'max', 'p':''}
if len(sys.argv) == 1: # load data in the karoo_gp/files/ directory
data_x = np.loadtxt(data_dict[self.kernel], skiprows = 1, delimiter = ',', dtype = float); data_x = data_x[:,0:-1] # load all but the right-most column
@ -470,16 +471,16 @@ class Base_GP(object):
def fx_karoo_crossover(self):
'''
Through tournament selection, 2 trees are selected as parents to produce 2 offspring. Within each parent
Through tournament selection, two trees are selected as parents to produce two offspring. Within each parent
Tree a branch is selected. Parent A is copied, with its selected branch deleted. Parent B's branch is then
copied to the former location of Parent A's branch and inserted (grafted). The size and shape of the child
Tree may be smaller or larger than either of the parents, but may not exceed 'tree_depth_max' as defined
by the user.
This process combines genetic code from 2 trees, both of which were chosen by the tournament process as having
a higher fitness than the average population. Therefore, there is a chance their offspring will provide an
improvement in total fitness. In most GP applications, Crossover is the most commonly applied evolutionary
operator (~70-80%).
This process combines genetic code from two parent Trees, both of which were chosen by the tournament process
as having a higher fitness than the average population. Therefore, there is a chance their offspring will
provide an improvement in total fitness. In most GP applications, Crossover is the most commonly applied
evolutionary operator (~70-80%).
For those who like to watch, select 'db' (debug mode) at the launch of Karoo GP or at any (pause).
@ -499,14 +500,14 @@ class Base_GP(object):
parent_b = self.fx_fitness_tournament(self.tourn_size) # perform tournament selection for 'parent_b'
branch_b = self.fx_evo_branch_select(parent_b) # select branch within 'parent_b', to copy to 'parent_a' and receive a branch from 'parent_a'
parent_c = np.copy(parent_a); branch_c = np.copy(branch_a) # else the Crossover Trees leak back to the originals (not sure why)
parent_d = np.copy(parent_b); branch_d = np.copy(branch_b) # else the Crossover Trees leak back to the originals (not sure why)
parent_c = np.copy(parent_a); branch_c = np.copy(branch_a) # else the Crossover mods affect the parent Trees, due to how Python manages '='
parent_d = np.copy(parent_b); branch_d = np.copy(branch_b) # else the Crossover mods affect the parent Trees, due to how Python manages '='
child_1 = self.fx_evo_crossover(parent_a, branch_a, parent_b, branch_b) # perform Crossover
self.population_b.append(child_1) # append the 1st child to next generation of Trees
offspring_1 = self.fx_evo_crossover(parent_a, branch_a, parent_b, branch_b) # perform Crossover
self.population_b.append(offspring_1) # append the 1st child to next generation of Trees
child_2 = self.fx_evo_crossover(parent_d, branch_d, parent_c, branch_c) # perform Crossover
self.population_b.append(child_2) # append the 2nd child to next generation of Trees
offspring_2 = self.fx_evo_crossover(parent_d, branch_d, parent_c, branch_c) # perform Crossover
self.population_b.append(offspring_2) # append the 2nd child to next generation of Trees
return
@ -543,18 +544,18 @@ class Base_GP(object):
print '\t\033[36m\033[1m ts \t\033[0;0m adjust the tournament size'
print '\t\033[36m\033[1m min \t\033[0;0m adjust the minimum number of nodes'
# print '\t\033[36m\033[1m max \t\033[0;0m adjust the maximum Tree depth'
print '\t\033[36m\033[1m b \t\033[0;0m adjust the balance of genetic operators (sum to 100%)'
print '\t\033[36m\033[1m b \t\033[0;0m adjust the balance of genetic operators'
print '\t\033[36m\033[1m c \t\033[0;0m adjust the number of engaged CPU cores'
print ''
print '\t\033[36m\033[1m id \t\033[0;0m display the generation ID'
print '\t\033[36m\033[1m pop \t\033[0;0m list all Trees in designated population'
print '\t\033[36m\033[1m pop \t\033[0;0m list all Trees in current population'
print '\t\033[36m\033[1m l \t\033[0;0m list Trees with leading fitness scores'
print '\t\033[36m\033[1m p \t\033[0;0m print a Tree to screen'
print ''
print '\t\033[36m\033[1m test \t\033[0;0m evaluate a Tree for Precision & Recall'
print '\t\033[36m\033[1m test \t\033[0;0m evaluate a Tree against the test data'
print ''
print '\t\033[36m\033[1m cont \t\033[0;0m continue evolution, starting with the current population'
print '\t\033[36m\033[1m load \t\033[0;0m load population_s to replace population_a'
print '\t\033[36m\033[1m load \t\033[0;0m load population_s (seed) to replace population_a (current)'
print '\t\033[36m\033[1m w \t\033[0;0m write the evolving population_b to disk'
print '\t\033[36m\033[1m q \t\033[0;0m quit Karoo GP without saving population_b'
print ''
@ -734,9 +735,11 @@ class Base_GP(object):
query = raw_input('\n\t Select a Tree in population_b to evaluate for Precision & Recall: ')
if query not in str(menu) or query == '0': raise ValueError()
elif query == '': break
if self.kernel == 'a': self.fx_test_abs(int(query)); break
elif self.kernel == 'm': self.fx_test_match(int(query)); break
if self.kernel == 'b': self.fx_test_boolean(int(query)); break
elif self.kernel == 'c': self.fx_test_classify(int(query)); break
elif self.kernel == 'r': self.fx_test_regress(int(query)); break
elif self.kernel == 'm': self.fx_test_match(int(query)); break
# elif self.kernel == '[other]': self.fx_test_[other](int(query)); break
except ValueError: print '\n\t\033[32m Enter a number from 1 including', str(len(self.population_b) - 1) + ".", 'Try again ...\033[0;0m'
else: print '\n\t\033[32m Karoo GP does not enable evaluation of the foundation population. Be patient ...\033[0;0m'
@ -1400,15 +1403,7 @@ class Base_GP(object):
### PART 3 - COMPARE TREE FITNESS FOR DISPLAY ###
if self.kernel == 'a': # display best fit Trees for the ABSOLUTE DIFFERENCE kernel
if fitness_best == 0: # first time through
fitness_best = fitness
if fitness <= fitness_best: # find the Tree with Minimum fitness score
fitness_best = fitness # set best fitness score
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
elif self.kernel == 'b': # display best fit Trees for the BOOLEAN kernel
if self.kernel == 'b': # display best fit Trees for the BOOLEAN kernel
if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows
fitness_best = fitness # set best fitness score
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
@ -1418,12 +1413,20 @@ class Base_GP(object):
fitness_best = fitness # set best fitness score
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel
if fitness_best == 0: # first time through
fitness_best = fitness
if fitness <= fitness_best: # find the Tree with Minimum fitness score
fitness_best = fitness # set best fitness score
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
elif self.kernel == 'm': # display best fit Trees for the MATCH kernel
if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows
fitness_best = fitness # set best fitness score
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
# elif self.kernel == '[other]': # display best fit Trees for the [OTHER] kernel
# elif self.kernel == '[other]': # display best fit Trees for the [other] kernel
# if fitness >= fitness_best: # find the Tree with [Maximum or Minimum] fitness score
# fitness_best = fitness # set best fitness score
# self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
@ -1471,15 +1474,15 @@ class Base_GP(object):
# print 'result', result, 'solution', solution
# self.fx_karoo_pause(0)
if self.kernel == 'a': # ABSOLUTE DIFFERENCE kernel
fitness = self.fx_fitness_function_abs_diff(row, result, solution)
elif self.kernel == 'b': # BOOLEAN kernel
if self.kernel == 'b': # BOOLEAN kernel
fitness = self.fx_fitness_function_bool(row, result, solution)
elif self.kernel == 'c': # CLASSIFY kernel
fitness = self.fx_fitness_function_classify(row, result, solution)
elif self.kernel == 'r': # REGRESSION kernel
fitness = self.fx_fitness_function_regress(row, result, solution)
elif self.kernel == 'm': # MATCH kernel
fitness = self.fx_fitness_function_match(row, result, solution)
@ -1489,10 +1492,10 @@ class Base_GP(object):
return fitness
def fx_fitness_function_abs_diff(self, row, result, solution): # the ABSOLUTE DIFFERENCE kernel
def fx_fitness_function_regress(self, row, result, solution):
'''
A Symbolic Regression kernel used within the 'fitness_eval' function.
A symbolic regression kernel used within the 'fitness_eval' function.
This is a minimisation function which seeks a result which is closest to the solution.
@ -1511,7 +1514,7 @@ class Base_GP(object):
def fx_fitness_function_bool(self, row, result, solution):
'''
A Boolean kernel used within the 'fitness_eval' function.
A boolean kernel used within the 'fitness_eval' function.
This is a maximization function which seeks an exact solution (a perfect match).
@ -1578,7 +1581,7 @@ class Base_GP(object):
def fx_fitness_function_match(self, row, result, solution):
'''
A Symbolic Regression kernel used within the 'fitness_eval' function.
A Matching kernel used within the 'fitness_eval' function.
This is a maximization function which seeks an exact solution (a perfect match).
@ -1901,62 +1904,63 @@ class Base_GP(object):
return tree
def fx_evo_crossover(self, parent_x, branch_x, parent_y, branch_y):
def fx_evo_crossover(self, parent, branch_x, offspring, branch_y):
'''
Refer to the method 'fx_karoo_crossover' for a full description of the genetic operator Crossover.
This method may be called twice to produce a second children per pair of parent Trees. However, 'parent_a'
will be passed to 'parent_x' and 'parent_b' to 'parent_y' for the first child, and then 'parent_b' to
'parent_x' and 'parent_a' to 'parent_y' (and their branches) for the second child accordingly.
This method is called twice to produce 2 offspring per pair of parent Trees. Note that in the method
'fx_karoo_crossover' the parent/branch relationships are swapped from the first run to the second, such that
this method receives swapped components to produce the alternative offspring. Therefore 'parent_b' is first
passed to 'offspring' which will receive 'branch_a'. With the second run, 'parent_a' is passed to 'offspring' which
will receive 'branch_b'.
Arguments required: parent_x, branch_x, parent_y, branch_y
Arguments required: parent, branch_x, offspring, branch_y (parents_a / _b, branch_a / _b from 'fx_karoo_crossover')
'''
crossover = int(branch_x[0]) # a pointer to the top of the branch in 'parent_x'
branch_top = int(branch_y[0]) # a pointer to the top of the branch in 'parent_y'
if len(branch_x) == 1: # if the branch from 'parent_x' contains only one node (terminal)
crossover = int(branch_x[0]) # pointer to the top of the 1st parent branch passed from 'fx_karoo_crossover'
branch_top = int(branch_y[0]) # pointer to the top of the 2nd parent branch passed from 'fx_karoo_crossover'
if self.display == 'i': print '\t\033[36m terminal crossover from \033[1mparent', parent_x[0][1], '\033[0;0m\033[36mto \033[1mparent', parent_y[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m'
if len(branch_x) == 1: # if the branch from the parent contains only one node (terminal)
if self.display == 'i': print '\t\033[36m terminal crossover from \033[1mparent', parent[0][1], '\033[0;0m\033[36mto \033[1moffspring', offspring[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m'
if self.display == 'db':
print '\n In a copy of parent_y:\n', parent_y
print '\n ... we remove nodes', branch_y, 'and replace node', branch_top, 'with a terminal from branch_x'; self.fx_karoo_pause(0)
print '\n\033[36m In a copy of one parent:\033[0;0m\n', offspring
print '\n\033[36m ... we remove nodes\033[1m', branch_y, '\033[0;0m\033[36mand replace node\033[1m', branch_top, '\033[0;0m\033[36mwith a terminal from branch_x\033[0;0m'; self.fx_karoo_pause(0)
parent_y[5][branch_top] = 'term' # replace type
parent_y[6][branch_top] = parent_x[6][crossover] # replace label with that of a particular node in branch_x
parent_y[8][branch_top] = 0 # set terminal arity
offspring[5][branch_top] = 'term' # replace type
offspring[6][branch_top] = parent[6][crossover] # replace label with that of a particular node in 'branch_x'
offspring[8][branch_top] = 0 # set terminal arity
parent_y = np.delete(parent_y, branch_y[1:], axis = 1) # delete all nodes beneath point of mutation ('branch_top')
parent_y = self.fx_evo_child_link_fix(parent_y) # fix all child links
parent_y = self.fx_evo_node_renum(parent_y) # renumber all 'NODE_ID's
offspring = np.delete(offspring, branch_y[1:], axis = 1) # delete all nodes beneath point of mutation ('branch_top')
offspring = self.fx_evo_child_link_fix(offspring) # fix all child links
offspring = self.fx_evo_node_renum(offspring) # renumber all 'NODE_ID's
if self.display == 'db': print 'This is the resulting offspring:\n', parent_y; self.fx_karoo_pause(0)
if self.display == 'db': print '\n\033[36m This is the resulting offspring:\033[0;0m\n', offspring; self.fx_karoo_pause(0)
else: # we are working with a branch from 'parent_x' >= depth 1 (min 3 nodes)
else: # we are working with a branch from 'parent' >= depth 1 (min 3 nodes)
if self.display == 'i': print '\t\033[36m branch crossover from \033[1mparent', parent_x[0][1], '\033[0;0m\033[36mto \033[1mparent', parent_y[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m'
if self.display == 'i': print '\t\033[36m branch crossover from \033[1mparent', parent[0][1], '\033[0;0m\033[36mto \033[1moffspring', offspring[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m'
# self.fx_gen_tree_build('test', 'f', 2) # TEST AND DEBUG: disable the next 'self.tree ...' line
self.tree = self.fx_evo_branch_copy(parent_x, branch_x) # generate stand-alone 'gp.tree' with properties of 'branch_x'
self.tree = self.fx_evo_branch_copy(parent, branch_x) # generate stand-alone 'gp.tree' with properties of 'branch_x'
if self.display == 'db':
print '\n From parent_x:\n', parent_x
print '\n ... we copy branch_x', branch_x, 'as a new tree:\n', self.tree; self.fx_karoo_pause(0)
print '\n\033[36m From one parent:\033[0;0m\n', parent
print '\n\033[36m ... we copy branch_x\033[1m', branch_x, '\033[0;0m\033[36mas a new, sub-tree:\033[0;0m\n', self.tree; self.fx_karoo_pause(0)
if self.display == 'db':
print ' ... and insert it into a copy of parent_y in place of branch', branch_y,':\n', parent_y; self.fx_karoo_pause(0)
print '\n\033[36m ... and insert it into a copy of the second parent in place of the selected branch\033[1m', branch_y,':\033[0;0m\n', offspring; self.fx_karoo_pause(0)
parent_y = self.fx_evo_branch_top_copy(parent_y, branch_y) # copy root of 'branch_y' ('gp.tree') to 'parent_y'
parent_y = self.fx_evo_branch_body_copy(parent_y) # copy remaining nodes in 'branch_y' ('gp.tree') to 'parent_y'
# parent_y = self.fx_evo_tree_prune(parent_y, int(parent_y[2][1]) + self.tree_depth_max) # prune to the initial max Tree depth + adjustment - tested 2016 07/09
parent_y = self.fx_evo_tree_prune(parent_y, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10
offspring = self.fx_evo_branch_top_copy(offspring, branch_y) # copy root of 'branch_y' ('gp.tree') to 'offspring'
offspring = self.fx_evo_branch_body_copy(offspring) # copy remaining nodes in 'branch_y' ('gp.tree') to 'offspring'
offspring = self.fx_evo_tree_prune(offspring, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10
parent_y = self.fx_evo_fitness_wipe(parent_y) # wipe fitness data
offspring = self.fx_evo_fitness_wipe(offspring) # wipe fitness data and return 'offspring'
return parent_y
return offspring
def fx_evo_branch_select(self, tree):
@ -2052,7 +2056,7 @@ class Base_GP(object):
if self.display == 'db':
print '\n\t ... inserted node', node_count, 'of', len(self.tree[3])-1
print '\n This is the Tree after a new node is inserted:\n', tree; self.fx_karoo_pause(0)
print '\n\033[36m This is the Tree after a new node is inserted:\033[0;0m\n', tree; self.fx_karoo_pause(0)
node_count = node_count + 1 # exit loop when 'node_count' reaches the number of columns in the array 'gp.tree'
@ -2403,80 +2407,16 @@ class Base_GP(object):
#++++++++++++++++++++++++++++++++++++++++++
# Methods to Test a Tree |
#++++++++++++++++++++++++++++++++++++++++++
# Methods to Validate a Tree |
#++++++++++++++++++++++++++++++++++++++++++
def fx_test_abs(self, tree_id):
def fx_test_boolean(self, tree_id):
'''
A validation of an absolute value fitness function.
# [need to build]
Arguments required: tree_id
Arguments required: tree_id
'''
# switched from population_a to _b 2016 07/09
self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree
print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m'
print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n'
for row in range(0, self.data_test_rows): # test against data_test_dict
data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data
if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function
result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0)
else:
result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result
result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points
solution = float(data_test_dict['s']) # extract the desired solution from the data
solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points
# fitness = abs(result - solution) # this is a Minimisation function (seeking smallest fitness)
print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m'
# measure the total or average difference between result and solution across all rows ???
print '\n\t (this test is not yet complete)'
return
def fx_test_match(self, tree_id):
'''
A validation of a matching fitness function.
Arguments required: tree_id
'''
# switched from population_a to _b 2016 07/09
self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree
print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m'
print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n'
for row in range(0, self.data_test_rows): # test against data_test_dict
data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data
if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function
result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0)
else:
result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result
result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points
solution = float(data_test_dict['s']) # extract the desired solution from the data
solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points
if result == solution:
fitness = 1 # improve the fitness score by 1
print '\t\033[36m data row', row, '\033[0;0m\033[36myields:\033[1m', result, '\033[0;0m'
else:
fitness = 0 # do not adjust the fitness score
print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m'
print '\n\t Tree', tree_id, 'has an accuracy of:', float(self.population_b[tree_id][12][1]) / self.data_test_dict_array.shape[0] * 100
return
@ -2553,19 +2493,83 @@ class Base_GP(object):
print skm.confusion_matrix(y_true, y_pred)
return
def fx_test_plot(self, tree):
'''
# [need to build]
Arguments required: tree
def fx_test_regress(self, tree_id):
'''
A validation of a regression fitness function.
Arguments required: tree_id
'''
# switched from population_a to _b 2016 07/09
self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree
print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m'
print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n'
for row in range(0, self.data_test_rows): # test against data_test_dict
data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data
if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function
result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0)
else:
result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result
result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points
solution = float(data_test_dict['s']) # extract the desired solution from the data
solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points
# fitness = abs(result - solution) # this is a Minimisation function (seeking smallest fitness)
print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m'
# measure the total or average difference between result and solution across all rows ???
print '\n\t (this test is not yet complete)'
return
def fx_test_match(self, tree_id):
'''
A validation of a matching fitness function.
Arguments required: tree_id
'''
# switched from population_a to _b 2016 07/09
self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree
print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m'
print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n'
for row in range(0, self.data_test_rows): # test against data_test_dict
data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data
if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function
result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0)
else:
result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result
result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points
solution = float(data_test_dict['s']) # extract the desired solution from the data
solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points
if result == solution:
fitness = 1 # improve the fitness score by 1
print '\t\033[36m data row', row, '\033[0;0m\033[36myields:\033[1m', result, '\033[0;0m'
else:
fitness = 0 # do not adjust the fitness score
print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m'
print '\n\t Tree', tree_id, 'has an accuracy of:', float(self.population_b[tree_id][12][1]) / self.data_test_dict_array.shape[0] * 100
return
#++++++++++++++++++++++++++++++++++++++++++
# Methods to Append & Archive |
#++++++++++++++++++++++++++++++++++++++++++

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc UCT / AIMS
# Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions
# version 0.9.1.4
# version 0.9.1.5
'''
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
@ -30,10 +30,10 @@ gp.karoo_banner('main')
print ''
menu = ['a','b','c','m','p','']
menu = ['r','b','c','m','p','']
while True:
try:
gp.kernel = raw_input('\t Select (a)bs diff, (c)lassify, (m)atch, or (p)lay (default m): ')
gp.kernel = raw_input('\t Select (r)egression, (c)lassification, (m)atching, or (p)lay (default m): ')
if gp.kernel not in menu: raise ValueError()
gp.kernel = gp.kernel or 'm'; break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
@ -48,17 +48,7 @@ if gp.kernel == 'c': # if the Classification kernel is selected (above)
if gp.class_labels not in str(menu) or gp.class_labels == '0': raise ValueError()
gp.class_labels = gp.class_labels or 3; gp.class_labels = int(gp.class_labels); break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
# menu = ['f','i','']
# while True:
# try:
# gp.class_type = raw_input('\t Select (f)inite or (i)finite classification (default i): ')
# if gp.class_type not in menu: raise ValueError()
# gp.class_type = gp.class_type or 'i'; break
# except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
# except KeyboardInterrupt: sys.exit()
except KeyboardInterrupt: sys.exit()
menu = ['f','g','r','']
while True:
@ -112,7 +102,7 @@ else: # if any other kernel is selected
menu = range(10,1001)
while True:
try:
gp.tree_pop_max = raw_input('\t Enter number of Trees in each Generation (default 100): ')
gp.tree_pop_max = raw_input('\t Enter number of Trees in each population (default 100): ')
if gp.tree_pop_max not in str(menu) or gp.tree_pop_max == '0': raise ValueError()
gp.tree_pop_max = gp.tree_pop_max or 100; gp.tree_pop_max = int(gp.tree_pop_max); break
except ValueError: print '\t\033[32m Enter a number from 10 including 1000. Try again ...\n\033[0;0m'
@ -121,7 +111,7 @@ else: # if any other kernel is selected
menu = range(1,101)
while True:
try:
gp.generation_max = raw_input('\t Enter max number of Generations (default 10): ')
gp.generation_max = raw_input('\t Enter max number of generations (default 10): ')
if gp.generation_max not in str(menu) or gp.generation_max == '0': raise ValueError()
gp.generation_max = gp.generation_max or 10; gp.generation_max = int(gp.generation_max); break
except ValueError: print '\t\033[32m Enter a number from 1 including 100. Try again ...\n\033[0;0m'
@ -138,10 +128,10 @@ else: # if any other kernel is selected
# define the ratio between types of mutation, where all sum to 1.0; can be adjusted in 'i'nteractive mode
gp.evolve_repro = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction
gp.evolve_point = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation
gp.evolve_branch = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Branch Mutation
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # percentage of subsequent population to be generated through Crossover Reproduction
gp.evolve_repro = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction
gp.evolve_point = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation
gp.evolve_branch = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Branch Mutation
gp.evolve_cross = int(1.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Crossover Reproduction
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
gp.cores = 1 # replace '1' with 'int(gp.core_count)' to auto-set to max; can be adjusted in 'i'nteractive mode

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc UCT / AIMS
# Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions
# version 0.9.1.4
# version 0.9.1.5
'''
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
@ -15,15 +15,15 @@ import sys # sys.path.append('modules/') # add the directory 'modules' to the cu
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
# parameters configuration
gp.kernel = 'c' # ['a','c','m'] fitness function: ABS Value, Classification, or Matching
gp.class_labels = 3 # number of class labels in the feature set
tree_type = 'r' # ['f','g','r'] Tree type: full, grow, or ramped half/half
gp.kernel = 'c' # ['r','c','m'] fitness function: (r)egression, (c)lassification, or (m)atching
gp.class_labels = 3 # [2,3, ...] number of class labels in the feature set
tree_type = 'r' # ['f','g','r'] Tree (t)ype: (f)ull, (g)row, or (r)amped half/half
tree_depth_base = 3 # [3,10] maximum Tree depth for the initial population, where nodes = 2^(depth + 1) - 1
gp.tree_depth_max = 3 # [3,10] maximum Tree depth for the entire run; introduces potential bloat
gp.tree_depth_min = 3 # [3,100] minimum number of nodes
gp.tree_pop_max = 100 # [10,1000] maximum population
gp.generation_max = 10 # [1,1000] number of generations
gp.display = 'm' # ['i','m','g','s','db','t'] display mode: Interactive, Minimal, Generational, Server, Debug, or Timer
gp.generation_max = 10 # [1,100] number of generations
gp.display = 'm' # ['i','m','g','s'] display mode: (i)nteractive, (m)inimal, (g)enerational, or (s)erver
gp.evolve_repro = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction
gp.evolve_point = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation