diff --git a/karoo_gp_base_class.py b/karoo_gp_base_class.py index c415b5f..c2f0af2 100644 --- a/karoo_gp_base_class.py +++ b/karoo_gp_base_class.py @@ -2,7 +2,7 @@ # Define the methods and global variables used by Karoo GP # by Kai Staats, MSc UCT / AIMS # Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions -# version 0.9.1.4 +# version 0.9.1.5 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -16,6 +16,7 @@ import os import sys import time +import argparse import numpy as np import pprocess as pp import sklearn.metrics as skm @@ -40,7 +41,7 @@ class Base_GP(object): 'fx_eval_' Methods to Evaluate a Tree 'fx_fitness_' Methods to Evaluate Tree Fitness 'fx_evo_' Methods to Evolve a Population - 'fx_test_' Methods to Test a Tree + 'fx_test_' Methods to Validate a Tree 'fx_tree_' Methods to Append & Archive There are no sub-classes at the time of this edit - 2015 09/21 @@ -216,9 +217,9 @@ class Base_GP(object): ''' ### 1) load the data file associated with the user selected fitness kernel ### - data_dict = {'a':'files/data_ABS.csv', 'b':'files/data_BOOL.csv', 'c':'files/data_CLASSIFY.csv', 'm':'files/data_MATCH.csv', 'p':'files/data_PLAY.csv'} - func_dict = {'a':'files/functions_ABS.csv', 'b':'files/functions_BOOL.csv', 'c':'files/functions_CLASSIFY.csv', 'm':'files/functions_MATCH.csv', 'p':'files/functions_PLAY.csv'} - fitt_dict = {'a':'min', 'b':'max', 'c':'max', 'm':'max', 'p':''} + data_dict = {'b':'files/data_BOOL.csv', 'c':'files/data_CLASSIFY.csv', 'r':'files/data_REGRESS.csv', 'm':'files/data_MATCH.csv', 'p':'files/data_PLAY.csv'} + func_dict = {'b':'files/functions_BOOL.csv', 'c':'files/functions_CLASSIFY.csv', 'r':'files/functions_REGRESS.csv', 'm':'files/functions_MATCH.csv', 'p':'files/functions_PLAY.csv'} + fitt_dict = {'b':'max', 'c':'max', 'r':'min', 'm':'max', 'p':''} if len(sys.argv) == 1: # load data in the karoo_gp/files/ directory data_x = np.loadtxt(data_dict[self.kernel], skiprows = 1, 
delimiter = ',', dtype = float); data_x = data_x[:,0:-1] # load all but the right-most column @@ -470,16 +471,16 @@ class Base_GP(object): def fx_karoo_crossover(self): ''' - Through tournament selection, 2 trees are selected as parents to produce 2 offspring. Within each parent + Through tournament selection, two trees are selected as parents to produce two offspring. Within each parent Tree a branch is selected. Parent A is copied, with its selected branch deleted. Parent B's branch is then copied to the former location of Parent A's branch and inserted (grafted). The size and shape of the child Tree may be smaller or larger than either of the parents, but may not exceed 'tree_depth_max' as defined by the user. - This process combines genetic code from 2 trees, both of which were chosen by the tournament process as having - a higher fitness than the average population. Therefore, there is a chance their offspring will provide an - improvement in total fitness. In most GP applications, Crossover is the most commonly applied evolutionary - operator (~70-80%). + This process combines genetic code from two parent Trees, both of which were chosen by the tournament process + as having a higher fitness than the average population. Therefore, there is a chance their offspring will + provide an improvement in total fitness. In most GP applications, Crossover is the most commonly applied + evolutionary operator (~70-80%). For those who like to watch, select 'db' (debug mode) at the launch of Karoo GP or at any (pause). 
@@ -499,14 +500,14 @@ class Base_GP(object): parent_b = self.fx_fitness_tournament(self.tourn_size) # perform tournament selection for 'parent_b' branch_b = self.fx_evo_branch_select(parent_b) # select branch within 'parent_b', to copy to 'parent_a' and receive a branch from 'parent_a' - parent_c = np.copy(parent_a); branch_c = np.copy(branch_a) # else the Crossover Trees leak back to the originals (not sure why) - parent_d = np.copy(parent_b); branch_d = np.copy(branch_b) # else the Crossover Trees leak back to the originals (not sure why) + parent_c = np.copy(parent_a); branch_c = np.copy(branch_a) # else the Crossover mods affect the parent Trees, due to how Python manages '=' + parent_d = np.copy(parent_b); branch_d = np.copy(branch_b) # else the Crossover mods affect the parent Trees, due to how Python manages '=' - child_1 = self.fx_evo_crossover(parent_a, branch_a, parent_b, branch_b) # perform Crossover - self.population_b.append(child_1) # append the 1st child to next generation of Trees + offspring_1 = self.fx_evo_crossover(parent_a, branch_a, parent_b, branch_b) # perform Crossover + self.population_b.append(offspring_1) # append the 1st child to next generation of Trees - child_2 = self.fx_evo_crossover(parent_d, branch_d, parent_c, branch_c) # perform Crossover - self.population_b.append(child_2) # append the 2nd child to next generation of Trees + offspring_2 = self.fx_evo_crossover(parent_d, branch_d, parent_c, branch_c) # perform Crossover + self.population_b.append(offspring_2) # append the 2nd child to next generation of Trees return @@ -543,18 +544,18 @@ class Base_GP(object): print '\t\033[36m\033[1m ts \t\033[0;0m adjust the tournament size' print '\t\033[36m\033[1m min \t\033[0;0m adjust the minimum number of nodes' # print '\t\033[36m\033[1m max \t\033[0;0m adjust the maximum Tree depth' - print '\t\033[36m\033[1m b \t\033[0;0m adjust the balance of genetic operators (sum to 100%)' + print '\t\033[36m\033[1m b \t\033[0;0m adjust the balance 
of genetic operators' print '\t\033[36m\033[1m c \t\033[0;0m adjust the number of engaged CPU cores' print '' print '\t\033[36m\033[1m id \t\033[0;0m display the generation ID' - print '\t\033[36m\033[1m pop \t\033[0;0m list all Trees in designated population' + print '\t\033[36m\033[1m pop \t\033[0;0m list all Trees in current population' print '\t\033[36m\033[1m l \t\033[0;0m list Trees with leading fitness scores' print '\t\033[36m\033[1m p \t\033[0;0m print a Tree to screen' print '' - print '\t\033[36m\033[1m test \t\033[0;0m evaluate a Tree for Precision & Recall' + print '\t\033[36m\033[1m test \t\033[0;0m evaluate a Tree against the test data' print '' print '\t\033[36m\033[1m cont \t\033[0;0m continue evolution, starting with the current population' - print '\t\033[36m\033[1m load \t\033[0;0m load population_s to replace population_a' + print '\t\033[36m\033[1m load \t\033[0;0m load population_s (seed) to replace population_a (current)' print '\t\033[36m\033[1m w \t\033[0;0m write the evolving population_b to disk' print '\t\033[36m\033[1m q \t\033[0;0m quit Karoo GP without saving population_b' print '' @@ -734,9 +735,11 @@ class Base_GP(object): query = raw_input('\n\t Select a Tree in population_b to evaluate for Precision & Recall: ') if query not in str(menu) or query == '0': raise ValueError() elif query == '': break - if self.kernel == 'a': self.fx_test_abs(int(query)); break - elif self.kernel == 'm': self.fx_test_match(int(query)); break + if self.kernel == 'b': self.fx_test_boolean(int(query)); break elif self.kernel == 'c': self.fx_test_classify(int(query)); break + elif self.kernel == 'r': self.fx_test_regress(int(query)); break + elif self.kernel == 'm': self.fx_test_match(int(query)); break + # elif self.kernel == '[other]': self.fx_test_[other](int(query)); break except ValueError: print '\n\t\033[32m Enter a number from 1 including', str(len(self.population_b) - 1) + ".", 'Try again ...\033[0;0m' else: print '\n\t\033[32m Karoo GP does not 
enable evaluation of the foundation population. Be patient ...\033[0;0m' @@ -1400,15 +1403,7 @@ class Base_GP(object): ### PART 3 - COMPARE TREE FITNESS FOR DISPLAY ### - if self.kernel == 'a': # display best fit Trees for the ABSOLUTE DIFFERENCE kernel - if fitness_best == 0: # first time through - fitness_best = fitness - - if fitness <= fitness_best: # find the Tree with Minimum fitness score - fitness_best = fitness # set best fitness score - self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary - - elif self.kernel == 'b': # display best fit Trees for the BOOLEAN kernel + if self.kernel == 'b': # display best fit Trees for the BOOLEAN kernel if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows fitness_best = fitness # set best fitness score self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary @@ -1418,12 +1413,20 @@ class Base_GP(object): fitness_best = fitness # set best fitness score self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary + elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel + if fitness_best == 0: # first time through + fitness_best = fitness + + if fitness <= fitness_best: # find the Tree with Minimum fitness score + fitness_best = fitness # set best fitness score + self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary + elif self.kernel == 'm': # display best fit Trees for the MATCH kernel if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows fitness_best = fitness # set best fitness score self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary - # elif self.kernel == '[other]': # display best fit Trees for the [OTHER] kernel + # elif self.kernel == '[other]': # display best fit Trees for the [other] kernel # if fitness >= fitness_best: # find the Tree with [Maximum or Minimum] fitness score # fitness_best = fitness # set best fitness score # 
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary @@ -1471,15 +1474,15 @@ class Base_GP(object): # print 'result', result, 'solution', solution # self.fx_karoo_pause(0) - if self.kernel == 'a': # ABSOLUTE DIFFERENCE kernel - fitness = self.fx_fitness_function_abs_diff(row, result, solution) - - elif self.kernel == 'b': # BOOLEAN kernel + if self.kernel == 'b': # BOOLEAN kernel fitness = self.fx_fitness_function_bool(row, result, solution) elif self.kernel == 'c': # CLASSIFY kernel fitness = self.fx_fitness_function_classify(row, result, solution) + elif self.kernel == 'r': # REGRESSION kernel + fitness = self.fx_fitness_function_regress(row, result, solution) + elif self.kernel == 'm': # MATCH kernel fitness = self.fx_fitness_function_match(row, result, solution) @@ -1489,10 +1492,10 @@ class Base_GP(object): return fitness - def fx_fitness_function_abs_diff(self, row, result, solution): # the ABSOLUTE DIFFERENCE kernel + def fx_fitness_function_regress(self, row, result, solution): ''' - A Symbolic Regression kernel used within the 'fitness_eval' function. + A symbolic regression kernel used within the 'fitness_eval' function. This is a minimisation function which seeks a result which is closest to the solution. @@ -1511,7 +1514,7 @@ class Base_GP(object): def fx_fitness_function_bool(self, row, result, solution): ''' - A Boolean kernel used within the 'fitness_eval' function. + A boolean kernel used within the 'fitness_eval' function. This is a maximization function which seeks an exact solution (a perfect match). @@ -1578,7 +1581,7 @@ class Base_GP(object): def fx_fitness_function_match(self, row, result, solution): ''' - A Symbolic Regression kernel used within the 'fitness_eval' function. + A Matching kernel used within the 'fitness_eval' function. This is a maximization function which seeks an exact solution (a perfect match). 
@@ -1901,62 +1904,63 @@ class Base_GP(object): return tree - def fx_evo_crossover(self, parent_x, branch_x, parent_y, branch_y): + def fx_evo_crossover(self, parent, branch_x, offspring, branch_y): ''' Refer to the method 'fx_karoo_crossover' for a full description of the genetic operator Crossover. - This method may be called twice to produce a second children per pair of parent Trees. However, 'parent_a' - will be passed to 'parent_x' and 'parent_b' to 'parent_y' for the first child, and then 'parent_b' to - 'parent_x' and 'parent_a' to 'parent_y' (and their branches) for the second child accordingly. + This method is called twice to produce 2 offspring per pair of parent Trees. Note that in the method + 'fx_karoo_crossover' the parent/branch relationships are swapped from the first run to the second, such that + this method receives swapped components to produce the alternative offspring. Therefore 'parent_b' is first + passed to 'offspring' which will receive 'branch_a'. With the second run, 'parent_a' is passed to 'offspring' which + will receive 'branch_b'. 
- Arguments required: parent_x, branch_x, parent_y, branch_y + Arguments required: parent, branch_x, offspring, branch_y (parents_a / _b, branch_a / _b from 'fx_karoo_crossover') ''' - crossover = int(branch_x[0]) # a pointer to the top of the branch in 'parent_x' - branch_top = int(branch_y[0]) # a pointer to the top of the branch in 'parent_y' - - if len(branch_x) == 1: # if the branch from 'parent_x' contains only one node (terminal) + crossover = int(branch_x[0]) # pointer to the top of the 1st parent branch passed from 'fx_karoo_crossover' + branch_top = int(branch_y[0]) # pointer to the top of the 2nd parent branch passed from 'fx_karoo_crossover' - if self.display == 'i': print '\t\033[36m terminal crossover from \033[1mparent', parent_x[0][1], '\033[0;0m\033[36mto \033[1mparent', parent_y[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m' + if len(branch_x) == 1: # if the branch from the parent contains only one node (terminal) + + if self.display == 'i': print '\t\033[36m terminal crossover from \033[1mparent', parent[0][1], '\033[0;0m\033[36mto \033[1moffspring', offspring[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m' if self.display == 'db': - print '\n In a copy of parent_y:\n', parent_y - print '\n ... we remove nodes', branch_y, 'and replace node', branch_top, 'with a terminal from branch_x'; self.fx_karoo_pause(0) + print '\n\033[36m In a copy of one parent:\033[0;0m\n', offspring + print '\n\033[36m ... 
we remove nodes\033[1m', branch_y, '\033[0;0m\033[36mand replace node\033[1m', branch_top, '\033[0;0m\033[36mwith a terminal from branch_x\033[0;0m'; self.fx_karoo_pause(0) - parent_y[5][branch_top] = 'term' # replace type - parent_y[6][branch_top] = parent_x[6][crossover] # replace label with that of a particular node in branch_x - parent_y[8][branch_top] = 0 # set terminal arity + offspring[5][branch_top] = 'term' # replace type + offspring[6][branch_top] = parent[6][crossover] # replace label with that of a particular node in 'branch_x' + offspring[8][branch_top] = 0 # set terminal arity - parent_y = np.delete(parent_y, branch_y[1:], axis = 1) # delete all nodes beneath point of mutation ('branch_top') - parent_y = self.fx_evo_child_link_fix(parent_y) # fix all child links - parent_y = self.fx_evo_node_renum(parent_y) # renumber all 'NODE_ID's + offspring = np.delete(offspring, branch_y[1:], axis = 1) # delete all nodes beneath point of mutation ('branch_top') + offspring = self.fx_evo_child_link_fix(offspring) # fix all child links + offspring = self.fx_evo_node_renum(offspring) # renumber all 'NODE_ID's - if self.display == 'db': print 'This is the resulting offspring:\n', parent_y; self.fx_karoo_pause(0) + if self.display == 'db': print '\n\033[36m This is the resulting offspring:\033[0;0m\n', offspring; self.fx_karoo_pause(0) - else: # we are working with a branch from 'parent_x' >= depth 1 (min 3 nodes) + else: # we are working with a branch from 'parent' >= depth 1 (min 3 nodes) - if self.display == 'i': print '\t\033[36m branch crossover from \033[1mparent', parent_x[0][1], '\033[0;0m\033[36mto \033[1mparent', parent_y[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m' + if self.display == 'i': print '\t\033[36m branch crossover from \033[1mparent', parent[0][1], '\033[0;0m\033[36mto \033[1moffspring', offspring[0][1], '\033[0;0m\033[36mat node\033[1m', branch_top, '\033[0;0m' # self.fx_gen_tree_build('test', 'f', 2) # TEST AND DEBUG: 
disable the next 'self.tree ...' line - self.tree = self.fx_evo_branch_copy(parent_x, branch_x) # generate stand-alone 'gp.tree' with properties of 'branch_x' + self.tree = self.fx_evo_branch_copy(parent, branch_x) # generate stand-alone 'gp.tree' with properties of 'branch_x' if self.display == 'db': - print '\n From parent_x:\n', parent_x - print '\n ... we copy branch_x', branch_x, 'as a new tree:\n', self.tree; self.fx_karoo_pause(0) + print '\n\033[36m From one parent:\033[0;0m\n', parent + print '\n\033[36m ... we copy branch_x\033[1m', branch_x, '\033[0;0m\033[36mas a new, sub-tree:\033[0;0m\n', self.tree; self.fx_karoo_pause(0) if self.display == 'db': - print ' ... and insert it into a copy of parent_y in place of branch', branch_y,':\n', parent_y; self.fx_karoo_pause(0) + print '\n\033[36m ... and insert it into a copy of the second parent in place of the selected branch\033[1m', branch_y,':\033[0;0m\n', offspring; self.fx_karoo_pause(0) - parent_y = self.fx_evo_branch_top_copy(parent_y, branch_y) # copy root of 'branch_y' ('gp.tree') to 'parent_y' - parent_y = self.fx_evo_branch_body_copy(parent_y) # copy remaining nodes in 'branch_y' ('gp.tree') to 'parent_y' - # parent_y = self.fx_evo_tree_prune(parent_y, int(parent_y[2][1]) + self.tree_depth_max) # prune to the initial max Tree depth + adjustment - tested 2016 07/09 - parent_y = self.fx_evo_tree_prune(parent_y, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10 + offspring = self.fx_evo_branch_top_copy(offspring, branch_y) # copy root of 'branch_y' ('gp.tree') to 'offspring' + offspring = self.fx_evo_branch_body_copy(offspring) # copy remaining nodes in 'branch_y' ('gp.tree') to 'offspring' + offspring = self.fx_evo_tree_prune(offspring, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10 - parent_y = self.fx_evo_fitness_wipe(parent_y) # wipe fitness data + offspring = self.fx_evo_fitness_wipe(offspring) # wipe fitness data and return 
'offspring' - return parent_y + return offspring def fx_evo_branch_select(self, tree): @@ -2052,7 +2056,7 @@ class Base_GP(object): if self.display == 'db': print '\n\t ... inserted node', node_count, 'of', len(self.tree[3])-1 - print '\n This is the Tree after a new node is inserted:\n', tree; self.fx_karoo_pause(0) + print '\n\033[36m This is the Tree after a new node is inserted:\033[0;0m\n', tree; self.fx_karoo_pause(0) node_count = node_count + 1 # exit loop when 'node_count' reaches the number of columns in the array 'gp.tree' @@ -2403,80 +2407,16 @@ class Base_GP(object): #++++++++++++++++++++++++++++++++++++++++++ - # Methods to Test a Tree | - #++++++++++++++++++++++++++++++++++++++++++ + # Methods to Validate a Tree | + #++++++++++++++++++++++++++++++++++++++++++ - def fx_test_abs(self, tree_id): + def fx_test_boolean(self, tree_id): ''' - A validation of an absolute value fitness function. + # [need to build] - Arguments required: tree_id + Arguments required: tree ''' - - # switched from population_a to _b 2016 07/09 - self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree - print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m' - print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n' - - for row in range(0, self.data_test_rows): # test against data_test_dict - data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data - - if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function - result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0) - - else: - result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result - result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points - - solution = float(data_test_dict['s']) # 
extract the desired solution from the data - solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points - - # fitness = abs(result - solution) # this is a Minimisation function (seeking smallest fitness) - print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m' - - # measure the total or average difference between result and solution across all rows ??? - - print '\n\t (this test is not yet complete)' - - return - - - def fx_test_match(self, tree_id): - - ''' - A validation of a matching fitness function. - - Arguments required: tree_id - ''' - - # switched from population_a to _b 2016 07/09 - self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree - print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m' - print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n' - - for row in range(0, self.data_test_rows): # test against data_test_dict - data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data - - if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function - result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0) - - else: - result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result - result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points - - solution = float(data_test_dict['s']) # extract the desired solution from the data - solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points - - if result == solution: - fitness = 1 # improve the fitness score by 1 - print '\t\033[36m data row', row, '\033[0;0m\033[36myields:\033[1m', result, '\033[0;0m' - - else: - fitness = 0 # do not adjust the fitness score - 
print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m' - - print '\n\t Tree', tree_id, 'has an accuracy of:', float(self.population_b[tree_id][12][1]) / self.data_test_dict_array.shape[0] * 100 return @@ -2553,19 +2493,83 @@ class Base_GP(object): print skm.confusion_matrix(y_true, y_pred) return - - - def fx_test_plot(self, tree): - - ''' - # [need to build] - Arguments required: tree + + def fx_test_regress(self, tree_id): + ''' + A validation of a regression fitness function. + + Arguments required: tree_id + ''' + + # switched from population_a to _b 2016 07/09 + self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree + print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m' + print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n' + + for row in range(0, self.data_test_rows): # test against data_test_dict + data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data + + if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function + result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0) + + else: + result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result + result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points + + solution = float(data_test_dict['s']) # extract the desired solution from the data + solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points + + # fitness = abs(result - solution) # this is a Minimisation function (seeking smallest fitness) + print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m' + + # measure the total or average difference between result and solution across all rows ??? 
+ + print '\n\t (this test is not yet complete)' return + def fx_test_match(self, tree_id): + + ''' + A validation of a matching fitness function. + + Arguments required: tree_id + ''' + + # switched from population_a to _b 2016 07/09 + self.fx_eval_poly(self.population_b[tree_id]) # generate the raw and sympified equation for the given Tree + print '\n\t\033[36mTree', tree_id, 'yields (raw):', self.algo_raw, '\033[0;0m' + print '\t\033[36mTree', tree_id, 'yields (sym):\033[1m', self.algo_sym, '\033[0;0m\n' + + for row in range(0, self.data_test_rows): # test against data_test_dict + data_test_dict = self.data_test_dict_array[row] # re-assign (unpack) a temp dictionary to each row of data + + if str(self.algo_sym.subs(data_test_dict)) == 'zoo': # divide by zero demands we avoid use of the 'float' function + result = self.algo_sym.subs(data_test_dict) # print 'divide by zero', result; self.fx_karoo_pause(0) + + else: + result = float(self.algo_sym.subs(data_test_dict)) # process the polynomial to produce the result + result = round(result, self.precision) # force 'result' and 'solution' to the same number of floating points + + solution = float(data_test_dict['s']) # extract the desired solution from the data + solution = round(solution, self.precision) # force 'result' and 'solution' to the same number of floating points + + if result == solution: + fitness = 1 # improve the fitness score by 1 + print '\t\033[36m data row', row, '\033[0;0m\033[36myields:\033[1m', result, '\033[0;0m' + + else: + fitness = 0 # do not adjust the fitness score + print '\t\033[36m data row', row, 'yields:', result, '\033[0;0m' + + print '\n\t Tree', tree_id, 'has an accuracy of:', float(self.population_b[tree_id][12][1]) / self.data_test_dict_array.shape[0] * 100 + + return + + #++++++++++++++++++++++++++++++++++++++++++ # Methods to Append & Archive | #++++++++++++++++++++++++++++++++++++++++++ diff --git a/karoo_gp_main.py b/karoo_gp_main.py index 91198c5..befef87 100644 --- 
a/karoo_gp_main.py +++ b/karoo_gp_main.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc UCT / AIMS # Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions -# version 0.9.1.4 +# version 0.9.1.5 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -30,10 +30,10 @@ gp.karoo_banner('main') print '' -menu = ['a','b','c','m','p',''] +menu = ['r','b','c','m','p',''] while True: try: - gp.kernel = raw_input('\t Select (a)bs diff, (c)lassify, (m)atch, or (p)lay (default m): ') + gp.kernel = raw_input('\t Select (r)egression, (c)lassification, (m)atching, or (p)lay (default m): ') if gp.kernel not in menu: raise ValueError() gp.kernel = gp.kernel or 'm'; break except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m' @@ -48,17 +48,7 @@ if gp.kernel == 'c': # if the Classification kernel is selected (above) if gp.class_labels not in str(menu) or gp.class_labels == '0': raise ValueError() gp.class_labels = gp.class_labels or 3; gp.class_labels = int(gp.class_labels); break except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m' - except KeyboardInterrupt: sys.exit() - - # menu = ['f','i',''] - # while True: - # try: - # gp.class_type = raw_input('\t Select (f)inite or (i)finite classification (default i): ') - # if gp.class_type not in menu: raise ValueError() - # gp.class_type = gp.class_type or 'i'; break - # except ValueError: print '\t\033[32m Select from the options given. 
Try again ...\n\033[0;0m' - # except KeyboardInterrupt: sys.exit() - + except KeyboardInterrupt: sys.exit() menu = ['f','g','r',''] while True: @@ -112,7 +102,7 @@ else: # if any other kernel is selected menu = range(10,1001) while True: try: - gp.tree_pop_max = raw_input('\t Enter number of Trees in each Generation (default 100): ') + gp.tree_pop_max = raw_input('\t Enter number of Trees in each population (default 100): ') if gp.tree_pop_max not in str(menu) or gp.tree_pop_max == '0': raise ValueError() gp.tree_pop_max = gp.tree_pop_max or 100; gp.tree_pop_max = int(gp.tree_pop_max); break except ValueError: print '\t\033[32m Enter a number from 10 including 1000. Try again ...\n\033[0;0m' @@ -121,7 +111,7 @@ else: # if any other kernel is selected menu = range(1,101) while True: try: - gp.generation_max = raw_input('\t Enter max number of Generations (default 10): ') + gp.generation_max = raw_input('\t Enter max number of generations (default 10): ') if gp.generation_max not in str(menu) or gp.generation_max == '0': raise ValueError() gp.generation_max = gp.generation_max or 10; gp.generation_max = int(gp.generation_max); break except ValueError: print '\t\033[32m Enter a number from 1 including 100. 
Try again ...\n\033[0;0m' @@ -138,10 +128,10 @@ else: # if any other kernel is selected # define the ratio between types of mutation, where all sum to 1.0; can be adjusted in 'i'nteractive mode -gp.evolve_repro = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction -gp.evolve_point = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation -gp.evolve_branch = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Branch Mutation -gp.evolve_cross = int(0.7 * gp.tree_pop_max) # percentage of subsequent population to be generated through Crossover Reproduction +gp.evolve_repro = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction +gp.evolve_point = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation +gp.evolve_branch = int(0.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Branch Mutation +gp.evolve_cross = int(1.0 * gp.tree_pop_max) # percentage of subsequent population to be generated through Crossover Reproduction gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode gp.cores = 1 # replace '1' with 'int(gp.core_count)' to auto-set to max; can be adjusted in 'i'nteractive mode diff --git a/karoo_gp_server.py b/karoo_gp_server.py index 366516f..5053a91 100644 --- a/karoo_gp_server.py +++ b/karoo_gp_server.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc UCT / AIMS # Much thanks to Emmanuel Dufourq and Arun Kumar for their support, guidance, and free psychotherapy sessions -# version 0.9.1.4 +# version 0.9.1.5 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -15,15 +15,15 @@ import sys # sys.path.append('modules/') # add the directory 'modules' to the cu import karoo_gp_base_class; 
gp = karoo_gp_base_class.Base_GP() # parameters configuration -gp.kernel = 'c' # ['a','c','m'] fitness function: ABS Value, Classification, or Matching -gp.class_labels = 3 # number of class labels in the feature set -tree_type = 'r' # ['f','g','r'] Tree type: full, grow, or ramped half/half +gp.kernel = 'c' # ['r','c','m'] fitness function: (r)egression, (c)lassification, or (m)atching +gp.class_labels = 3 # [2,3, ...] number of class labels in the feature set +tree_type = 'r' # ['f','g','r'] Tree (t)ype: (f)ull, (g)row, or (r)amped half/half tree_depth_base = 3 # [3,10] maximum Tree depth for the initial population, where nodes = 2^(depth + 1) - 1 gp.tree_depth_max = 3 # [3,10] maximum Tree depth for the entire run; introduces potential bloat gp.tree_depth_min = 3 # [3,100] minimum number of nodes gp.tree_pop_max = 100 # [10,1000] maximum population -gp.generation_max = 10 # [1,1000] number of generations -gp.display = 'm' # ['i','m','g','s','db','t'] display mode: Interactive, Minimal, Generational, Server, Debug, or Timer +gp.generation_max = 10 # [1,100] number of generations +gp.display = 'm' # ['i','m','g','s'] display mode: (i)nteractive, (m)inimal, (g)enerational, or (s)erver gp.evolve_repro = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Reproduction gp.evolve_point = int(0.1 * gp.tree_pop_max) # percentage of subsequent population to be generated through Point Mutation