diff --git a/Karoo_GP_User_Guide.pdf b/Karoo_GP_User_Guide.pdf index e34005c..26e7069 100644 Binary files a/Karoo_GP_User_Guide.pdf and b/Karoo_GP_User_Guide.pdf differ diff --git a/karoo_gp_base_class.py b/karoo_gp_base_class.py index 638d309..9d014d4 100644 --- a/karoo_gp_base_class.py +++ b/karoo_gp_base_class.py @@ -2,7 +2,7 @@ # Define the methods and global variables used by Karoo GP # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.2 +# version 1.0.3 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -14,6 +14,7 @@ likely find more enjoyment of this particular flavour of GP with a little unders import sys import os import csv +import time import numpy as np import sklearn.metrics as skm @@ -106,6 +107,7 @@ class Base_GP(object): ### Global variables initiated and/or used by Sympy ### 'gp.algo_raw' a Sympy string which represents a flattened tree 'gp.algo_sym' a Sympy executable version of algo_raw + 'gp.fittest_dict' a dictionary of the most fit trees, compiled during fitness function execution ### Variables used for evolutionary management ### 'gp.population_a' the root generation from which Trees are chosen for mutation and reproduction @@ -148,11 +150,12 @@ class Base_GP(object): ''' self.karoo_banner() + start = time.time() # start the clock for the timer # construct first generation of Trees self.fx_karoo_data_load(tree_type, tree_depth_base, filename) self.generation_id = 1 # set initial generation ID - self.population_a = ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)] # a list which will store all Tree arrays, one generation at a time + self.population_a = ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)] # list to store all Tree arrays, one generation at a time self.fx_karoo_construct(tree_type, tree_depth_base) # construct the first population of Trees # evaluate first generation of Trees @@ -174,12 +177,13 @@ class Base_GP(object): self.fx_eval_generation() # evaluate all Trees in a single generation self.population_a = self.fx_evolve_pop_copy(self.population_b, ['GP Tree by Kai Staats, Generation ' + str(self.generation_id)]) - + # "End of line, man!" 
--CLU + print '\n \033[36m Karoo GP has an elapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m' self.fx_archive_tree_write(self.population_b, 'f') # save the final generation of Trees to disk self.fx_archive_params_write('Server') # save run-time parameters to disk - print '\033[3m Congrats!\033[0;0m Your multi-generational Karoo GP run is complete.\n' + print '\n \033[3m Congrats!\033[0;0m Your multi-generational Karoo GP run is complete.\n' sys.exit() # return Karoo GP to the command line to support bash and chron job execution # return @@ -415,7 +419,7 @@ class Base_GP(object): for n in range(self.evolve_repro): # quantity of Trees to be copied without mutation tourn_winner = self.fx_fitness_tournament(self.tourn_size) # perform tournament selection for each reproduction - tourn_winner = self.fx_evolve_fitness_wipe(tourn_winner) # remove fitness data + tourn_winner = self.fx_evolve_fitness_wipe(tourn_winner) # wipe fitness data self.population_b.append(tourn_winner) # append array to next generation population of Trees return @@ -633,7 +637,6 @@ class Base_GP(object): query = raw_input('\t Enter quantity of Trees to be generated by Reproduction: ') if query not in str(menu): raise ValueError() elif query == '': break - # tmp_repro = int(float(query) / 100 * self.tree_pop_max); break tmp_repro = int(float(query)); break except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m' @@ -642,7 +645,6 @@ class Base_GP(object): query = raw_input('\t Enter quantity of Trees to be generated by Point Mutation: ') if query not in str(menu): raise ValueError() elif query == '': break - # tmp_point = int(float(query) / 100 * self.tree_pop_max); break tmp_point = int(float(query)); break except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m' @@ -651,7 +653,6 @@ class Base_GP(object): query = raw_input('\t Enter quantity of Trees to be generated by Branch Mutation: ') if query not in str(menu): raise ValueError() elif query == '': break - # tmp_branch = int(float(query) / 100 * self.tree_pop_max); break tmp_branch = int(float(query)); break except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m' @@ -660,11 +661,9 @@ class Base_GP(object): query = raw_input('\t Enter quantity of Trees to be generated by Crossover: ') if query not in str(menu): raise ValueError() elif query == '': break - # tmp_cross = int(float(query) / 100 * self.tree_pop_max); break tmp_cross = int(float(query)); break except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m' - # if tmp_repro + tmp_point + tmp_branch + tmp_cross != 100: print '\n\t The sum of the above does not equal 100%. Try again ...' if tmp_repro + tmp_point + tmp_branch + tmp_cross != self.tree_pop_max: print '\n\t The sum of the above does not equal', self.tree_pop_max, 'Try again ...'
else: print '\n\t The revised balance of genetic operators is:' @@ -676,15 +675,15 @@ class Base_GP(object): elif pause == 'l': # display dictionary of Trees with the best fitness score print '\n\t The leading Trees and their associated expressions are:' - for item in sorted(self.fittest_dict): print '\t ', item, ':', self.fittest_dict[item] - + for n in sorted(self.fittest_dict): print '\t ', n, ':', self.fittest_dict[n] + elif pause == 't': # evaluate a Tree against the TEST data if self.generation_id > 1: menu = range(1, len(self.population_b)) while True: try: - query = raw_input('\n\t Select a Tree in population_b to evaluate for Precision & Recall: ') + query = raw_input('\n\t Select a Tree in population_b to test: ') if query not in str(menu) or query == '0': raise ValueError() elif query == '': break @@ -888,7 +887,7 @@ class Base_GP(object): self.pop_node_c1 = '' # pos 9: child node 1 self.pop_node_c2 = '' # pos 10: child node 2 self.pop_node_c3 = '' # pos 11: child node 3 (assumed max of 3 with boolean operator 'if') - self.pop_fitness = '' # pos 12: fitness value following Tree evaluation + self.pop_fitness = '' # pos 12: fitness score following Tree evaluation self.tree = np.array([ ['TREE_ID'],['tree_type'],['tree_depth_base'],['NODE_ID'],['node_depth'],['node_type'],['node_label'],['node_parent'],['node_arity'],['node_c1'],['node_c2'],['node_c3'],['fitness'] ]) @@ -1271,7 +1270,7 @@ class Base_GP(object): self.fx_evolve_tree_renum(self.population_b) # population renumber self.fx_fitness_gym(self.population_b) # run 'fx_eval', 'fx_fitness', 'fx_fitness_store', and fitness record - self.fx_archive_tree_write(self.population_b, 'a') # archive the current, evolved generation of Trees as the next foundation population + self.fx_archive_tree_write(self.population_b, 'a') # archive current population as foundation for next generation if self.display != 's': print '\n Copy gp.population_b to gp.population_a\n' @@ -1295,7 +1294,7 @@ class Base_GP(object): minimising (lower is better). The total fitness score is then saved with each Tree in the external .csv file. Part 3 compares the fitness of each Tree to the prior best fit in order to track those that improve with each - comparison. For matching functions, all the Trees will have the same fitness value, but they may present more + comparison. For matching functions, all the Trees will have the same fitness score, but they may present more than one solution. For minimisation and maximisation functions, the final Tree should present the best overall fitness for that generation. It is important to note that Part 3 does *not* in any way influence the Tournament Selection which is a stand-alone process. 
@@ -1319,7 +1318,7 @@ class Base_GP(object): expr = str(self.algo_sym) # get sympified expression and process it with TF - tested 2017 02/02 result = self.fx_fitness_eval(expr, self.data_train) - fitness = result['fitness'] # extract fitness value + fitness = result['fitness'] # extract fitness score if self.display == 'i': print '\t \033[36m with fitness sum:\033[1m', fitness, '\033[0;0m\n' @@ -1331,24 +1330,24 @@ class Base_GP(object): if self.kernel == 'c': # display best fit Trees for the CLASSIFY kernel if fitness >= fitness_best: # find the Tree with Maximum fitness score fitness_best = fitness # set best fitness score - self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary + self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if fitness >= prior elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel if fitness_best == 0: fitness_best = fitness # set the baseline first time through if fitness <= fitness_best: # find the Tree with Minimum fitness score fitness_best = fitness # set best fitness score - self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary + self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if fitness <= prior elif self.kernel == 'm': # display best fit Trees for the MATCH kernel if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows fitness_best = fitness # set best fitness score - self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary + self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if all rows match # elif self.kernel == '[other]': # display best fit Trees for the [other] kernel - # if fitness >= fitness_best: # find the Tree with [Maximum or Minimum] fitness score + # if fitness [>=, <=] fitness_best: # find the Tree with [Maximum or Minimum] fitness score # fitness_best = fitness # set best fitness score # self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary - + print '\n\033[36m ', len(self.fittest_dict.keys()), 'trees\033[1m', np.sort(self.fittest_dict.keys()), '\033[0;0m\033[36moffer the highest fitness scores.\033[0;0m' if self.display == 'g': self.fx_karoo_pause(0) @@ -1358,7 +1357,7 @@ class Base_GP(object): def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval ''' - Computes tree expression using TensorFlow (TF) returning results and fitness values. + Computes tree expression using TensorFlow (TF) returning results and fitness scores. This method orchestrates most of the TF routines by parsing input string expression and converting it into TF operation graph which then is processed in an isolated TF session to compute the results and corresponding fitness @@ -1383,7 +1382,7 @@ class Base_GP(object): 'labels' - an array of the labels extracted from the results; defined only for CLASSIFY kernel, None otherwise 'solution' - an array of the solution values extracted from the data (variable 's' in the dataset) 'pairwise_fitness' - an array of the element-wise results of applying corresponding fitness kernel function - 'fitness' - aggregated scalar fitness value + 'fitness' - aggregated scalar fitness score Arguments required: expr, data ''' @@ -1593,7 +1592,7 @@ class Base_GP(object): tree[12][1] = fitness # store the fitness with each tree tree[12][2] = len(str(self.algo_raw)) # store the length of the raw algo for parsimony - # if len(tree[3]) > 4: # if the Tree array is wide enough ... 
+ # if len(tree[3]) > 4: # if the Tree array is wide enough -- SEE SCRATCHPAD return @@ -1701,11 +1700,8 @@ class Base_GP(object): This method is automatically invoked with every Tournament Selection ('fx_fitness_tournament'). - At this point in time, the gene pool does *not* limit the number of times any given Tree may be selected for - mutation or reproduction nor does it take into account parsimony (seeking the simplest expression). Nor does - a 'divide by zero' error keep a tree from entering the gene pool, as it might contain other, beneficial code - to contribute to the next generation. However, trees with 'error' are given a fitness score of 0 and therefore - will eventually be removed from the gene pool. + At this time, the gene pool does *not* limit the number of times any given Tree may be selected for mutation or + reproduction nor does it take into account parsimony (seeking the simplest multivariate expression). Arguments required: none ''' @@ -1717,7 +1713,7 @@ class Base_GP(object): self.fx_eval_poly(self.population_a[tree_id]) # extract the expression - if len(self.population_a[tree_id][3])-1 >= self.tree_depth_min and self.algo_sym != 1: # if Tree meets the requirements + if len(self.population_a[tree_id][3])-1 >= self.tree_depth_min and self.algo_sym != 1: # check if Tree meets the requirements if self.display == 'i': print '\t\033[36m Tree', tree_id, 'has >=', self.tree_depth_min, 'nodes and is added to the gene pool\033[0;0m' self.gene_pool.append(self.population_a[tree_id][0][1]) @@ -1751,8 +1747,8 @@ class Base_GP(object): for i in range(len(result['result'])): print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i]) - print '\n Fitness value: {}'.format(result['fitness']) - print '\n Classification report:\n', skm.classification_report(result['solution'], result['labels'][0]) + print '\n Fitness score: {}'.format(result['fitness']) + print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0]) print ' Confusion matrix:\n', skm.confusion_matrix(result['solution'], result['labels'][0]) return @@ -1761,14 +1757,14 @@ class Base_GP(object): def fx_fitness_test_regress(self, result): ''' - Print the Fitness value and Mean Squared Error for a REGRESSION run against the test data. + Print the Fitness score and Mean Squared Error for a REGRESSION run against the test data. 
''' for i in range(len(result['result'])): print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i]) MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness'] - print '\n\t Fitness value: {}'.format(fitness) + print '\n\t Regression fitness score: {}'.format(fitness) print '\t Mean Squared Error: {}'.format(MSE) return @@ -1783,8 +1779,8 @@ class Base_GP(object): for i in range(len(result['result'])): print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i])) - print '\n\tFitness value: {}'.format(result['fitness']) - + print '\n\tMatching fitness score: {}'.format(result['fitness']) + return @@ -1797,7 +1793,7 @@ class Base_GP(object): # for i in range(len(result['result'])): # print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i])) - # print '\n\tFitness value: {}'.format(result['fitness']) + # print '\n\tFitness score: {}'.format(result['fitness']) # return @@ -1832,7 +1828,7 @@ class Base_GP(object): else: print '\n\t\033[31m ERROR! In fx_evolve_point_mutate, node_type =', tree[5][node], '\033[0;0m'; self.fx_karoo_pause(0) - tree = self.fx_evolve_fitness_wipe(tree) # remove fitness data + tree = self.fx_evolve_fitness_wipe(tree) # wipe fitness data if self.display == 'db': print '\n\033[36m This is tourn_winner after node\033[1m', node, '\033[0;0m\033[36mmutation and updates:\033[0;0m\n', tree; self.fx_karoo_pause(0) @@ -1869,7 +1865,7 @@ class Base_GP(object): rnd = np.random.randint(0, len(self.terminals) - 1) # call the previously loaded .csv which contains all terminals tree[6][branch[n]] = self.terminals[rnd] # replace terminal (variable) - tree = self.fx_evolve_fitness_wipe(tree) # remove fitness data + tree = self.fx_evolve_fitness_wipe(tree) # wipe fitness data if self.display == 'db': print '\n\033[36m This is tourn_winner after nodes\033[1m', branch, '\033[0;0m\033[36mwere mutated and updated:\033[0;0m\n', tree; self.fx_karoo_pause(0) @@ -2014,7 +2010,7 @@ class Base_GP(object): offspring = self.fx_evolve_branch_body_copy(offspring) # copy remaining nodes in 'branch_y' ('gp.tree') to 'offspring' offspring = self.fx_evolve_tree_prune(offspring, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10 - offspring = self.fx_evolve_fitness_wipe(offspring) # wipe fitness data and return 'offspring' + offspring = self.fx_evolve_fitness_wipe(offspring) # wipe fitness data return offspring @@ -2389,7 +2385,7 @@ class Base_GP(object): Arguments required: tree ''' - tree[12][1:] = '' # remove all 'fitness' data + tree[12][1:] = '' # wipe fitness data return tree @@ -2605,7 +2601,6 @@ class Base_GP(object): ''' file = open(self.path + '/log_config.txt', 'w') - file.write('Karoo GP ' + app) file.write('\n launched: ' + str(self.datetime)) file.write('\n dataset: ' + str(self.dataset)) @@ -2616,7 +2611,7 @@ class Base_GP(object): # file.write('tree type: ' + tree_type) # file.write('tree depth base: ' + str(tree_depth_base)) file.write('\n tree depth max: ' + str(self.tree_depth_max)) - file.write('\n tree depth min: ' + str(self.tree_depth_min)) + file.write('\n min node count: ' + str(self.tree_depth_min)) file.write('\n') file.write('\n genetic operator Reproduction: ' + str(self.evolve_repro)) file.write('\n genetic operator Point Mutation: ' + str(self.evolve_point)) @@ 
-2627,12 +2622,10 @@ class Base_GP(object): file.write('\n population: ' + str(self.tree_pop_max)) file.write('\n number of generations: ' + str(self.generation_id)) file.write('\n\n') - file.close() file = open(self.path + '/log_test.txt', 'w') - file.write('Karoo GP ' + app) file.write('\n launched: ' + str(self.datetime)) file.write('\n dataset: ' + str(self.dataset)) @@ -2640,30 +2633,60 @@ class Base_GP(object): if len(self.fittest_dict) > 0: - file.write('\n The leading Trees and their associated expressions are:') - for item in sorted(self.fittest_dict): - file.write('\n\t ' + str(item) + ' : ' + str(self.fittest_dict[item])) - - # test the highest numbered Tree and write to the .txt log - self.fx_eval_poly(self.population_b[int(item)]) # generate the raw and sympified equation for the given Tree using SymPy + fitness_best = 0 + fittest_tree = 0 + + # original method, using pre-built fittest_dict + # file.write('\n The leading Trees and their associated expressions are:') + # for n in sorted(self.fittest_dict): + # file.write('\n\t ' + str(n) + ' : ' + str(self.fittest_dict[n])) + + # revised method, re-evaluating all Trees from stored fitness score + for tree_id in range(1, len(self.population_b)): + + fitness = float(self.population_b[tree_id][12][1]) + + if self.kernel == 'c': # display best fit Trees for the CLASSIFY kernel + if fitness >= fitness_best: # find the Tree with Maximum fitness score + fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree + + elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel + if fitness_best == 0: fitness_best = fitness # set the baseline first time through + if fitness <= fitness_best: # find the Tree with Minimum fitness score + fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree + + elif self.kernel == 'm': # display best fit Trees for the MATCH kernel + if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows + fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree + + # elif self.kernel == '[other]': # display best fit Trees for the [other] kernel + # if fitness [>=, <=] fitness_best: # find the Tree with [Maximum or Minimum] fitness score + # fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree + + # print 'fitness_best:', fitness_best, 'fittest_tree:', fittest_tree + + + # test the most fit Tree and write to the .txt log + self.fx_eval_poly(self.population_b[int(fittest_tree)]) # generate the raw and sympified equation for the given Tree using SymPy expr = str(self.algo_sym) # get simplified expression and process it by TF - tested 2017 02/02 result = self.fx_fitness_eval(expr, self.data_test, get_labels=True) - - file.write('\n\n Tree ' + str(item) + ' yields (sym): ' + str(self.algo_sym)) - - if self.kernel == 'c': - file.write('\n\n Fitness value: {}'.format(result['fitness'])) - file.write('\n\n Classification report:\n {}'.format(skm.classification_report(result['solution'], result['labels'][0]))) - file.write('\n Confusion matrix:\n {}'.format(skm.confusion_matrix(result['solution'], result['labels'][0]))) + file.write('\n\n Tree ' + str(fittest_tree) + ' is the most fit, with expression:') + file.write('\n\n ' + str(self.algo_sym)) + + if self.kernel == 'c': + file.write('\n\n Classification fitness score: {}'.format(result['fitness'])) + file.write('\n\n Precision-Recall report:\n {}'.format(skm.classification_report(result['solution'], result['labels'][0]))) + file.write('\n Confusion matrix:\n 
{}'.format(skm.confusion_matrix(result['solution'], result['labels'][0]))) + elif self.kernel == 'r': MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness'] - file.write('\n\n Fitness value: {}'.format(fitness)) + file.write('\n\n Regression fitness score: {}'.format(fitness)) file.write('\n Mean Squared Error: {}'.format(MSE)) - + elif self.kernel == 'm': - file.write('\n\n Fitness value: {}'.format(result['fitness'])) - + file.write('\n\n Matching fitness score: {}'.format(result['fitness'])) + # elif self.kernel == '[other]': # file.write( ... ) diff --git a/karoo_gp_main.py b/karoo_gp_main.py index 4ceb4af..bd176a4 100644 --- a/karoo_gp_main.py +++ b/karoo_gp_main.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.1 +# version 1.0.3 ''' A word to the newbie, expert, and brave-- @@ -33,7 +33,7 @@ If you include the path to an external dataset, it will auto-load at launch: import sys # sys.path.append('modules/') to add the directory 'modules' to the current path import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP() - +import time #++++++++++++++++++++++++++++++++++++++++++ # User Defined Configuration | @@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode -gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded +gp.precision = 10 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded #++++++++++++++++++++++++++++++++++++++++++ @@ -171,6 +171,8 @@ constructed from scratch. All parameters which define the Trees were set by the If the user has selected 'Play' mode, this is the only generation to be constructed, and then GP Karoo terminates. ''' +start = time.time() # start the clock for the timer + filename = '' # temp place holder gp.fx_karoo_data_load(tree_type, tree_depth_base, filename) gp.generation_id = 1 # set initial generation ID @@ -205,7 +207,7 @@ if gp.display != 's': print ' Evaluate the first generation of Trees ...' if gp.display == 'i': gp.fx_karoo_pause(0) -gp.fx_fitness_gym(gp.population_a) # 1) extract polynomial from each Tree; 2) evaluate fitness, store; 3) display +gp.fx_fitness_gym(gp.population_a) # generate expression, evaluate fitness, compare fitness gp.fx_archive_tree_write(gp.population_a, 'a') # save the first generation of Trees to disk # no need to continue if only 1 generation or fewer than 10 Trees were designated by the user @@ -247,7 +249,9 @@ for gp.generation_id in range(2, gp.generation_max + 1): # loop through 'generat # "End of line, man!" 
--CLU | #++++++++++++++++++++++++++++++++++++++++++ +print '\n \033[36m Karoo GP has an elapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m' + gp.fx_archive_tree_write(gp.population_b, 'f') # save the final generation of Trees to disk gp.fx_karoo_eol() - + diff --git a/karoo_gp_server.py b/karoo_gp_server.py index 3a6e8ed..671b8ce 100644 --- a/karoo_gp_server.py +++ b/karoo_gp_server.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.1 +# version 1.0.3 ''' A word to the newbie, expert, and brave-- @@ -54,11 +54,12 @@ import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP() ap = argparse.ArgumentParser(description = 'Karoo GP Server') ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching') ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half') -ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = '[3...10] maximum Tree depth for the initial population') -ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 3, help = '[3...10] maximum Tree depth for the entire run') +ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population') +ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run') ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes') ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population') -ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations') +ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations') +ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size') ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv') args = ap.parse_args() @@ -79,8 +80,8 @@ gp.evolve_point = int(0.0 * gp.tree_pop_max) # quantity of a population generate gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generated through Branch Mutation gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover -gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode -gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded +gp.tourn_size = int(args.tor_size) # qty of individuals entered into each tournament; can be adjusted in 'i'nteractive mode +gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval' # run Karoo GP gp.karoo_gp(tree_type, tree_depth_base, filename)
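Note on the new server flag: this patch adds a '-tor' argument (dest 'tor_size', default 10) to karoo_gp_server.py and feeds it into gp.tourn_size via int(args.tor_size), so the tournament size can now be set from the command line rather than edited in the script. As a purely illustrative invocation (not taken from the repository docs), something like 'python karoo_gp_server.py -gen 30 -tor 7' would run the default MATCH kernel against the default files/data_MATCH.csv dataset with a tournament size of 7.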
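The most substantive behavioural change above sits in fx_archive_params_write: rather than trusting the pre-built fittest_dict, the log-writing code now re-scans population_b and picks the most fit Tree from the fitness score stored at tree[12][1], with the comparison direction set by the kernel (maximise for CLASSIFY, minimise for REGRESSION, exact row count for MATCH). The minimal sketch below restates that per-kernel selection in isolation so the intent is easier to review; the function name select_fittest and the (tree_id, fitness) pair list are illustrative only and are not part of the Karoo GP API.

def select_fittest(scores, kernel, data_train_rows):
    '''
    Illustrative sketch, not Karoo GP code.
    scores           list of (tree_id, fitness) pairs, one per Tree in the population
    kernel           'c' CLASSIFY (maximise), 'r' REGRESSION (minimise), 'm' MATCH (exact)
    data_train_rows  number of training rows; a perfect MATCH equals this value
    '''
    fitness_best, fittest_tree = 0, 0

    for tree_id, fitness in scores:
        if kernel == 'c':                      # CLASSIFY: higher score wins
            if fitness >= fitness_best:
                fitness_best, fittest_tree = fitness, tree_id
        elif kernel == 'r':                    # REGRESSION: lower score wins
            if fitness_best == 0:              # seed the baseline on the first pass
                fitness_best = fitness
            if fitness <= fitness_best:
                fitness_best, fittest_tree = fitness, tree_id
        elif kernel == 'm':                    # MATCH: every training row must agree
            if fitness == data_train_rows:
                fitness_best, fittest_tree = fitness, tree_id

    return fittest_tree, fitness_best

# example: three Trees scored by the REGRESSION kernel (lower is better)
# select_fittest([(1, 4.2), (2, 1.7), (3, 2.9)], 'r', 10) returns (2, 1.7)

As in the patch itself, ties go to the later Tree because the comparisons use >= and <=, which mirrors how fittest_dict was populated during fx_fitness_gym.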