diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 5c963bb..e7b0609 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,65 @@ +2017 07/03 + +I am pleased to announce that Karoo GP is now updated to include a full suite of mathematical operators. I thank the +expert code development of Iurii Milovanov. He was instrumental in bringing TensorFlow into Karoo last year, and has +now provided this important improvement. + +Iurii has prepared an efficient and elegant solution for the addition of a full range of operators, adding support for +boolean operations (a and b or c), comparison ops (a > b <= c == d) and generally speaking any function available in +TensorFlow, as given here: https://www.tensorflow.org/api_guides/python/math_ops + +Now there is a mapping in Karoo GP that connects an expression to the TF function: + + OPERATOR EXAMPLE + add a + b + subtract a - b + multiply a * b + divide a / b + pow a ** 2 + + negative -a + logical_and a and b + logical_or a or b + logical_not not a + equal a == b + not_equal a != b + less a < b + less_equal a <= b + greater a > b + greater_equal a >= 1 + + abs abs(a) + sign sign(a) + square square(a) + sqrt sqrt(a) + pow pow(a, b) + log log(a) + log1p log1p(a) + cos cos(a) + sin sin(a) + tan tan(a) + acos acos(a) + asin asin(a) + atan atan(a) + +Now when Karoo parses an expression like "asin(x + 10)" it first parses and transforms the args "x + 10" and passes +the output to the “tf.asin” function. So now Karoo is able to evaluate incredibly complex mathematical expressions. + +However, not all are immediately supported. Please refer to the karoo_gp/files/templates/operators_list.txt for the +most recent update. The issues are with the recursive function I use to flatten the GP Trees and Sympy, not TF. Please be +patient as I work to provide support for all operators now recognized with Iurii's update. 
+ +And remember, in its current form, Karoo requires that certain "one sided" operators such as abs, cos, and sin use +a unique formatting: + + + sin,2 + - sin,2 + * sin,2 + / sin,2 + +The same operators_list.txt provides a full break-down. Again, this is due to the recursive function, as noted above. + + 2017 06/06 A number of changes applied in March and April. My apologies for the delayed updates to github. diff --git a/karoo_gp_base_class.py b/karoo_gp_base_class.py index b04db2b..e34efd3 100644 --- a/karoo_gp_base_class.py +++ b/karoo_gp_base_class.py @@ -2,7 +2,7 @@ # Define the methods and global variables used by Karoo GP # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.3b +# version 1.0.4 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -204,7 +204,7 @@ class Base_GP(object): self.fx_karoo_crossover() # method 4 - Crossover self.fx_eval_generation() # evaluate all Trees in a single generation - self.population_a = self.fx_evolve_pop_copy(self.population_b, ['GP Tree by Kai Staats, Generation ' + str(self.generation_id)]) + self.population_a = self.fx_evolve_pop_copy(self.population_b, ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)]) # "End of line, man!" 
--CLU print '\n \033[36m Karoo GP has an ellapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m' @@ -286,7 +286,7 @@ class Base_GP(object): self.functions = np.loadtxt(func_dict[self.kernel], delimiter=',', skiprows=1, dtype = str) # load the user defined functions (operators) self.terminals = header.readline().split(','); self.terminals[-1] = self.terminals[-1].replace('\n','') # load the user defined terminals (operands) self.class_labels = len(np.unique(data_y)) # load the user defined labels for classification or solutions for regression - self.coeff = np.loadtxt(cwd + '/files/coefficients.csv', delimiter=',', skiprows=1, dtype = str) # load the user defined coefficients - NOT USED YET + #self.coeff = np.loadtxt(cwd + '/files/coefficients.csv', delimiter=',', skiprows=1, dtype = str) # load the user defined coefficients - NOT USED YET ### 2) from the dataset, extract TRAINING and TEST data ### @@ -855,7 +855,7 @@ class Base_GP(object): self.fx_karoo_crossover() # method 4 - Crossover self.fx_eval_generation() # evaluate all Trees in a single generation - self.population_a = self.fx_evolve_pop_copy(self.population_b, ['GP Tree by Kai Staats, Generation ' + str(self.generation_id)]) + self.population_a = self.fx_evolve_pop_copy(self.population_b, ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)]) # "End of line, man!" --CLU target = open(self.filename['f'], 'w') # reset the .csv file for the final population @@ -1315,7 +1315,7 @@ class Base_GP(object): ''' Part 1 evaluates each expression against the data, line for line. This is the most time consuming and computationally expensive part of genetic programming. When GPUs are available, the performance can increase - by many orders of magnitude. + by many orders of magnitude for datasets measured in millions of data. Part 2 evaluates every Tree in each generation to determine which have the best, overall fitness score. 
This could be the highest or lowest depending upon if the fitness function is maximising (higher is better) or @@ -1757,20 +1757,25 @@ class Base_GP(object): def fx_fitness_gene_pool(self): - ''' - With the introduction of the minimum number of nodes parameter (gp.tree_depth_min), the means by which the - lower node count is enforced is through the creation of a gene pool from those Trees which contain equal or - greater nodes to the user defined limit. + ''' + The gene pool was introduced as a means by which advanced users could define additional constraints on the evolved + functions, in an effort to guide the evolutionary process. The first constraint introduced is the 'minimum number + of nodes' parameter (gp.tree_depth_min). This defines the minimum number of nodes (in the context of Karoo, this + refers to both functions (operators) and terminals (operands)). - When the minimum node count is human guided, it can help keep the solution from defaulting to a local minimum, - as with 't/t' in the Kepler problem. However, the ramification of this limitation on the evolutionary process - has not been fully studied. + When the minimum node count is human guided, it can keep the solution from defaulting to a local minimum, as with + 't/t' in the Kepler problem, by forcing a more complex solution. If you find that when engaging the Regression + kernel you are met with a solution which is too simple (eg: linear instead of non-linear), try increasing the + minimum number of nodes (with the launch of Karoo, or mid-stream by way of the pause menu). - This method is automatically invoked with every Tournament Selection ('fx_fitness_tournament'). + What's more, you can add additional constraints to the Gene Pool, thereby customizing how the next generation is + selected. 
At this time, the gene pool does *not* limit the number of times any given Tree may be selected for mutation or reproduction nor does it take into account parsimony (seeking the simplest multivariate expression). + This method is automatically invoked with every Tournament Selection ('fx_fitness_tournament'). + Arguments required: none ''' @@ -2659,6 +2664,8 @@ class Base_GP(object): for row in range(0, 13): # increment through each row in the array Tree target.writerows([population[tree][row]]) + return + def fx_archive_params_write(self, app): # tested 2017 02/13 diff --git a/karoo_gp_main.py b/karoo_gp_main.py index 3f951f0..6f84b26 100644 --- a/karoo_gp_main.py +++ b/karoo_gp_main.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.3b +# version 1.0.4 ''' A word to the newbie, expert, and brave-- @@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode -gp.precision = 10 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded +gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded #++++++++++++++++++++++++++++++++++++++++++ @@ -233,7 +233,7 @@ Configuration' (top). for gp.generation_id in range(2, gp.generation_max + 1): # loop through 'generation_max' print '\n Evolve a population of Trees for Generation', gp.generation_id, '...' 
- gp.population_b = ['GP Tree by Kai Staats, Evolving Generation'] # initialise population_b to host the next generation + gp.population_b = ['Karoo GP by Kai Staats, Evolving Generation'] # initialise population_b to host the next generation gp.fx_fitness_gene_pool() # generate the viable gene pool (compares against gp.tree_depth_min) gp.fx_karoo_reproduce() # method 1 - Reproduction @@ -242,7 +242,7 @@ for gp.generation_id in range(2, gp.generation_max + 1): # loop through 'generat gp.fx_karoo_crossover() # method 4 - Crossover Reproduction gp.fx_eval_generation() # evaluate all Trees in a single generation - gp.population_a = gp.fx_evolve_pop_copy(gp.population_b, ['GP Tree by Kai Staats, Generation ' + str(gp.generation_id)]) + gp.population_a = gp.fx_evolve_pop_copy(gp.population_b, ['Karoo GP by Kai Staats, Generation ' + str(gp.generation_id)]) #++++++++++++++++++++++++++++++++++++++++++ diff --git a/karoo_gp_server.py b/karoo_gp_server.py index 3f77303..f0f59d0 100644 --- a/karoo_gp_server.py +++ b/karoo_gp_server.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.3b +# version 1.0.4 ''' A word to the newbie, expert, and brave-- @@ -34,15 +34,19 @@ If you include the path to an external dataset, it will auto-load at launch: You can include a number of additional arguments which override the default values, as follows: - -ker [r,c,m] fitness function: (r)egression, (c)lassification, or (m)atching - -typ [f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half - -bas [3...10] maximum Tree depth for the initial population - -max [3...10] maximum Tree depth for the entire run - -min [3...100] minimum number of nodes + -ker [r,c,m] fitness function: (r)egression, (c)lassification, or (m)atching + -typ [f,g,r] Tree type: (f)ull, (g)row, or (r)amped 
half/half + -bas [3...10] maximum Tree depth for the initial population + -max [3...10] maximum Tree depth for the entire run + -min [3...100] minimum number of nodes -pop [10...1000] maximum population - -gen [1...100] number of generations + -gen [1...100] number of generations + -tor [1...100] number of trees selected for the tournament + -fil [filename] an external dataset -Note that if you include any of the above flags, then you must also include a flag to load an external dataset: +Note that if you include any of the above flags, then you must also include a flag to load an external dataset. + +An example is given, as follows: $ python karoo_gp_server.py -ker c -typ r -bas 4 -fil /[path]/[to_your]/[filename].csv ''' @@ -72,16 +76,16 @@ gp.tree_depth_max = int(args.depth_max) gp.tree_depth_min = int(args.depth_min) gp.tree_pop_max = int(args.pop_max) gp.generation_max = int(args.gen_max) +gp.tourn_size = int(args.tor_size) filename = str(args.filename) -gp.display = 's' # display mode is set to (s)ilent gp.evolve_repro = int(0.1 * gp.tree_pop_max) # quantity of a population generated through Reproduction gp.evolve_point = int(0.0 * gp.tree_pop_max) # quantity of a population generated through Point Mutation gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generated through Branch Mutation gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover -gp.tourn_size = int(args.tor_size) # qty of individuals entered into each tournament; can be adjusted in 'i'nteractive mode -gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval' +gp.display = 's' # display mode is set to (s)ilent +gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval' # run Karoo GP gp.karoo_gp(tree_type, tree_depth_base, filename)