From 4b03562e4861bb1a3d0a400f1d51c9f6d90075d7 Mon Sep 17 00:00:00 2001 From: Kai Staats Date: Thu, 10 Aug 2017 16:50:06 +0100 Subject: [PATCH] v1.0.5 minor updates --- RELEASE_NOTES.txt | 22 ++++++ karoo_gp_base_class.py | 172 ++++++++++++++++------------------------- karoo_gp_main.py | 4 +- karoo_gp_server.py | 10 +-- 4 files changed, 94 insertions(+), 114 deletions(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index 82b6eac..ed80d54 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,25 @@ +2017 08/10 +Relatively light updates this time. + +Iurii fixed a minor bug in which the MATCHING function (used only for demonstrations) would not find a match despite +the fact that the output was correct, due to the way in which TensorFlow was handling floating-point precision. +Iurii used the numpy.allclose documentation as a reference to resolve the situation. + +I also modified the autosave to the runs/ directory such that if you are using an external dataset (quite likely), the +new directory (for each run) will be saved as [filename]_[date_time_stamp]/. The idea (thank you Marco) is to help keep +multiple, automated runs organized and more readily, visually inspected by name alone. + + +2017 07/21 + +In a rather embarrassing live demo, in which I asked the audience to create the dataset for Karoo, I discovered a +bug in the MATCH kernel in which a negative value in the dataset would cause that row to be discarded from the fitness +function --FIXED. + +I merged the 3 methods fx_fitness_train_classify, fx_fitness_train_regress, fx_fitness_train_match into fx_fitness_eval +in order to reduce the quantity of lines of code and simplify the workflow. + + 2017 07/03 I am pleased to announce that Karoo GP is now updated to include a full suite of mathematical operators.
I thank the diff --git a/karoo_gp_base_class.py b/karoo_gp_base_class.py index e34efd3..d165c9d 100644 --- a/karoo_gp_base_class.py +++ b/karoo_gp_base_class.py @@ -2,7 +2,7 @@ # Define the methods and global variables used by Karoo GP # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.4 +# version 1.0.5 ''' A NOTE TO THE NEWBIE, EXPERT, AND BRAVE @@ -323,7 +323,8 @@ class Base_GP(object): # self.datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S') self.datetime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S') - self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name + self.path = os.path.join(cwd, 'runs/', filename.split('.')[0] + '_' + self.datetime) # generate a unique directory name + # self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name if not os.path.isdir(self.path): os.makedirs(self.path) # make a unique directory self.filename = {} # a dictionary to hold .csv filenames @@ -1382,7 +1383,7 @@ class Base_GP(object): return - def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval + def fx_fitness_eval(self, expr, data, get_labels = False): ''' Computes tree expression using TensorFlow (TF) returning results and fitness scores. 
@@ -1423,31 +1424,70 @@ class Base_GP(object): with tf.Session(config=config) as sess: with sess.graph.device(self.tf_device): - # Load data into TF + # 1 - Load data into TF tensors = {} for i in range(len(self.terminals)): var = self.terminals[i] tensors[var] = tf.constant(data[:, i], dtype=tf.float32) - # Transform string expression into TF operation graph + # 2 - Transform string expression into TF operation graph result = self.fx_fitness_expr_parse(expr, tensors) labels = tf.no_op() # a placeholder, applies only to CLASSIFY kernel solution = tensors['s'] # solution value is assumed to be stored in 's' terminal - # Add fitness computation into TF graph + # 3 - Add fitness computation into TF graph if self.kernel == 'c': # CLASSIFY kernels + + ''' + Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel. + + This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set + loaded at run-time to evaluate the fitness of the selected kernel. + + This multiclass classifier compares each row of a given Tree to the known solution, comparing estimated values + (labels) generated by Karoo GP against the correct labels. This method is able to work with any number of + class labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those in between + are by default confined to the spacing of 1.0 each, as defined by: + + (solution - 1) < result <= solution + + The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the + origin. At the time of this writing, an odd number of class labels will generate an extra bin on the positive + side of origin, as the effect of enabling the middle bin to include both a negative and a positive space has + not yet been determined.
+ + Arguments required: result, solution + ''' + if get_labels: labels = tf.map_fn(self.fx_fitness_labels_map, result, dtype=[tf.int32, tf.string], swap_memory=True) - pairwise_fitness = self.fx_fitness_train_classify(result, tf.cast(solution, tf.float32)) + + skew = (self.class_labels / 2) - 1 + + rule11 = tf.equal(solution, 0) + rule12 = tf.less_equal(result, 0 - skew) + rule13 = tf.logical_and(rule11, rule12) + + rule21 = tf.equal(solution, self.class_labels - 1) + rule22 = tf.greater(result, solution - 1 - skew) + rule23 = tf.logical_and(rule21, rule22) + + rule31 = tf.less(solution - 1 - skew, result) + rule32 = tf.less_equal(result, solution - skew) + rule33 = tf.logical_and(rule31, rule32) + + pairwise_fitness = tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32) elif self.kernel == 'r': # REGRESSION kernel - pairwise_fitness = self.fx_fitness_train_regress(result, tf.cast(solution, tf.float32)) + pairwise_fitness = tf.abs(solution - result) elif self.kernel == 'm': # MATCH kernel - pairwise_fitness = self.fx_fitness_train_match(result, solution) + # pairwise_fitness = tf.cast(tf.equal(solution, result), tf.int32) # breaks due to floating points + RTOL, ATOL = 1e-05, 1e-08 + pairwise_fitness = tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32) # elif self.kernel == '[other]': # [OTHER] kernel - # pairwise_fitness = self.fx_fitness_train_[other](result ?, solution ?) + # pairwise_fitness = tf.cast(tf.___(solution, result) else: raise Exception('Kernel type is wrong or missing. You entered {}'.format(self.kernel)) @@ -1472,7 +1512,7 @@ class Base_GP(object): return self.fx_fitness_node_parse(tree, tensors) - def fx_chain_bool(self, values, operation, tensors): + def fx_fitness_chain_bool(self, values, operation, tensors): ''' Chains a sequence of boolean operations (e.g. 'a and b and c') into a single TensorFlow (TF) sub graph. 
@@ -1482,12 +1522,12 @@ class Base_GP(object): x = tf.cast(self.fx_fitness_node_parse(values[0], tensors), tf.bool) if len(values) > 1: - return operation(x, self.fx_chain_bool(values[1:], operation, tensors)) + return operation(x, self.fx_fitness_chain_bool(values[1:], operation, tensors)) else: return x - def fx_chain_compare(self, comparators, ops, tensors): + def fx_fitness_chain_compare(self, comparators, ops, tensors): ''' Chains a sequence of comparison operations (e.g. 'a > b < c') into a single TensorFlow (TF) sub graph. @@ -1498,7 +1538,7 @@ class Base_GP(object): x = self.fx_fitness_node_parse(comparators[0], tensors) y = self.fx_fitness_node_parse(comparators[1], tensors) if len(comparators) > 2: - return tf.logical_and(operators[type(ops[0])](x, y), self.fx_chain_compare(comparators[1:], ops[1:], tensors)) + return tf.logical_and(operators[type(ops[0])](x, y), self.fx_fitness_chain_compare(comparators[1:], ops[1:], tensors)) else: return operators[type(ops[0])](x, y) @@ -1528,10 +1568,10 @@ class Base_GP(object): return operators[node.func.id](*[self.fx_fitness_node_parse(arg, tensors) for arg in node.args]) elif isinstance(node, ast.BoolOp): # e.g. 
x or y - return self.fx_chain_bool(node.values, operators[type(node.op)], tensors) + return self.fx_fitness_chain_bool(node.values, operators[type(node.op)], tensors) elif isinstance(node, ast.Compare): # e.g., a > z - return self.fx_chain_compare([node.left] + node.comparators, node.ops, tensors) + return self.fx_fitness_chain_compare([node.left] + node.comparators, node.ops, tensors) else: raise TypeError(node) @@ -1549,8 +1589,6 @@ class Base_GP(object): elif solution == self.class_labels - 1 and result > solution - 1 - skew; fitness = 1: # check for last class (the right-most bin) elif solution - 1 - skew < result <= solution - skew; fitness = 1: # check for class bins between first and last else: fitness = 0 # no class match - - See 'fx_fitness_train_classify' for a description of the multi-class classifier. Arguments required: result ''' @@ -1567,84 +1605,6 @@ class Base_GP(object): return zero_rule - def fx_fitness_train_classify(self, result, solution): # CLASSIFICATION kernel - - ''' - Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel. - - This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set - loaded at run-time to evaluate the fitness of the selected kernel. - - This multiclass classifer compares each row of a given Tree to the known solution, comparing estimated values - (labels) generated by Karoo GP against the correct labels. This method is able to work with any number of class - labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those inbetween are - by default confined to the spacing of 1.0 each, as defined by: - - (solution - 1) < result <= solution - - The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the - origin. 
At the time of this writing, an odd number of class labels will generate an extra bin on the positive - side of origin as it has not yet been determined the effect of enabling the middle bin to include both a - negative and positive space. - - Arguments required: result, solution - ''' - - skew = (self.class_labels / 2) - 1 - rule11 = tf.equal(solution, 0) - rule12 = tf.less_equal(result, 0 - skew) - rule13 = tf.logical_and(rule11, rule12) - rule21 = tf.equal(solution, self.class_labels - 1) - rule22 = tf.greater(result, solution - 1 - skew) - rule23 = tf.logical_and(rule21, rule22) - rule31 = tf.less(solution - 1 - skew, result) - rule32 = tf.less_equal(result, solution - skew) - rule33 = tf.logical_and(rule31, rule32) - - return tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32) - - - def fx_fitness_train_regress(self, result, solution): # REGRESSION kernel - - ''' - Creates element-wise fitness computation TensorFlow (TF) sub-graph for REGRESSION kernel. - - This is a minimisation function which seeks a result which is closest to the solution. - - [need to write more] - - Arguments required: result, solution - ''' - - return tf.abs(solution - result) - - - def fx_fitness_train_match(self, result, solution): # MATCH kernel - - ''' - Creates element-wise fitness computation TensorFlow (TF) sub-graph for MATCH kernel. - - This is a maximization function which seeks an exact solution (a perfect match). - - [need to write more] - - Arguments required: result, solution - ''' - - return tf.cast(tf.equal(solution, result), tf.int32) - - - # def fx_fitness_train_[other](self, result, solution): # [OTHER] kernel - - # ''' - # Creates element-wise fitness computation TensorFlow (TF) sub-graph for [other] kernel. - - # This is a [minimisation or maximization] function which [insert description]. 
- - # return tf.[?]([insert formula]) - # ''' - - def fx_fitness_store(self, tree, fitness): ''' @@ -1668,18 +1628,16 @@ class Base_GP(object): def fx_fitness_tournament(self, tourn_size): ''' - Select one Tree by means of a Tournament in which 'tourn_size' contenders are randomly selected and then - compared for their respective fitness (as determined in 'fx_fitness_gym'). The tournament is engaged for each - of the four types of inter-generational evolution: reproduction, point mutation, branch (full and grow) - mutation, and crossover (sexual reproduction). + Multiple contenders ('tourn_size') are randomly selected and then compared for their respective fitness, as + determined in 'fx_fitness_gym'. The tournament is engaged to select a single Tree for each invocation of the + genetic operators: reproduction, mutation (point, branch), and crossover (sexual reproduction). The original Tournament Selection drew directly from the foundation generation (gp.generation_a). However, with the introduction of a minimum number of nodes as defined by the user ('gp.tree_depth_min'), - 'gp.gene_pool' provides only from those Trees which meet all criteria. + 'gp.gene_pool' limits the Trees to those which meet all criteria. - With upper (max depth) and lower (min nodes) invoked, one may enjoy interesting results. Stronger boundary - parameters (a reduced gap between the min and max number of nodes) may invoke more compact solutions, but also - runs the risk of elitism, even total population die-off where a healthy population once existed. + Stronger boundary parameters (a reduced gap between the min and max number of nodes) may invoke more compact + solutions, but also runs the risk of elitism, even total population die-off where a healthy population once existed. 
Arguments required: tourn_size ''' @@ -1818,7 +1776,7 @@ class Base_GP(object): ''' for i in range(len(result['result'])): - print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i]) + print '\t\033[36m Data row {} predicts class:\033[1m {} ({} True)\033[0;0m\033[36m as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i]) print '\n Fitness score: {}'.format(result['fitness']) print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0]) @@ -1834,7 +1792,7 @@ class Base_GP(object): ''' for i in range(len(result['result'])): - print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i]) + print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i]) MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness'] print '\n\t Regression fitness score: {}'.format(fitness) @@ -1850,7 +1808,7 @@ class Base_GP(object): ''' for i in range(len(result['result'])): - print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i])) + print '\t\033[36m Data row {} predicts match:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i]) print '\n\tMatching fitness score: {}'.format(result['fitness']) diff --git a/karoo_gp_main.py b/karoo_gp_main.py index 6f84b26..98cc3f3 100644 --- a/karoo_gp_main.py +++ b/karoo_gp_main.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow 
support provided by Iurii Milovanov -# version 1.0.4 +# version 1.0.5 ''' A word to the newbie, expert, and brave-- @@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode -gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded +gp.precision = 1 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded #++++++++++++++++++++++++++++++++++++++++++ diff --git a/karoo_gp_server.py b/karoo_gp_server.py index f0f59d0..1f9b90f 100644 --- a/karoo_gp_server.py +++ b/karoo_gp_server.py @@ -2,7 +2,7 @@ # Use Genetic Programming for Classification and Symbolic Regression # by Kai Staats, MSc; see LICENSE.md # Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov -# version 1.0.4 +# version 1.0.5 ''' A word to the newbie, expert, and brave-- @@ -56,14 +56,14 @@ import argparse import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP() ap = argparse.ArgumentParser(description = 'Karoo GP Server') -ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching') +ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'c', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching') ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half') -ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population') +ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = 
'[3...10] maximum Tree depth for the initial population') ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run') ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes') ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population') -ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations') -ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size') +ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations') +ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 7, help = '[1...max pop] tournament size') ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv') args = ap.parse_args()