v1.0.5 minor updates

pull/9/head
Kai Staats 2017-08-10 16:50:06 +01:00
parent bada591746
commit 4b03562e48
4 changed files with 94 additions and 114 deletions

View File

@ -1,3 +1,25 @@
2017 08/10
Relatively light updates this time.
Iurii fixed a minor bug in which the MATCHING function (used only for demonstrations) would not find a match despite
the fact that the output was correct, due to the way in which TensorFlow was handling floating points and precision.
Iurii used numpy.allclose as a reference to resolve the situation.
I also modified the autosave to the runs/ directory such that if you are using an external dataset (quite likely), the
new directory (for each run) will be saved as [filename]-[date_time_stamp]/. The idea (thank you Marco) is to help keep
multiple, automated runs organized and more readily inspected, by name alone.
2017 07/21
In a rather embarrassing live demo in which I asked the audience to create the dataset for Karoo, I discovered a
bug in the MATCH kernel in which a negative value in the dataset would cause that row to be discarded from the fitness
function --FIXED.
I merged the 3 methods fx_fitness_train_classify, fx_fitness_train_regress, fx_fitness_train_match into fx_fitness_eval
in order to reduce the quantity of lines of code and simplify the workflow.
2017 07/03
I am pleased to announce that Karoo GP is now updated to include a full suite of mathematical operators. I thank the

View File

@ -2,7 +2,7 @@
# Define the methods and global variables used by Karoo GP
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
@ -323,7 +323,8 @@ class Base_GP(object):
# self.datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.datetime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
self.path = os.path.join(cwd, 'runs/', filename.split('.')[0] + '_' + self.datetime) # generate a unique directory name
# self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
if not os.path.isdir(self.path): os.makedirs(self.path) # make a unique directory
self.filename = {} # a dictionary to hold .csv filenames
@ -1382,7 +1383,7 @@ class Base_GP(object):
return
def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval
def fx_fitness_eval(self, expr, data, get_labels = False):
'''
Computes tree expression using TensorFlow (TF) returning results and fitness scores.
@ -1423,31 +1424,70 @@ class Base_GP(object):
with tf.Session(config=config) as sess:
with sess.graph.device(self.tf_device):
# Load data into TF
# 1 - Load data into TF
tensors = {}
for i in range(len(self.terminals)):
var = self.terminals[i]
tensors[var] = tf.constant(data[:, i], dtype=tf.float32)
# Transform string expression into TF operation graph
# 2- Transform string expression into TF operation graph
result = self.fx_fitness_expr_parse(expr, tensors)
labels = tf.no_op() # a placeholder, applies only to CLASSIFY kernel
solution = tensors['s'] # solution value is assumed to be stored in 's' terminal
# Add fitness computation into TF graph
# 3- Add fitness computation into TF graph
if self.kernel == 'c': # CLASSIFY kernels
'''
Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel.
This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set
loaded at run-time to evaluate the fitness of the selected kernel.
This multiclass classifier compares each row of a given Tree to the known solution, comparing estimated values
(labels) generated by Karoo GP against the correct labels. This method is able to work with any number of
class labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those inbetween
are by default confined to the spacing of 1.0 each, as defined by:
(solution - 1) < result <= solution
The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the
origin. At the time of this writing, an odd number of class labels will generate an extra bin on the positive
side of origin as it has not yet been determined the effect of enabling the middle bin to include both a
negative and positive space.
Arguments required: result, solution
'''
if get_labels: labels = tf.map_fn(self.fx_fitness_labels_map, result, dtype=[tf.int32, tf.string], swap_memory=True)
pairwise_fitness = self.fx_fitness_train_classify(result, tf.cast(solution, tf.float32))
skew = (self.class_labels / 2) - 1
rule11 = tf.equal(solution, 0)
rule12 = tf.less_equal(result, 0 - skew)
rule13 = tf.logical_and(rule11, rule12)
rule21 = tf.equal(solution, self.class_labels - 1)
rule22 = tf.greater(result, solution - 1 - skew)
rule23 = tf.logical_and(rule21, rule22)
rule31 = tf.less(solution - 1 - skew, result)
rule32 = tf.less_equal(result, solution - skew)
rule33 = tf.logical_and(rule31, rule32)
pairwise_fitness = tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32)
elif self.kernel == 'r': # REGRESSION kernel
pairwise_fitness = self.fx_fitness_train_regress(result, tf.cast(solution, tf.float32))
pairwise_fitness = tf.abs(solution - result)
elif self.kernel == 'm': # MATCH kernel
pairwise_fitness = self.fx_fitness_train_match(result, solution)
# pairwise_fitness = tf.cast(tf.equal(solution, result), tf.int32) # breaks due to floating points
RTOL, ATOL = 1e-05, 1e-08
pairwise_fitness = tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32)
# elif self.kernel == '[other]': # [OTHER] kernel
# pairwise_fitness = self.fx_fitness_train_[other](result ?, solution ?)
# pairwise_fitness = tf.cast(tf.___(solution, result)
else: raise Exception('Kernel type is wrong or missing. You entered {}'.format(self.kernel))
@ -1472,7 +1512,7 @@ class Base_GP(object):
return self.fx_fitness_node_parse(tree, tensors)
def fx_chain_bool(self, values, operation, tensors):
def fx_fitness_chain_bool(self, values, operation, tensors):
'''
Chains a sequence of boolean operations (e.g. 'a and b and c') into a single TensorFlow (TF) sub graph.
@ -1482,12 +1522,12 @@ class Base_GP(object):
x = tf.cast(self.fx_fitness_node_parse(values[0], tensors), tf.bool)
if len(values) > 1:
return operation(x, self.fx_chain_bool(values[1:], operation, tensors))
return operation(x, self.fx_fitness_chain_bool(values[1:], operation, tensors))
else:
return x
def fx_chain_compare(self, comparators, ops, tensors):
def fx_fitness_chain_compare(self, comparators, ops, tensors):
'''
Chains a sequence of comparison operations (e.g. 'a > b < c') into a single TensorFlow (TF) sub graph.
@ -1498,7 +1538,7 @@ class Base_GP(object):
x = self.fx_fitness_node_parse(comparators[0], tensors)
y = self.fx_fitness_node_parse(comparators[1], tensors)
if len(comparators) > 2:
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_chain_compare(comparators[1:], ops[1:], tensors))
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_fitness_chain_compare(comparators[1:], ops[1:], tensors))
else:
return operators[type(ops[0])](x, y)
@ -1528,10 +1568,10 @@ class Base_GP(object):
return operators[node.func.id](*[self.fx_fitness_node_parse(arg, tensors) for arg in node.args])
elif isinstance(node, ast.BoolOp): # <left> <bool_operator> <right> e.g. x or y
return self.fx_chain_bool(node.values, operators[type(node.op)], tensors)
return self.fx_fitness_chain_bool(node.values, operators[type(node.op)], tensors)
elif isinstance(node, ast.Compare): # <left> <compare> <right> e.g., a > z
return self.fx_chain_compare([node.left] + node.comparators, node.ops, tensors)
return self.fx_fitness_chain_compare([node.left] + node.comparators, node.ops, tensors)
else: raise TypeError(node)
@ -1549,8 +1589,6 @@ class Base_GP(object):
elif solution == self.class_labels - 1 and result > solution - 1 - skew: fitness = 1 # check for last class (the right-most bin)
elif solution - 1 - skew < result <= solution - skew: fitness = 1 # check for class bins between first and last
else: fitness = 0 # no class match
See 'fx_fitness_train_classify' for a description of the multi-class classifier.
Arguments required: result
'''
@ -1567,84 +1605,6 @@ class Base_GP(object):
return zero_rule
def fx_fitness_train_classify(self, result, solution): # CLASSIFICATION kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the CLASSIFY kernel.

    This multiclass classifier compares each row of a given Tree to the known solution, checking the estimated
    values generated by Karoo GP against the correct labels. Any number of class labels from 2 to n is supported.
    The left-most bin extends to -inf and the right-most bin to +inf; every bin in between spans 1.0, as defined by:

        (solution - 1) < result <= solution

    The skew shifts the bin boundaries so they straddle the origin on both the negative and positive sides. At the
    time of this writing, an odd number of class labels produces an extra bin on the positive side of the origin,
    as the effect of letting the middle bin cover both negative and positive space has not yet been determined.

    Arguments required: result, solution
    '''

    skew = (self.class_labels / 2) - 1

    # true class is 0 AND the result falls at or below the skewed origin (left-most bin, includes -inf)
    in_first_bin = tf.logical_and(tf.equal(solution, 0),
                                  tf.less_equal(result, 0 - skew))

    # true class is the last label AND the result exceeds the last boundary (right-most bin, includes +inf)
    in_last_bin = tf.logical_and(tf.equal(solution, self.class_labels - 1),
                                 tf.greater(result, solution - 1 - skew))

    # the result lands inside the unit-width bin belonging to the true class (all interior bins)
    in_mid_bin = tf.logical_and(tf.less(solution - 1 - skew, result),
                                tf.less_equal(result, solution - skew))

    # a row scores 1 when any of the three bin rules holds, else 0
    matched = tf.logical_or(tf.logical_or(in_first_bin, in_last_bin), in_mid_bin)
    return tf.cast(matched, tf.int32)
def fx_fitness_train_regress(self, result, solution): # REGRESSION kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the REGRESSION kernel.

    This is a minimisation function: the per-row fitness is the absolute error between the Tree's output and the
    known solution, so a smaller value indicates a better fit.

    Arguments required: result, solution
    '''

    # per-row absolute error; lower is better for this kernel
    error = solution - result
    return tf.abs(error)
def fx_fitness_train_match(self, result, solution): # MATCH kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the MATCH kernel.

    This is a maximization function which seeks an exact solution (a perfect match), scoring each row 1 on a
    match and 0 otherwise.

    BUG FIX: a naive tf.cast(tf.equal(solution, result), tf.int32) fails to recognise correct output because of
    floating point representation error in TF's float32 arithmetic. A row is therefore counted as a match when
    the absolute difference falls within numpy.allclose-style tolerances:

        |solution - result| <= ATOL + RTOL * |result|

    Arguments required: result, solution
    '''

    RTOL, ATOL = 1e-05, 1e-08 # same default tolerances as numpy.allclose

    # tolerance-based equality replaces the exact tf.equal comparison, which breaks on floating points
    return tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32)
# def fx_fitness_train_[other](self, result, solution): # [OTHER] kernel
# '''
# Creates element-wise fitness computation TensorFlow (TF) sub-graph for [other] kernel.
# This is a [minimisation or maximization] function which [insert description].
# return tf.[?]([insert formula])
# '''
def fx_fitness_store(self, tree, fitness):
'''
@ -1668,18 +1628,16 @@ class Base_GP(object):
def fx_fitness_tournament(self, tourn_size):
'''
Select one Tree by means of a Tournament in which 'tourn_size' contenders are randomly selected and then
compared for their respective fitness (as determined in 'fx_fitness_gym'). The tournament is engaged for each
of the four types of inter-generational evolution: reproduction, point mutation, branch (full and grow)
mutation, and crossover (sexual reproduction).
Multiple contenders ('tourn_size') are randomly selected and then compared for their respective fitness, as
determined in 'fx_fitness_gym'. The tournament is engaged to select a single Tree for each invocation of the
genetic operators: reproduction, mutation (point, branch), and crossover (sexual reproduction).
The original Tournament Selection drew directly from the foundation generation (gp.generation_a). However,
with the introduction of a minimum number of nodes as defined by the user ('gp.tree_depth_min'),
'gp.gene_pool' provides only from those Trees which meet all criteria.
'gp.gene_pool' limits the Trees to those which meet all criteria.
With upper (max depth) and lower (min nodes) invoked, one may enjoy interesting results. Stronger boundary
parameters (a reduced gap between the min and max number of nodes) may invoke more compact solutions, but also
runs the risk of elitism, even total population die-off where a healthy population once existed.
Stronger boundary parameters (a reduced gap between the min and max number of nodes) may invoke more compact
solutions, but also runs the risk of elitism, even total population die-off where a healthy population once existed.
Arguments required: tourn_size
'''
@ -1818,7 +1776,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} True)\033[0;0m\033[36m as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
print '\n Fitness score: {}'.format(result['fitness'])
print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0])
@ -1834,7 +1792,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i])
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness']
print '\n\t Regression fitness score: {}'.format(fitness)
@ -1850,7 +1808,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i]))
print '\t\033[36m Data row {} predicts match:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
print '\n\tMatching fitness score: {}'.format(result['fitness'])

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A word to the newbie, expert, and brave--
@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
gp.precision = 1 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
#++++++++++++++++++++++++++++++++++++++++++

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A word to the newbie, expert, and brave--
@ -56,14 +56,14 @@ import argparse
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
ap = argparse.ArgumentParser(description = 'Karoo GP Server')
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'c', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half')
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population')
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = '[3...10] maximum Tree depth for the initial population')
ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run')
ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes')
ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population')
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations')
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size')
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations')
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 7, help = '[1...max pop] tournament size')
ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv')
args = ap.parse_args()