v1.0.5 minor updates

pull/9/head
Kai Staats 2017-08-10 16:50:06 +01:00
parent bada591746
commit 4b03562e48
4 changed files with 94 additions and 114 deletions

View File

@ -1,3 +1,25 @@
2017 08/10
Relatively light updates this time.
Iurii fixed a minor bug in which the MATCHING function (used only for demonstrations) would not find a match despite
the fact that the output was correct, due to the way in which TensorFlow was handling floating points and precision.
Iurii used numpy.allclose as a reference to resolve the situation.
I also modified the autosave to the runs/ directory such that if you are using an external dataset (quite likely), the
new directory (for each run) will be saved as [filename]-[date_time_stamp]/. The idea (thank you Marco) is to help keep
multiple, automated runs organized and more readily inspected, by name alone.
2017 07/21
In a rather embarrassing live demo in which I asked the audience to create the dataset for Karoo, I discovered a
bug in the MATCH kernel in which a negative value in the dataset would cause that row to be discarded from the fitness
function --FIXED.
I merged the 3 methods fx_fitness_train_classify, fx_fitness_train_regress, fx_fitness_train_match into fx_fitness_eval
in order to reduce the quantity of lines of code and simplify the workflow.
2017 07/03
I am pleased to announce that Karoo GP is now updated to include a full suite of mathematical operators. I thank the

View File

@ -2,7 +2,7 @@
# Define the methods and global variables used by Karoo GP
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
@ -323,7 +323,8 @@ class Base_GP(object):
# self.datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.datetime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
self.path = os.path.join(cwd, 'runs/', filename.split('.')[0] + '_' + self.datetime) # generate a unique directory name
# self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
if not os.path.isdir(self.path): os.makedirs(self.path) # make a unique directory
self.filename = {} # a dictionary to hold .csv filenames
@ -1382,7 +1383,7 @@ class Base_GP(object):
return
def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval
def fx_fitness_eval(self, expr, data, get_labels = False):
'''
Computes tree expression using TensorFlow (TF) returning results and fitness scores.
@ -1423,31 +1424,70 @@ class Base_GP(object):
with tf.Session(config=config) as sess:
with sess.graph.device(self.tf_device):
# Load data into TF
# 1 - Load data into TF
tensors = {}
for i in range(len(self.terminals)):
var = self.terminals[i]
tensors[var] = tf.constant(data[:, i], dtype=tf.float32)
# Transform string expression into TF operation graph
# 2- Transform string expression into TF operation graph
result = self.fx_fitness_expr_parse(expr, tensors)
labels = tf.no_op() # a placeholder, applies only to CLASSIFY kernel
solution = tensors['s'] # solution value is assumed to be stored in 's' terminal
# Add fitness computation into TF graph
# 3- Add fitness computation into TF graph
if self.kernel == 'c': # CLASSIFY kernels
'''
Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel.
This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set
loaded at run-time to evaluate the fitness of the selected kernel.
This multiclass classifier compares each row of a given Tree to the known solution, comparing estimated values
(labels) generated by Karoo GP against the correct labels. This method is able to work with any number of
class labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those inbetween
are by default confined to the spacing of 1.0 each, as defined by:
(solution - 1) < result <= solution
The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the
origin. At the time of this writing, an odd number of class labels will generate an extra bin on the positive
side of origin as it has not yet been determined the effect of enabling the middle bin to include both a
negative and positive space.
Arguments required: result, solution
'''
if get_labels: labels = tf.map_fn(self.fx_fitness_labels_map, result, dtype=[tf.int32, tf.string], swap_memory=True)
pairwise_fitness = self.fx_fitness_train_classify(result, tf.cast(solution, tf.float32))
skew = (self.class_labels / 2) - 1
rule11 = tf.equal(solution, 0)
rule12 = tf.less_equal(result, 0 - skew)
rule13 = tf.logical_and(rule11, rule12)
rule21 = tf.equal(solution, self.class_labels - 1)
rule22 = tf.greater(result, solution - 1 - skew)
rule23 = tf.logical_and(rule21, rule22)
rule31 = tf.less(solution - 1 - skew, result)
rule32 = tf.less_equal(result, solution - skew)
rule33 = tf.logical_and(rule31, rule32)
pairwise_fitness = tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32)
elif self.kernel == 'r': # REGRESSION kernel
pairwise_fitness = self.fx_fitness_train_regress(result, tf.cast(solution, tf.float32))
pairwise_fitness = tf.abs(solution - result)
elif self.kernel == 'm': # MATCH kernel
pairwise_fitness = self.fx_fitness_train_match(result, solution)
# pairwise_fitness = tf.cast(tf.equal(solution, result), tf.int32) # breaks due to floating points
RTOL, ATOL = 1e-05, 1e-08
pairwise_fitness = tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32)
# elif self.kernel == '[other]': # [OTHER] kernel
# pairwise_fitness = self.fx_fitness_train_[other](result ?, solution ?)
# pairwise_fitness = tf.cast(tf.___(solution, result)
else: raise Exception('Kernel type is wrong or missing. You entered {}'.format(self.kernel))
@ -1472,7 +1512,7 @@ class Base_GP(object):
return self.fx_fitness_node_parse(tree, tensors)
def fx_chain_bool(self, values, operation, tensors):
def fx_fitness_chain_bool(self, values, operation, tensors):
'''
Chains a sequence of boolean operations (e.g. 'a and b and c') into a single TensorFlow (TF) sub graph.
@ -1482,12 +1522,12 @@ class Base_GP(object):
x = tf.cast(self.fx_fitness_node_parse(values[0], tensors), tf.bool)
if len(values) > 1:
return operation(x, self.fx_chain_bool(values[1:], operation, tensors))
return operation(x, self.fx_fitness_chain_bool(values[1:], operation, tensors))
else:
return x
def fx_chain_compare(self, comparators, ops, tensors):
def fx_fitness_chain_compare(self, comparators, ops, tensors):
'''
Chains a sequence of comparison operations (e.g. 'a > b < c') into a single TensorFlow (TF) sub graph.
@ -1498,7 +1538,7 @@ class Base_GP(object):
x = self.fx_fitness_node_parse(comparators[0], tensors)
y = self.fx_fitness_node_parse(comparators[1], tensors)
if len(comparators) > 2:
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_chain_compare(comparators[1:], ops[1:], tensors))
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_fitness_chain_compare(comparators[1:], ops[1:], tensors))
else:
return operators[type(ops[0])](x, y)
@ -1528,10 +1568,10 @@ class Base_GP(object):
return operators[node.func.id](*[self.fx_fitness_node_parse(arg, tensors) for arg in node.args])
elif isinstance(node, ast.BoolOp): # <left> <bool_operator> <right> e.g. x or y
return self.fx_chain_bool(node.values, operators[type(node.op)], tensors)
return self.fx_fitness_chain_bool(node.values, operators[type(node.op)], tensors)
elif isinstance(node, ast.Compare): # <left> <compare> <right> e.g., a > z
return self.fx_chain_compare([node.left] + node.comparators, node.ops, tensors)
return self.fx_fitness_chain_compare([node.left] + node.comparators, node.ops, tensors)
else: raise TypeError(node)
@ -1549,8 +1589,6 @@ class Base_GP(object):
elif solution == self.class_labels - 1 and result > solution - 1 - skew: fitness = 1 # check for last class (the right-most bin)
elif solution - 1 - skew < result <= solution - skew: fitness = 1 # check for class bins between first and last
else: fitness = 0 # no class match
See 'fx_fitness_train_classify' for a description of the multi-class classifier.
Arguments required: result
'''
@ -1567,84 +1605,6 @@ class Base_GP(object):
return zero_rule
def fx_fitness_train_classify(self, result, solution): # CLASSIFICATION kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the CLASSIFY kernel.

    This multiclass classifier compares each row of a given Tree to the known solution, checking the estimated
    values generated by Karoo GP against the correct labels. Any number of class labels from 2 to n is supported.
    The left-most bin extends to -inf and the right-most bin to +inf; every bin in between spans 1.0, as defined by:

        (solution - 1) < result <= solution

    The skew shifts the bin boundaries so they straddle the origin on both the negative and positive sides. At the
    time of this writing, an odd number of class labels produces an extra bin on the positive side of the origin,
    as the effect of letting the middle bin cover both negative and positive space has not yet been determined.

    Arguments required: result, solution
    '''

    skew = (self.class_labels / 2) - 1

    # true class is 0 AND the result falls at or below the skewed origin (left-most bin, includes -inf)
    in_first_bin = tf.logical_and(tf.equal(solution, 0),
                                  tf.less_equal(result, 0 - skew))

    # true class is the last label AND the result exceeds the last boundary (right-most bin, includes +inf)
    in_last_bin = tf.logical_and(tf.equal(solution, self.class_labels - 1),
                                 tf.greater(result, solution - 1 - skew))

    # the result lands inside the unit-width bin belonging to the true class (all interior bins)
    in_mid_bin = tf.logical_and(tf.less(solution - 1 - skew, result),
                                tf.less_equal(result, solution - skew))

    # a row scores 1 when any of the three bin rules holds, else 0
    matched = tf.logical_or(tf.logical_or(in_first_bin, in_last_bin), in_mid_bin)
    return tf.cast(matched, tf.int32)
def fx_fitness_train_regress(self, result, solution): # REGRESSION kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the REGRESSION kernel.

    This is a minimisation function: the per-row fitness is the absolute error between the Tree's output and the
    known solution, so a smaller value indicates a better fit.

    Arguments required: result, solution
    '''

    # per-row absolute error; lower is better for this kernel
    error = solution - result
    return tf.abs(error)
def fx_fitness_train_match(self, result, solution): # MATCH kernel
    '''
    Creates the element-wise fitness computation TensorFlow (TF) sub-graph for the MATCH kernel.

    This is a maximization function which seeks an exact solution (a perfect match), scoring each row 1 on a
    match and 0 otherwise.

    BUG FIX: a naive tf.cast(tf.equal(solution, result), tf.int32) fails to recognise correct output because of
    floating point representation error in TF's float32 arithmetic. A row is therefore counted as a match when
    the absolute difference falls within numpy.allclose-style tolerances:

        |solution - result| <= ATOL + RTOL * |result|

    Arguments required: result, solution
    '''

    RTOL, ATOL = 1e-05, 1e-08 # same default tolerances as numpy.allclose

    # tolerance-based equality replaces the exact tf.equal comparison, which breaks on floating points
    return tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32)
# def fx_fitness_train_[other](self, result, solution): # [OTHER] kernel
# '''
# Creates element-wise fitness computation TensorFlow (TF) sub-graph for [other] kernel.
# This is a [minimisation or maximization] function which [insert description].
# return tf.[?]([insert formula])
# '''
def fx_fitness_store(self, tree, fitness):
'''
@ -1668,18 +1628,16 @@ class Base_GP(object):
def fx_fitness_tournament(self, tourn_size):
'''
Select one Tree by means of a Tournament in which 'tourn_size' contenders are randomly selected and then
compared for their respective fitness (as determined in 'fx_fitness_gym'). The tournament is engaged for each
of the four types of inter-generational evolution: reproduction, point mutation, branch (full and grow)
mutation, and crossover (sexual reproduction).
Multiple contenders ('tourn_size') are randomly selected and then compared for their respective fitness, as
determined in 'fx_fitness_gym'. The tournament is engaged to select a single Tree for each invocation of the
genetic operators: reproduction, mutation (point, branch), and crossover (sexual reproduction).
The original Tournament Selection drew directly from the foundation generation (gp.generation_a). However,
with the introduction of a minimum number of nodes as defined by the user ('gp.tree_depth_min'),
'gp.gene_pool' provides only from those Trees which meet all criteria.
'gp.gene_pool' limits the Trees to those which meet all criteria.
With upper (max depth) and lower (min nodes) invoked, one may enjoy interesting results. Stronger boundary
parameters (a reduced gap between the min and max number of nodes) may invoke more compact solutions, but also
runs the risk of elitism, even total population die-off where a healthy population once existed.
Stronger boundary parameters (a reduced gap between the min and max number of nodes) may invoke more compact
solutions, but also runs the risk of elitism, even total population die-off where a healthy population once existed.
Arguments required: tourn_size
'''
@ -1818,7 +1776,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} True)\033[0;0m\033[36m as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
print '\n Fitness score: {}'.format(result['fitness'])
print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0])
@ -1834,7 +1792,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i])
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness']
print '\n\t Regression fitness score: {}'.format(fitness)
@ -1850,7 +1808,7 @@ class Base_GP(object):
'''
for i in range(len(result['result'])):
print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i]))
print '\t\033[36m Data row {} predicts match:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
print '\n\tMatching fitness score: {}'.format(result['fitness'])

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A word to the newbie, expert, and brave--
@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
gp.precision = 1 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
#++++++++++++++++++++++++++++++++++++++++++

View File

@ -2,7 +2,7 @@
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.4
# version 1.0.5
'''
A word to the newbie, expert, and brave--
@ -56,14 +56,14 @@ import argparse
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
ap = argparse.ArgumentParser(description = 'Karoo GP Server')
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'c', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half')
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population')
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = '[3...10] maximum Tree depth for the initial population')
ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run')
ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes')
ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population')
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations')
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size')
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations')
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 7, help = '[1...max pop] tournament size')
ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv')
args = ap.parse_args()