v1.0.5 minor updates
parent
bada591746
commit
4b03562e48
|
@ -1,3 +1,25 @@
|
|||
2017 08/10
|
||||
Relatively light updates this time.
|
||||
|
||||
Iurii fixed a minor bug in which the MATCHING function (used only for demonstrations) would not find a match despite
|
||||
the fact that the output was correct, due to the way in which TensorFlow was handling floating points and precision.
|
||||
Iurii used the numpy.allclose documentation as a reference to resolve the situation.
|
||||
|
||||
I also modified the autosave to the runs/ directory such that if you are using an external dataset (quite likely), the
|
||||
new directory (for each run) will be saved as [filename]_[date_time_stamp]/. The idea (thank you Marco) is to help keep
|
||||
multiple, automated runs organized and more readily, visually inspected by name alone.
|
||||
|
||||
|
||||
2017 07/21
|
||||
|
||||
In a rather embarrassing, live demo in which I asked for the audience to create the dataset for Karoo, I discovered a
|
||||
bug in the MATCH kernel in which a negative value in the dataset would cause that row to be discarded from the fitness
|
||||
function --FIXED.
|
||||
|
||||
I merged the 3 methods fx_fitness_train_classify, fx_fitness_train_regress, fx_fitness_train_match into fx_fitness_eval
|
||||
in order to reduce the quantity of lines of code and simplify the workflow.
|
||||
|
||||
|
||||
2017 07/03
|
||||
|
||||
I am pleased to announce that Karoo GP is now updated to include a full suite of mathematical operators. I thank the
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Define the methods and global variables used by Karoo GP
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.4
|
||||
# version 1.0.5
|
||||
|
||||
'''
|
||||
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
|
||||
|
@ -323,7 +323,8 @@ class Base_GP(object):
|
|||
|
||||
# self.datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|
||||
self.datetime = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
|
||||
self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
|
||||
self.path = os.path.join(cwd, 'runs/', filename.split('.')[0] + '_' + self.datetime) # generate a unique directory name
|
||||
# self.path = os.path.join(cwd, 'runs/', self.datetime) # generate a unique directory name
|
||||
if not os.path.isdir(self.path): os.makedirs(self.path) # make a unique directory
|
||||
|
||||
self.filename = {} # a dictionary to hold .csv filenames
|
||||
|
@ -1382,7 +1383,7 @@ class Base_GP(object):
|
|||
return
|
||||
|
||||
|
||||
def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval
|
||||
def fx_fitness_eval(self, expr, data, get_labels = False):
|
||||
|
||||
'''
|
||||
Computes tree expression using TensorFlow (TF) returning results and fitness scores.
|
||||
|
@ -1423,31 +1424,70 @@ class Base_GP(object):
|
|||
with tf.Session(config=config) as sess:
|
||||
with sess.graph.device(self.tf_device):
|
||||
|
||||
# Load data into TF
|
||||
# 1 - Load data into TF
|
||||
tensors = {}
|
||||
for i in range(len(self.terminals)):
|
||||
var = self.terminals[i]
|
||||
tensors[var] = tf.constant(data[:, i], dtype=tf.float32)
|
||||
|
||||
# Transform string expression into TF operation graph
|
||||
# 2- Transform string expression into TF operation graph
|
||||
result = self.fx_fitness_expr_parse(expr, tensors)
|
||||
|
||||
labels = tf.no_op() # a placeholder, applies only to CLASSIFY kernel
|
||||
solution = tensors['s'] # solution value is assumed to be stored in 's' terminal
|
||||
|
||||
# Add fitness computation into TF graph
|
||||
# 3- Add fitness computation into TF graph
|
||||
if self.kernel == 'c': # CLASSIFY kernels
|
||||
|
||||
'''
|
||||
Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel.
|
||||
|
||||
This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set
|
||||
loaded at run-time to evaluate the fitness of the selected kernel.
|
||||
|
||||
This multiclass classifier compares each row of a given Tree to the known solution, comparing estimated values
|
||||
(labels) generated by Karoo GP against the correct labels. This method is able to work with any number of
|
||||
class labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those inbetween
|
||||
are by default confined to the spacing of 1.0 each, as defined by:
|
||||
|
||||
(solution - 1) < result <= solution
|
||||
|
||||
The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the
|
||||
origin. At the time of this writing, an odd number of class labels will generate an extra bin on the positive
|
||||
side of origin as it has not yet been determined the effect of enabling the middle bin to include both a
|
||||
negative and positive space.
|
||||
|
||||
Arguments required: result, solution
|
||||
'''
|
||||
|
||||
if get_labels: labels = tf.map_fn(self.fx_fitness_labels_map, result, dtype=[tf.int32, tf.string], swap_memory=True)
|
||||
pairwise_fitness = self.fx_fitness_train_classify(result, tf.cast(solution, tf.float32))
|
||||
|
||||
skew = (self.class_labels / 2) - 1
|
||||
|
||||
rule11 = tf.equal(solution, 0)
|
||||
rule12 = tf.less_equal(result, 0 - skew)
|
||||
rule13 = tf.logical_and(rule11, rule12)
|
||||
|
||||
rule21 = tf.equal(solution, self.class_labels - 1)
|
||||
rule22 = tf.greater(result, solution - 1 - skew)
|
||||
rule23 = tf.logical_and(rule21, rule22)
|
||||
|
||||
rule31 = tf.less(solution - 1 - skew, result)
|
||||
rule32 = tf.less_equal(result, solution - skew)
|
||||
rule33 = tf.logical_and(rule31, rule32)
|
||||
|
||||
pairwise_fitness = tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32)
|
||||
|
||||
elif self.kernel == 'r': # REGRESSION kernel
|
||||
pairwise_fitness = self.fx_fitness_train_regress(result, tf.cast(solution, tf.float32))
|
||||
pairwise_fitness = tf.abs(solution - result)
|
||||
|
||||
elif self.kernel == 'm': # MATCH kernel
|
||||
pairwise_fitness = self.fx_fitness_train_match(result, solution)
|
||||
# pairwise_fitness = tf.cast(tf.equal(solution, result), tf.int32) # breaks due to floating points
|
||||
RTOL, ATOL = 1e-05, 1e-08
|
||||
pairwise_fitness = tf.cast(tf.less_equal(tf.abs(solution - result), ATOL + RTOL * tf.abs(result)), tf.int32)
|
||||
|
||||
# elif self.kernel == '[other]': # [OTHER] kernel
|
||||
# pairwise_fitness = self.fx_fitness_train_[other](result ?, solution ?)
|
||||
# pairwise_fitness = tf.cast(tf.___(solution, result)
|
||||
|
||||
else: raise Exception('Kernel type is wrong or missing. You entered {}'.format(self.kernel))
|
||||
|
||||
|
@ -1472,7 +1512,7 @@ class Base_GP(object):
|
|||
return self.fx_fitness_node_parse(tree, tensors)
|
||||
|
||||
|
||||
def fx_chain_bool(self, values, operation, tensors):
|
||||
def fx_fitness_chain_bool(self, values, operation, tensors):
|
||||
|
||||
'''
|
||||
Chains a sequence of boolean operations (e.g. 'a and b and c') into a single TensorFlow (TF) sub graph.
|
||||
|
@ -1482,12 +1522,12 @@ class Base_GP(object):
|
|||
|
||||
x = tf.cast(self.fx_fitness_node_parse(values[0], tensors), tf.bool)
|
||||
if len(values) > 1:
|
||||
return operation(x, self.fx_chain_bool(values[1:], operation, tensors))
|
||||
return operation(x, self.fx_fitness_chain_bool(values[1:], operation, tensors))
|
||||
else:
|
||||
return x
|
||||
|
||||
|
||||
def fx_chain_compare(self, comparators, ops, tensors):
|
||||
def fx_fitness_chain_compare(self, comparators, ops, tensors):
|
||||
|
||||
'''
|
||||
Chains a sequence of comparison operations (e.g. 'a > b < c') into a single TensorFlow (TF) sub graph.
|
||||
|
@ -1498,7 +1538,7 @@ class Base_GP(object):
|
|||
x = self.fx_fitness_node_parse(comparators[0], tensors)
|
||||
y = self.fx_fitness_node_parse(comparators[1], tensors)
|
||||
if len(comparators) > 2:
|
||||
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_chain_compare(comparators[1:], ops[1:], tensors))
|
||||
return tf.logical_and(operators[type(ops[0])](x, y), self.fx_fitness_chain_compare(comparators[1:], ops[1:], tensors))
|
||||
else:
|
||||
return operators[type(ops[0])](x, y)
|
||||
|
||||
|
@ -1528,10 +1568,10 @@ class Base_GP(object):
|
|||
return operators[node.func.id](*[self.fx_fitness_node_parse(arg, tensors) for arg in node.args])
|
||||
|
||||
elif isinstance(node, ast.BoolOp): # <left> <bool_operator> <right> e.g. x or y
|
||||
return self.fx_chain_bool(node.values, operators[type(node.op)], tensors)
|
||||
return self.fx_fitness_chain_bool(node.values, operators[type(node.op)], tensors)
|
||||
|
||||
elif isinstance(node, ast.Compare): # <left> <compare> <right> e.g., a > z
|
||||
return self.fx_chain_compare([node.left] + node.comparators, node.ops, tensors)
|
||||
return self.fx_fitness_chain_compare([node.left] + node.comparators, node.ops, tensors)
|
||||
|
||||
else: raise TypeError(node)
|
||||
|
||||
|
@ -1550,8 +1590,6 @@ class Base_GP(object):
|
|||
elif solution - 1 - skew < result <= solution - skew; fitness = 1: # check for class bins between first and last
|
||||
else: fitness = 0 # no class match
|
||||
|
||||
See 'fx_fitness_train_classify' for a description of the multi-class classifier.
|
||||
|
||||
Arguments required: result
|
||||
'''
|
||||
|
||||
|
@ -1567,84 +1605,6 @@ class Base_GP(object):
|
|||
return zero_rule
|
||||
|
||||
|
||||
def fx_fitness_train_classify(self, result, solution): # CLASSIFICATION kernel
|
||||
|
||||
'''
|
||||
Creates element-wise fitness computation TensorFlow (TF) sub-graph for CLASSIFY kernel.
|
||||
|
||||
This method uses the 'sympified' (SymPy) expression ('algo_sym') created in 'fx_eval_poly' and the data set
|
||||
loaded at run-time to evaluate the fitness of the selected kernel.
|
||||
|
||||
This multiclass classifier compares each row of a given Tree to the known solution, comparing estimated values
|
||||
(labels) generated by Karoo GP against the correct labels. This method is able to work with any number of class
|
||||
labels, from 2 to n. The left-most bin includes -inf. The right-most bin includes +inf. Those inbetween are
|
||||
by default confined to the spacing of 1.0 each, as defined by:
|
||||
|
||||
(solution - 1) < result <= solution
|
||||
|
||||
The skew adjusts the boundaries of the bins such that they fall on both the negative and positive sides of the
|
||||
origin. At the time of this writing, an odd number of class labels will generate an extra bin on the positive
|
||||
side of origin as it has not yet been determined the effect of enabling the middle bin to include both a
|
||||
negative and positive space.
|
||||
|
||||
Arguments required: result, solution
|
||||
'''
|
||||
|
||||
skew = (self.class_labels / 2) - 1
|
||||
rule11 = tf.equal(solution, 0)
|
||||
rule12 = tf.less_equal(result, 0 - skew)
|
||||
rule13 = tf.logical_and(rule11, rule12)
|
||||
rule21 = tf.equal(solution, self.class_labels - 1)
|
||||
rule22 = tf.greater(result, solution - 1 - skew)
|
||||
rule23 = tf.logical_and(rule21, rule22)
|
||||
rule31 = tf.less(solution - 1 - skew, result)
|
||||
rule32 = tf.less_equal(result, solution - skew)
|
||||
rule33 = tf.logical_and(rule31, rule32)
|
||||
|
||||
return tf.cast(tf.logical_or(tf.logical_or(rule13, rule23), rule33), tf.int32)
|
||||
|
||||
|
||||
def fx_fitness_train_regress(self, result, solution): # REGRESSION kernel
|
||||
|
||||
'''
|
||||
Creates element-wise fitness computation TensorFlow (TF) sub-graph for REGRESSION kernel.
|
||||
|
||||
This is a minimisation function which seeks a result which is closest to the solution.
|
||||
|
||||
[need to write more]
|
||||
|
||||
Arguments required: result, solution
|
||||
'''
|
||||
|
||||
return tf.abs(solution - result)
|
||||
|
||||
|
||||
def fx_fitness_train_match(self, result, solution): # MATCH kernel
|
||||
|
||||
'''
|
||||
Creates element-wise fitness computation TensorFlow (TF) sub-graph for MATCH kernel.
|
||||
|
||||
This is a maximization function which seeks an exact solution (a perfect match).
|
||||
|
||||
[need to write more]
|
||||
|
||||
Arguments required: result, solution
|
||||
'''
|
||||
|
||||
return tf.cast(tf.equal(solution, result), tf.int32)
|
||||
|
||||
|
||||
# def fx_fitness_train_[other](self, result, solution): # [OTHER] kernel
|
||||
|
||||
# '''
|
||||
# Creates element-wise fitness computation TensorFlow (TF) sub-graph for [other] kernel.
|
||||
|
||||
# This is a [minimisation or maximization] function which [insert description].
|
||||
|
||||
# return tf.[?]([insert formula])
|
||||
# '''
|
||||
|
||||
|
||||
def fx_fitness_store(self, tree, fitness):
|
||||
|
||||
'''
|
||||
|
@ -1668,18 +1628,16 @@ class Base_GP(object):
|
|||
def fx_fitness_tournament(self, tourn_size):
|
||||
|
||||
'''
|
||||
Select one Tree by means of a Tournament in which 'tourn_size' contenders are randomly selected and then
|
||||
compared for their respective fitness (as determined in 'fx_fitness_gym'). The tournament is engaged for each
|
||||
of the four types of inter-generational evolution: reproduction, point mutation, branch (full and grow)
|
||||
mutation, and crossover (sexual reproduction).
|
||||
Multiple contenders ('tourn_size') are randomly selected and then compared for their respective fitness, as
|
||||
determined in 'fx_fitness_gym'. The tournament is engaged to select a single Tree for each invocation of the
|
||||
genetic operators: reproduction, mutation (point, branch), and crossover (sexual reproduction).
|
||||
|
||||
The original Tournament Selection drew directly from the foundation generation (gp.generation_a). However,
|
||||
with the introduction of a minimum number of nodes as defined by the user ('gp.tree_depth_min'),
|
||||
'gp.gene_pool' provides only from those Trees which meet all criteria.
|
||||
'gp.gene_pool' limits the Trees to those which meet all criteria.
|
||||
|
||||
With upper (max depth) and lower (min nodes) invoked, one may enjoy interesting results. Stronger boundary
|
||||
parameters (a reduced gap between the min and max number of nodes) may invoke more compact solutions, but also
|
||||
runs the risk of elitism, even total population die-off where a healthy population once existed.
|
||||
Stronger boundary parameters (a reduced gap between the min and max number of nodes) may invoke more compact
|
||||
solutions, but also runs the risk of elitism, even total population die-off where a healthy population once existed.
|
||||
|
||||
Arguments required: tourn_size
|
||||
'''
|
||||
|
@ -1818,7 +1776,7 @@ class Base_GP(object):
|
|||
'''
|
||||
|
||||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
|
||||
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} True)\033[0;0m\033[36m as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
|
||||
|
||||
print '\n Fitness score: {}'.format(result['fitness'])
|
||||
print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0])
|
||||
|
@ -1834,7 +1792,7 @@ class Base_GP(object):
|
|||
'''
|
||||
|
||||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i])
|
||||
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
|
||||
|
||||
MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness']
|
||||
print '\n\t Regression fitness score: {}'.format(fitness)
|
||||
|
@ -1850,7 +1808,7 @@ class Base_GP(object):
|
|||
'''
|
||||
|
||||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i]))
|
||||
print '\t\033[36m Data row {} predicts match:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result['solution'][i])
|
||||
|
||||
print '\n\tMatching fitness score: {}'.format(result['fitness'])
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Use Genetic Programming for Classification and Symbolic Regression
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.4
|
||||
# version 1.0.5
|
||||
|
||||
'''
|
||||
A word to the newbie, expert, and brave--
|
||||
|
@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat
|
|||
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
|
||||
|
||||
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
|
||||
gp.precision = 6 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
|
||||
gp.precision = 1 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
|
||||
|
||||
|
||||
#++++++++++++++++++++++++++++++++++++++++++
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Use Genetic Programming for Classification and Symbolic Regression
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.4
|
||||
# version 1.0.5
|
||||
|
||||
'''
|
||||
A word to the newbie, expert, and brave--
|
||||
|
@ -56,14 +56,14 @@ import argparse
|
|||
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
|
||||
|
||||
ap = argparse.ArgumentParser(description = 'Karoo GP Server')
|
||||
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
|
||||
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'c', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
|
||||
ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half')
|
||||
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population')
|
||||
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = '[3...10] maximum Tree depth for the initial population')
|
||||
ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run')
|
||||
ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes')
|
||||
ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population')
|
||||
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations')
|
||||
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size')
|
||||
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations')
|
||||
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 7, help = '[1...max pop] tournament size')
|
||||
ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv')
|
||||
|
||||
args = ap.parse_args()
|
||||
|
|
Loading…
Reference in New Issue