v1.0.3 update; see README
parent
6f0720941c
commit
a46a4dd696
Binary file not shown.
|
@ -2,7 +2,7 @@
|
|||
# Define the methods and global variables used by Karoo GP
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.2
|
||||
# version 1.0.3
|
||||
|
||||
'''
|
||||
A NOTE TO THE NEWBIE, EXPERT, AND BRAVE
|
||||
|
@ -14,6 +14,7 @@ likely find more enjoyment of this particular flavour of GP with a little unders
|
|||
import sys
|
||||
import os
|
||||
import csv
|
||||
import time
|
||||
|
||||
import numpy as np
|
||||
import sklearn.metrics as skm
|
||||
|
@ -106,6 +107,7 @@ class Base_GP(object):
|
|||
### Global variables initiated and/or used by Sympy ###
|
||||
'gp.algo_raw' a Sympy string which represents a flattened tree
|
||||
'gp.algo_sym' a Sympy executable version of algo_raw
|
||||
'gp.fittest_dict' a dictionary of the most fit trees, compiled during fitness function execution
|
||||
|
||||
### Variables used for evolutionary management ###
|
||||
'gp.population_a' the root generation from which Trees are chosen for mutation and reproduction
|
||||
|
@ -148,11 +150,12 @@ class Base_GP(object):
|
|||
'''
|
||||
|
||||
self.karoo_banner()
|
||||
start = time.time() # start the clock for the timer
|
||||
|
||||
# construct first generation of Trees
|
||||
self.fx_karoo_data_load(tree_type, tree_depth_base, filename)
|
||||
self.generation_id = 1 # set initial generation ID
|
||||
self.population_a = ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)] # a list which will store all Tree arrays, one generation at a time
|
||||
self.population_a = ['Karoo GP by Kai Staats, Generation ' + str(self.generation_id)] # list to store all Tree arrays, one generation at a time
|
||||
self.fx_karoo_construct(tree_type, tree_depth_base) # construct the first population of Trees
|
||||
|
||||
# evaluate first generation of Trees
|
||||
|
@ -174,12 +177,13 @@ class Base_GP(object):
|
|||
self.fx_eval_generation() # evaluate all Trees in a single generation
|
||||
|
||||
self.population_a = self.fx_evolve_pop_copy(self.population_b, ['GP Tree by Kai Staats, Generation ' + str(self.generation_id)])
|
||||
|
||||
|
||||
# "End of line, man!" --CLU
|
||||
print '\n \033[36m Karoo GP has an ellapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m'
|
||||
self.fx_archive_tree_write(self.population_b, 'f') # save the final generation of Trees to disk
|
||||
self.fx_archive_params_write('Server') # save run-time parameters to disk
|
||||
|
||||
print '\033[3m Congrats!\033[0;0m Your multi-generational Karoo GP run is complete.\n'
|
||||
print '\n \033[3m Congrats!\033[0;0m Your multi-generational Karoo GP run is complete.\n'
|
||||
sys.exit() # return Karoo GP to the command line to support bash and cron job execution
|
||||
|
||||
# return
|
||||
|
@ -415,7 +419,7 @@ class Base_GP(object):
|
|||
|
||||
for n in range(self.evolve_repro): # quantity of Trees to be copied without mutation
|
||||
tourn_winner = self.fx_fitness_tournament(self.tourn_size) # perform tournament selection for each reproduction
|
||||
tourn_winner = self.fx_evolve_fitness_wipe(tourn_winner) # remove fitness data
|
||||
tourn_winner = self.fx_evolve_fitness_wipe(tourn_winner) # wipe fitness data
|
||||
self.population_b.append(tourn_winner) # append array to next generation population of Trees
|
||||
|
||||
return
|
||||
|
@ -633,7 +637,6 @@ class Base_GP(object):
|
|||
query = raw_input('\t Enter quantity of Trees to be generated by Reproduction: ')
|
||||
if query not in str(menu): raise ValueError()
|
||||
elif query == '': break
|
||||
# tmp_repro = int(float(query) / 100 * self.tree_pop_max); break
|
||||
tmp_repro = int(float(query)); break
|
||||
except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m'
|
||||
|
||||
|
@ -642,7 +645,6 @@ class Base_GP(object):
|
|||
query = raw_input('\t Enter quantity of Trees to be generated by Point Mutation: ')
|
||||
if query not in str(menu): raise ValueError()
|
||||
elif query == '': break
|
||||
# tmp_point = int(float(query) / 100 * self.tree_pop_max); break
|
||||
tmp_point = int(float(query)); break
|
||||
except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m'
|
||||
|
||||
|
@ -651,7 +653,6 @@ class Base_GP(object):
|
|||
query = raw_input('\t Enter quantity of Trees to be generated by Branch Mutation: ')
|
||||
if query not in str(menu): raise ValueError()
|
||||
elif query == '': break
|
||||
# tmp_branch = int(float(query) / 100 * self.tree_pop_max); break
|
||||
tmp_branch = int(float(query)); break
|
||||
except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m'
|
||||
|
||||
|
@ -660,11 +661,9 @@ class Base_GP(object):
|
|||
query = raw_input('\t Enter quantity of Trees to be generated by Crossover: ')
|
||||
if query not in str(menu): raise ValueError()
|
||||
elif query == '': break
|
||||
# tmp_cross = int(float(query) / 100 * self.tree_pop_max); break
|
||||
tmp_cross = int(float(query)); break
|
||||
except ValueError: print '\n\t\033[32m Enter a number from 0 including 1000. Try again ...\033[0;0m'
|
||||
|
||||
# if tmp_repro + tmp_point + tmp_branch + tmp_cross != 100: print '\n\t The sum of the above does not equal 100%. Try again ...'
|
||||
if tmp_repro + tmp_point + tmp_branch + tmp_cross != self.tree_pop_max: print '\n\t The sum of the above does not equal', self.tree_pop_max, 'Try again ...'
|
||||
else:
|
||||
print '\n\t The revised balance of genetic operators is:'
|
||||
|
@ -676,15 +675,15 @@ class Base_GP(object):
|
|||
|
||||
elif pause == 'l': # display dictionary of Trees with the best fitness score
|
||||
print '\n\t The leading Trees and their associated expressions are:'
|
||||
for item in sorted(self.fittest_dict): print '\t ', item, ':', self.fittest_dict[item]
|
||||
|
||||
for n in sorted(self.fittest_dict): print '\t ', n, ':', self.fittest_dict[n]
|
||||
|
||||
|
||||
elif pause == 't': # evaluate a Tree against the TEST data
|
||||
if self.generation_id > 1:
|
||||
menu = range(1, len(self.population_b))
|
||||
while True:
|
||||
try:
|
||||
query = raw_input('\n\t Select a Tree in population_b to evaluate for Precision & Recall: ')
|
||||
query = raw_input('\n\t Select a Tree in population_b to test: ')
|
||||
if query not in str(menu) or query == '0': raise ValueError()
|
||||
elif query == '': break
|
||||
|
||||
|
@ -888,7 +887,7 @@ class Base_GP(object):
|
|||
self.pop_node_c1 = '' # pos 9: child node 1
|
||||
self.pop_node_c2 = '' # pos 10: child node 2
|
||||
self.pop_node_c3 = '' # pos 11: child node 3 (assumed max of 3 with boolean operator 'if')
|
||||
self.pop_fitness = '' # pos 12: fitness value following Tree evaluation
|
||||
self.pop_fitness = '' # pos 12: fitness score following Tree evaluation
|
||||
|
||||
self.tree = np.array([ ['TREE_ID'],['tree_type'],['tree_depth_base'],['NODE_ID'],['node_depth'],['node_type'],['node_label'],['node_parent'],['node_arity'],['node_c1'],['node_c2'],['node_c3'],['fitness'] ])
|
||||
|
||||
|
@ -1271,7 +1270,7 @@ class Base_GP(object):
|
|||
|
||||
self.fx_evolve_tree_renum(self.population_b) # population renumber
|
||||
self.fx_fitness_gym(self.population_b) # run 'fx_eval', 'fx_fitness', 'fx_fitness_store', and fitness record
|
||||
self.fx_archive_tree_write(self.population_b, 'a') # archive the current, evolved generation of Trees as the next foundation population
|
||||
self.fx_archive_tree_write(self.population_b, 'a') # archive current population as foundation for next generation
|
||||
|
||||
if self.display != 's':
|
||||
print '\n Copy gp.population_b to gp.population_a\n'
|
||||
|
@ -1295,7 +1294,7 @@ class Base_GP(object):
|
|||
minimising (lower is better). The total fitness score is then saved with each Tree in the external .csv file.
|
||||
|
||||
Part 3 compares the fitness of each Tree to the prior best fit in order to track those that improve with each
|
||||
comparison. For matching functions, all the Trees will have the same fitness value, but they may present more
|
||||
comparison. For matching functions, all the Trees will have the same fitness score, but they may present more
|
||||
than one solution. For minimisation and maximisation functions, the final Tree should present the best overall
|
||||
fitness for that generation. It is important to note that Part 3 does *not* in any way influence the Tournament
|
||||
Selection which is a stand-alone process.
|
||||
|
@ -1319,7 +1318,7 @@ class Base_GP(object):
|
|||
|
||||
expr = str(self.algo_sym) # get sympified expression and process it with TF - tested 2017 02/02
|
||||
result = self.fx_fitness_eval(expr, self.data_train)
|
||||
fitness = result['fitness'] # extract fitness value
|
||||
fitness = result['fitness'] # extract fitness score
|
||||
|
||||
if self.display == 'i':
|
||||
print '\t \033[36m with fitness sum:\033[1m', fitness, '\033[0;0m\n'
|
||||
|
@ -1331,24 +1330,24 @@ class Base_GP(object):
|
|||
if self.kernel == 'c': # display best fit Trees for the CLASSIFY kernel
|
||||
if fitness >= fitness_best: # find the Tree with Maximum fitness score
|
||||
fitness_best = fitness # set best fitness score
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if fitness >= prior
|
||||
|
||||
elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel
|
||||
if fitness_best == 0: fitness_best = fitness # set the baseline first time through
|
||||
if fitness <= fitness_best: # find the Tree with Minimum fitness score
|
||||
fitness_best = fitness # set best fitness score
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if fitness <= prior
|
||||
|
||||
elif self.kernel == 'm': # display best fit Trees for the MATCH kernel
|
||||
if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows
|
||||
fitness_best = fitness # set best fitness score
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
|
||||
self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary if all rows match
|
||||
|
||||
# elif self.kernel == '[other]': # display best fit Trees for the [other] kernel
|
||||
# if fitness >= fitness_best: # find the Tree with [Maximum or Minimum] fitness score
|
||||
# if fitness [>=, <=] fitness_best: # find the Tree with [Maximum or Minimum] fitness score
|
||||
# fitness_best = fitness # set best fitness score
|
||||
# self.fittest_dict.update({tree_id:self.algo_sym}) # add to dictionary
|
||||
|
||||
|
||||
print '\n\033[36m ', len(self.fittest_dict.keys()), 'trees\033[1m', np.sort(self.fittest_dict.keys()), '\033[0;0m\033[36moffer the highest fitness scores.\033[0;0m'
|
||||
if self.display == 'g': self.fx_karoo_pause(0)
|
||||
|
||||
|
@ -1358,7 +1357,7 @@ class Base_GP(object):
|
|||
def fx_fitness_eval(self, expr, data, get_labels = False): # used to be fx_fitness_eval
|
||||
|
||||
'''
|
||||
Computes tree expression using TensorFlow (TF) returning results and fitness values.
|
||||
Computes tree expression using TensorFlow (TF) returning results and fitness scores.
|
||||
|
||||
This method orchestrates most of the TF routines by parsing the input string expression and converting it into a TF
|
||||
operation graph, which is then processed in an isolated TF session to compute the results and corresponding fitness
|
||||
|
@ -1383,7 +1382,7 @@ class Base_GP(object):
|
|||
'labels' - an array of the labels extracted from the results; defined only for CLASSIFY kernel, None otherwise
|
||||
'solution' - an array of the solution values extracted from the data (variable 's' in the dataset)
|
||||
'pairwise_fitness' - an array of the element-wise results of applying corresponding fitness kernel function
|
||||
'fitness' - aggregated scalar fitness value
|
||||
'fitness' - aggregated scalar fitness score
|
||||
|
||||
Arguments required: expr, data
|
||||
'''
|
||||
|
@ -1593,7 +1592,7 @@ class Base_GP(object):
|
|||
|
||||
tree[12][1] = fitness # store the fitness with each tree
|
||||
tree[12][2] = len(str(self.algo_raw)) # store the length of the raw algo for parsimony
|
||||
# if len(tree[3]) > 4: # if the Tree array is wide enough ...
|
||||
# if len(tree[3]) > 4: # if the Tree array is wide enough -- SEE SCRATCHPAD
|
||||
|
||||
return
|
||||
|
||||
|
@ -1701,11 +1700,8 @@ class Base_GP(object):
|
|||
|
||||
This method is automatically invoked with every Tournament Selection ('fx_fitness_tournament').
|
||||
|
||||
At this point in time, the gene pool does *not* limit the number of times any given Tree may be selected for
|
||||
mutation or reproduction nor does it take into account parsimony (seeking the simplest expression). Nor does
|
||||
a 'divide by zero' error keep a tree from entering the gene pool, as it might contain other, beneficial code
|
||||
to contribute to the next generation. However, trees with 'error' are given a fitness score of 0 and therefore
|
||||
will eventually be removed from the gene pool.
|
||||
At this time, the gene pool does *not* limit the number of times any given Tree may be selected for mutation or
|
||||
reproduction nor does it take into account parsimony (seeking the simplest multivariate expression).
|
||||
|
||||
Arguments required: none
|
||||
'''
|
||||
|
@ -1717,7 +1713,7 @@ class Base_GP(object):
|
|||
|
||||
self.fx_eval_poly(self.population_a[tree_id]) # extract the expression
|
||||
|
||||
if len(self.population_a[tree_id][3])-1 >= self.tree_depth_min and self.algo_sym != 1: # if Tree meets the requirements
|
||||
if len(self.population_a[tree_id][3])-1 >= self.tree_depth_min and self.algo_sym != 1: # check if Tree meets the requirements
|
||||
if self.display == 'i': print '\t\033[36m Tree', tree_id, 'has >=', self.tree_depth_min, 'nodes and is added to the gene pool\033[0;0m'
|
||||
self.gene_pool.append(self.population_a[tree_id][0][1])
|
||||
|
||||
|
@ -1751,8 +1747,8 @@ class Base_GP(object):
|
|||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts class:\033[1m {} ({} label) as {:.2f}{}\033[0;0m'.format(i, int(result['labels'][0][i]), int(result['solution'][i]), result['result'][i], result['labels'][1][i])
|
||||
|
||||
print '\n Fitness value: {}'.format(result['fitness'])
|
||||
print '\n Classification report:\n', skm.classification_report(result['solution'], result['labels'][0])
|
||||
print '\n Fitness score: {}'.format(result['fitness'])
|
||||
print '\n Precision-Recall report:\n', skm.classification_report(result['solution'], result['labels'][0])
|
||||
print ' Confusion matrix:\n', skm.confusion_matrix(result['solution'], result['labels'][0])
|
||||
|
||||
return
|
||||
|
@ -1761,14 +1757,14 @@ class Base_GP(object):
|
|||
def fx_fitness_test_regress(self, result):
|
||||
|
||||
'''
|
||||
Print the Fitness value and Mean Squared Error for a REGRESSION run against the test data.
|
||||
Print the Fitness score and Mean Squared Error for a REGRESSION run against the test data.
|
||||
'''
|
||||
|
||||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts value:\033[1m {:.2f} ({:.2f} True)\033[0;0m'.format(i, result['result'][i], result[ 'solution'][i])
|
||||
|
||||
MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness']
|
||||
print '\n\t Fitness value: {}'.format(fitness)
|
||||
print '\n\t Regression fitness score: {}'.format(fitness)
|
||||
print '\t Mean Squared Error: {}'.format(MSE)
|
||||
|
||||
return
|
||||
|
@ -1783,8 +1779,8 @@ class Base_GP(object):
|
|||
for i in range(len(result['result'])):
|
||||
print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i]))
|
||||
|
||||
print '\n\tFitness value: {}'.format(result['fitness'])
|
||||
|
||||
print '\n\tMatching fitness score: {}'.format(result['fitness'])
|
||||
|
||||
return
|
||||
|
||||
|
||||
|
@ -1797,7 +1793,7 @@ class Base_GP(object):
|
|||
# for i in range(len(result['result'])):
|
||||
# print '\t\033[36m Data row {} predicts value:\033[1m {} ({} label)\033[0;0m'.format(i, int(result['result'][i]), int(result['solution'][i]))
|
||||
|
||||
# print '\n\tFitness value: {}'.format(result['fitness'])
|
||||
# print '\n\tFitness score: {}'.format(result['fitness'])
|
||||
|
||||
# return
|
||||
|
||||
|
@ -1832,7 +1828,7 @@ class Base_GP(object):
|
|||
|
||||
else: print '\n\t\033[31m ERROR! In fx_evolve_point_mutate, node_type =', tree[5][node], '\033[0;0m'; self.fx_karoo_pause(0)
|
||||
|
||||
tree = self.fx_evolve_fitness_wipe(tree) # remove fitness data
|
||||
tree = self.fx_evolve_fitness_wipe(tree) # wipe fitness data
|
||||
|
||||
if self.display == 'db': print '\n\033[36m This is tourn_winner after node\033[1m', node, '\033[0;0m\033[36mmutation and updates:\033[0;0m\n', tree; self.fx_karoo_pause(0)
|
||||
|
||||
|
@ -1869,7 +1865,7 @@ class Base_GP(object):
|
|||
rnd = np.random.randint(0, len(self.terminals) - 1) # call the previously loaded .csv which contains all terminals
|
||||
tree[6][branch[n]] = self.terminals[rnd] # replace terminal (variable)
|
||||
|
||||
tree = self.fx_evolve_fitness_wipe(tree) # remove fitness data
|
||||
tree = self.fx_evolve_fitness_wipe(tree) # wipe fitness data
|
||||
|
||||
if self.display == 'db': print '\n\033[36m This is tourn_winner after nodes\033[1m', branch, '\033[0;0m\033[36mwere mutated and updated:\033[0;0m\n', tree; self.fx_karoo_pause(0)
|
||||
|
||||
|
@ -2014,7 +2010,7 @@ class Base_GP(object):
|
|||
offspring = self.fx_evolve_branch_body_copy(offspring) # copy remaining nodes in 'branch_y' ('gp.tree') to 'offspring'
|
||||
offspring = self.fx_evolve_tree_prune(offspring, self.tree_depth_max) # prune to the max Tree depth + adjustment - tested 2016 07/10
|
||||
|
||||
offspring = self.fx_evolve_fitness_wipe(offspring) # wipe fitness data and return 'offspring'
|
||||
offspring = self.fx_evolve_fitness_wipe(offspring) # wipe fitness data
|
||||
|
||||
return offspring
|
||||
|
||||
|
@ -2389,7 +2385,7 @@ class Base_GP(object):
|
|||
Arguments required: tree
|
||||
'''
|
||||
|
||||
tree[12][1:] = '' # remove all 'fitness' data
|
||||
tree[12][1:] = '' # wipe fitness data
|
||||
|
||||
return tree
|
||||
|
||||
|
@ -2605,7 +2601,6 @@ class Base_GP(object):
|
|||
'''
|
||||
|
||||
file = open(self.path + '/log_config.txt', 'w')
|
||||
|
||||
file.write('Karoo GP ' + app)
|
||||
file.write('\n launched: ' + str(self.datetime))
|
||||
file.write('\n dataset: ' + str(self.dataset))
|
||||
|
@ -2616,7 +2611,7 @@ class Base_GP(object):
|
|||
# file.write('tree type: ' + tree_type)
|
||||
# file.write('tree depth base: ' + str(tree_depth_base))
|
||||
file.write('\n tree depth max: ' + str(self.tree_depth_max))
|
||||
file.write('\n tree depth min: ' + str(self.tree_depth_min))
|
||||
file.write('\n min node count: ' + str(self.tree_depth_min))
|
||||
file.write('\n')
|
||||
file.write('\n genetic operator Reproduction: ' + str(self.evolve_repro))
|
||||
file.write('\n genetic operator Point Mutation: ' + str(self.evolve_point))
|
||||
|
@ -2627,12 +2622,10 @@ class Base_GP(object):
|
|||
file.write('\n population: ' + str(self.tree_pop_max))
|
||||
file.write('\n number of generations: ' + str(self.generation_id))
|
||||
file.write('\n\n')
|
||||
|
||||
file.close()
|
||||
|
||||
|
||||
file = open(self.path + '/log_test.txt', 'w')
|
||||
|
||||
file.write('Karoo GP ' + app)
|
||||
file.write('\n launched: ' + str(self.datetime))
|
||||
file.write('\n dataset: ' + str(self.dataset))
|
||||
|
@ -2640,30 +2633,60 @@ class Base_GP(object):
|
|||
|
||||
if len(self.fittest_dict) > 0:
|
||||
|
||||
file.write('\n The leading Trees and their associated expressions are:')
|
||||
for item in sorted(self.fittest_dict):
|
||||
file.write('\n\t ' + str(item) + ' : ' + str(self.fittest_dict[item]))
|
||||
|
||||
# test the highest numbered Tree and write to the .txt log
|
||||
self.fx_eval_poly(self.population_b[int(item)]) # generate the raw and sympified equation for the given Tree using SymPy
|
||||
fitness_best = 0
|
||||
fittest_tree = 0
|
||||
|
||||
# original method, using pre-built fittest_dict
|
||||
# file.write('\n The leading Trees and their associated expressions are:')
|
||||
# for n in sorted(self.fittest_dict):
|
||||
# file.write('\n\t ' + str(n) + ' : ' + str(self.fittest_dict[n]))
|
||||
|
||||
# revised method, re-evaluating all Trees from stored fitness score
|
||||
for tree_id in range(1, len(self.population_b)):
|
||||
|
||||
fitness = float(self.population_b[tree_id][12][1])
|
||||
|
||||
if self.kernel == 'c': # display best fit Trees for the CLASSIFY kernel
|
||||
if fitness >= fitness_best: # find the Tree with Maximum fitness score
|
||||
fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree
|
||||
|
||||
elif self.kernel == 'r': # display best fit Trees for the REGRESSION kernel
|
||||
if fitness_best == 0: fitness_best = fitness # set the baseline first time through
|
||||
if fitness <= fitness_best: # find the Tree with Minimum fitness score
|
||||
fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree
|
||||
|
||||
elif self.kernel == 'm': # display best fit Trees for the MATCH kernel
|
||||
if fitness == self.data_train_rows: # find the Tree with a perfect match for all data rows
|
||||
fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree
|
||||
|
||||
# elif self.kernel == '[other]': # display best fit Trees for the [other] kernel
|
||||
# if fitness [>=, <=] fitness_best: # find the Tree with [Maximum or Minimum] fitness score
|
||||
# fitness_best = fitness; fittest_tree = tree_id # set best fitness Tree
|
||||
|
||||
# print 'fitness_best:', fitness_best, 'fittest_tree:', fittest_tree
|
||||
|
||||
|
||||
# test the most fit Tree and write to the .txt log
|
||||
self.fx_eval_poly(self.population_b[int(fittest_tree)]) # generate the raw and sympified equation for the given Tree using SymPy
|
||||
expr = str(self.algo_sym) # get simplified expression and process it by TF - tested 2017 02/02
|
||||
result = self.fx_fitness_eval(expr, self.data_test, get_labels=True)
|
||||
|
||||
file.write('\n\n Tree ' + str(item) + ' yields (sym): ' + str(self.algo_sym))
|
||||
|
||||
if self.kernel == 'c':
|
||||
file.write('\n\n Fitness value: {}'.format(result['fitness']))
|
||||
file.write('\n\n Classification report:\n {}'.format(skm.classification_report(result['solution'], result['labels'][0])))
|
||||
file.write('\n Confusion matrix:\n {}'.format(skm.confusion_matrix(result['solution'], result['labels'][0])))
|
||||
|
||||
file.write('\n\n Tree ' + str(fittest_tree) + ' is the most fit, with expression:')
|
||||
file.write('\n\n ' + str(self.algo_sym))
|
||||
|
||||
if self.kernel == 'c':
|
||||
file.write('\n\n Classification fitness score: {}'.format(result['fitness']))
|
||||
file.write('\n\n Precision-Recall report:\n {}'.format(skm.classification_report(result['solution'], result['labels'][0])))
|
||||
file.write('\n Confusion matrix:\n {}'.format(skm.confusion_matrix(result['solution'], result['labels'][0])))
|
||||
|
||||
elif self.kernel == 'r':
|
||||
MSE, fitness = skm.mean_squared_error(result['result'], result['solution']), result['fitness']
|
||||
file.write('\n\n Fitness value: {}'.format(fitness))
|
||||
file.write('\n\n Regression fitness score: {}'.format(fitness))
|
||||
file.write('\n Mean Squared Error: {}'.format(MSE))
|
||||
|
||||
|
||||
elif self.kernel == 'm':
|
||||
file.write('\n\n Fitness value: {}'.format(result['fitness']))
|
||||
|
||||
file.write('\n\n Matching fitness score: {}'.format(result['fitness']))
|
||||
|
||||
# elif self.kernel == '[other]':
|
||||
# file.write( ... )
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Use Genetic Programming for Classification and Symbolic Regression
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.1
|
||||
# version 1.0.3
|
||||
|
||||
'''
|
||||
A word to the newbie, expert, and brave--
|
||||
|
@ -33,7 +33,7 @@ If you include the path to an external dataset, it will auto-load at launch:
|
|||
|
||||
import sys # sys.path.append('modules/') to add the directory 'modules' to the current path
|
||||
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
|
||||
|
||||
import time
|
||||
|
||||
#++++++++++++++++++++++++++++++++++++++++++
|
||||
# User Defined Configuration |
|
||||
|
@ -157,7 +157,7 @@ gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generat
|
|||
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
|
||||
|
||||
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
|
||||
gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
|
||||
gp.precision = 10 # the number of decimal places used by the round function in 'fx_fitness_eval'; hard coded
|
||||
|
||||
|
||||
#++++++++++++++++++++++++++++++++++++++++++
|
||||
|
@ -171,6 +171,8 @@ constructed from scratch. All parameters which define the Trees were set by the
|
|||
If the user has selected 'Play' mode, this is the only generation to be constructed, and then Karoo GP terminates.
|
||||
'''
|
||||
|
||||
start = time.time() # start the clock for the timer
|
||||
|
||||
filename = '' # temp place holder
|
||||
gp.fx_karoo_data_load(tree_type, tree_depth_base, filename)
|
||||
gp.generation_id = 1 # set initial generation ID
|
||||
|
@ -205,7 +207,7 @@ if gp.display != 's':
|
|||
print ' Evaluate the first generation of Trees ...'
|
||||
if gp.display == 'i': gp.fx_karoo_pause(0)
|
||||
|
||||
gp.fx_fitness_gym(gp.population_a) # 1) extract polynomial from each Tree; 2) evaluate fitness, store; 3) display
|
||||
gp.fx_fitness_gym(gp.population_a) # generate expression, evaluate fitness, compare fitness
|
||||
gp.fx_archive_tree_write(gp.population_a, 'a') # save the first generation of Trees to disk
|
||||
|
||||
# no need to continue if only 1 generation or fewer than 10 Trees were designated by the user
|
||||
|
@ -247,7 +249,9 @@ for gp.generation_id in range(2, gp.generation_max + 1): # loop through 'generat
|
|||
# "End of line, man!" --CLU |
|
||||
#++++++++++++++++++++++++++++++++++++++++++
|
||||
|
||||
print '\n \033[36m Karoo GP has an ellapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m'
|
||||
|
||||
gp.fx_archive_tree_write(gp.population_b, 'f') # save the final generation of Trees to disk
|
||||
gp.fx_karoo_eol()
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
# Use Genetic Programming for Classification and Symbolic Regression
|
||||
# by Kai Staats, MSc; see LICENSE.md
|
||||
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
|
||||
# version 1.0.1
|
||||
# version 1.0.3
|
||||
|
||||
'''
|
||||
A word to the newbie, expert, and brave--
|
||||
|
@ -54,11 +54,12 @@ import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
|
|||
ap = argparse.ArgumentParser(description = 'Karoo GP Server')
|
||||
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
|
||||
ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half')
|
||||
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 3, help = '[3...10] maximum Tree depth for the initial population')
|
||||
ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 3, help = '[3...10] maximum Tree depth for the entire run')
|
||||
ap.add_argument('-bas', action = 'store', dest = 'depth_base', default = 5, help = '[3...10] maximum Tree depth for the initial population')
|
||||
ap.add_argument('-max', action = 'store', dest = 'depth_max', default = 5, help = '[3...10] maximum Tree depth for the entire run')
|
||||
ap.add_argument('-min', action = 'store', dest = 'depth_min', default = 3, help = '[3...100] minimum number of nodes')
|
||||
ap.add_argument('-pop', action = 'store', dest = 'pop_max', default = 100, help = '[10...1000] maximum population')
|
||||
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 10, help = '[1...100] number of generations')
|
||||
ap.add_argument('-gen', action = 'store', dest = 'gen_max', default = 30, help = '[1...100] number of generations')
|
||||
ap.add_argument('-tor', action = 'store', dest = 'tor_size', default = 10, help = '[1...max pop] tournament size')
|
||||
ap.add_argument('-fil', action = 'store', dest = 'filename', default = 'files/data_MATCH.csv', help = '/path/to_your/[data].csv')
|
||||
|
||||
args = ap.parse_args()
|
||||
|
@ -79,8 +80,8 @@ gp.evolve_point = int(0.0 * gp.tree_pop_max) # quantity of a population generate
|
|||
gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generated through Branch Mutation
|
||||
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
|
||||
|
||||
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
|
||||
gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
|
||||
gp.tourn_size = int(args.tor_size) # qty of individuals entered into each tournament; can be adjusted in 'i'nteractive mode
|
||||
gp.precision = 4 # the number of decimal places used by the round function in 'fx_fitness_eval'
|
||||
|
||||
# run Karoo GP
|
||||
gp.karoo_gp(tree_type, tree_depth_base, filename)
|
||||
|
|
Loading…
Reference in New Issue