see README for v1.0.3 updates

pull/6/head
Kai Staats 2017-06-06 20:57:09 -07:00
parent 9cb83ce4c1
commit 1fb65b2781
16 changed files with 3250 additions and 290 deletions

View File

@ -0,0 +1,11 @@
coefficients
.1
.2
.3
.4
.5
1
2
3
4
5
1 coefficients
2 .1
3 .2
4 .3
5 .4
6 .5
7 1
8 2
9 3
10 4
11 5

View File

@ -0,0 +1,151 @@
sl,sw,pl,pw,s
5.1,3.5,1.4,0.2,0
4.9,3,1.4,0.2,0
4.7,3.2,1.3,0.2,0
4.6,3.1,1.5,0.2,0
5,3.6,1.4,0.2,0
5.4,3.9,1.7,0.4,0
4.6,3.4,1.4,0.3,0
5,3.4,1.5,0.2,0
4.4,2.9,1.4,0.2,0
4.9,3.1,1.5,0.1,0
5.4,3.7,1.5,0.2,0
4.8,3.4,1.6,0.2,0
4.8,3,1.4,0.1,0
4.3,3,1.1,0.1,0
5.8,4,1.2,0.2,0
5.7,4.4,1.5,0.4,0
5.4,3.9,1.3,0.4,0
5.1,3.5,1.4,0.3,0
5.7,3.8,1.7,0.3,0
5.1,3.8,1.5,0.3,0
5.4,3.4,1.7,0.2,0
5.1,3.7,1.5,0.4,0
4.6,3.6,1,0.2,0
5.1,3.3,1.7,0.5,0
4.8,3.4,1.9,0.2,0
5,3,1.6,0.2,0
5,3.4,1.6,0.4,0
5.2,3.5,1.5,0.2,0
5.2,3.4,1.4,0.2,0
4.7,3.2,1.6,0.2,0
4.8,3.1,1.6,0.2,0
5.4,3.4,1.5,0.4,0
5.2,4.1,1.5,0.1,0
5.5,4.2,1.4,0.2,0
4.9,3.1,1.5,0.2,0
5,3.2,1.2,0.2,0
5.5,3.5,1.3,0.2,0
4.9,3.6,1.4,0.1,0
4.4,3,1.3,0.2,0
5.1,3.4,1.5,0.2,0
5,3.5,1.3,0.3,0
4.5,2.3,1.3,0.3,0
4.4,3.2,1.3,0.2,0
5,3.5,1.6,0.6,0
5.1,3.8,1.9,0.4,0
4.8,3,1.4,0.3,0
5.1,3.8,1.6,0.2,0
4.6,3.2,1.4,0.2,0
5.3,3.7,1.5,0.2,0
5,3.3,1.4,0.2,0
7,3.2,4.7,1.4,1
6.4,3.2,4.5,1.5,1
6.9,3.1,4.9,1.5,1
5.5,2.3,4,1.3,1
6.5,2.8,4.6,1.5,1
5.7,2.8,4.5,1.3,1
6.3,3.3,4.7,1.6,1
4.9,2.4,3.3,1,1
6.6,2.9,4.6,1.3,1
5.2,2.7,3.9,1.4,1
5,2,3.5,1,1
5.9,3,4.2,1.5,1
6,2.2,4,1,1
6.1,2.9,4.7,1.4,1
5.6,2.9,3.6,1.3,1
6.7,3.1,4.4,1.4,1
5.6,3,4.5,1.5,1
5.8,2.7,4.1,1,1
6.2,2.2,4.5,1.5,1
5.6,2.5,3.9,1.1,1
5.9,3.2,4.8,1.8,1
6.1,2.8,4,1.3,1
6.3,2.5,4.9,1.5,1
6.1,2.8,4.7,1.2,1
6.4,2.9,4.3,1.3,1
6.6,3,4.4,1.4,1
6.8,2.8,4.8,1.4,1
6.7,3,5,1.7,1
6,2.9,4.5,1.5,1
5.7,2.6,3.5,1,1
5.5,2.4,3.8,1.1,1
5.5,2.4,3.7,1,1
5.8,2.7,3.9,1.2,1
6,2.7,5.1,1.6,1
5.4,3,4.5,1.5,1
6,3.4,4.5,1.6,1
6.7,3.1,4.7,1.5,1
6.3,2.3,4.4,1.3,1
5.6,3,4.1,1.3,1
5.5,2.5,4,1.3,1
5.5,2.6,4.4,1.2,1
6.1,3,4.6,1.4,1
5.8,2.6,4,1.2,1
5,2.3,3.3,1,1
5.6,2.7,4.2,1.3,1
5.7,3,4.2,1.2,1
5.7,2.9,4.2,1.3,1
6.2,2.9,4.3,1.3,1
5.1,2.5,3,1.1,1
5.7,2.8,4.1,1.3,1
6.3,3.3,6,2.5,2
5.8,2.7,5.1,1.9,2
7.1,3,5.9,2.1,2
6.3,2.9,5.6,1.8,2
6.5,3,5.8,2.2,2
7.6,3,6.6,2.1,2
4.9,2.5,4.5,1.7,2
7.3,2.9,6.3,1.8,2
6.7,2.5,5.8,1.8,2
7.2,3.6,6.1,2.5,2
6.5,3.2,5.1,2,2
6.4,2.7,5.3,1.9,2
6.8,3,5.5,2.1,2
5.7,2.5,5,2,2
5.8,2.8,5.1,2.4,2
6.4,3.2,5.3,2.3,2
6.5,3,5.5,1.8,2
7.7,3.8,6.7,2.2,2
7.7,2.6,6.9,2.3,2
6,2.2,5,1.5,2
6.9,3.2,5.7,2.3,2
5.6,2.8,4.9,2,2
7.7,2.8,6.7,2,2
6.3,2.7,4.9,1.8,2
6.7,3.3,5.7,2.1,2
7.2,3.2,6,1.8,2
6.2,2.8,4.8,1.8,2
6.1,3,4.9,1.8,2
6.4,2.8,5.6,2.1,2
7.2,3,5.8,1.6,2
7.4,2.8,6.1,1.9,2
7.9,3.8,6.4,2,2
6.4,2.8,5.6,2.2,2
6.3,2.8,5.1,1.5,2
6.1,2.6,5.6,1.4,2
7.7,3,6.1,2.3,2
6.3,3.4,5.6,2.4,2
6.4,3.1,5.5,1.8,2
6,3,4.8,1.8,2
6.9,3.1,5.4,2.1,2
6.7,3.1,5.6,2.4,2
6.9,3.1,5.1,2.3,2
5.8,2.7,5.1,1.9,2
6.8,3.2,5.9,2.3,2
6.7,3.3,5.7,2.5,2
6.7,3,5.2,2.3,2
6.3,2.5,5,1.9,2
6.5,3,5.2,2,2
6.2,3.4,5.4,2.3,2
5.9,3,5.1,1.8,2
1 sl sw pl pw s
2 5.1 3.5 1.4 0.2 0
3 4.9 3 1.4 0.2 0
4 4.7 3.2 1.3 0.2 0
5 4.6 3.1 1.5 0.2 0
6 5 3.6 1.4 0.2 0
7 5.4 3.9 1.7 0.4 0
8 4.6 3.4 1.4 0.3 0
9 5 3.4 1.5 0.2 0
10 4.4 2.9 1.4 0.2 0
11 4.9 3.1 1.5 0.1 0
12 5.4 3.7 1.5 0.2 0
13 4.8 3.4 1.6 0.2 0
14 4.8 3 1.4 0.1 0
15 4.3 3 1.1 0.1 0
16 5.8 4 1.2 0.2 0
17 5.7 4.4 1.5 0.4 0
18 5.4 3.9 1.3 0.4 0
19 5.1 3.5 1.4 0.3 0
20 5.7 3.8 1.7 0.3 0
21 5.1 3.8 1.5 0.3 0
22 5.4 3.4 1.7 0.2 0
23 5.1 3.7 1.5 0.4 0
24 4.6 3.6 1 0.2 0
25 5.1 3.3 1.7 0.5 0
26 4.8 3.4 1.9 0.2 0
27 5 3 1.6 0.2 0
28 5 3.4 1.6 0.4 0
29 5.2 3.5 1.5 0.2 0
30 5.2 3.4 1.4 0.2 0
31 4.7 3.2 1.6 0.2 0
32 4.8 3.1 1.6 0.2 0
33 5.4 3.4 1.5 0.4 0
34 5.2 4.1 1.5 0.1 0
35 5.5 4.2 1.4 0.2 0
36 4.9 3.1 1.5 0.2 0
37 5 3.2 1.2 0.2 0
38 5.5 3.5 1.3 0.2 0
39 4.9 3.6 1.4 0.1 0
40 4.4 3 1.3 0.2 0
41 5.1 3.4 1.5 0.2 0
42 5 3.5 1.3 0.3 0
43 4.5 2.3 1.3 0.3 0
44 4.4 3.2 1.3 0.2 0
45 5 3.5 1.6 0.6 0
46 5.1 3.8 1.9 0.4 0
47 4.8 3 1.4 0.3 0
48 5.1 3.8 1.6 0.2 0
49 4.6 3.2 1.4 0.2 0
50 5.3 3.7 1.5 0.2 0
51 5 3.3 1.4 0.2 0
52 7 3.2 4.7 1.4 1
53 6.4 3.2 4.5 1.5 1
54 6.9 3.1 4.9 1.5 1
55 5.5 2.3 4 1.3 1
56 6.5 2.8 4.6 1.5 1
57 5.7 2.8 4.5 1.3 1
58 6.3 3.3 4.7 1.6 1
59 4.9 2.4 3.3 1 1
60 6.6 2.9 4.6 1.3 1
61 5.2 2.7 3.9 1.4 1
62 5 2 3.5 1 1
63 5.9 3 4.2 1.5 1
64 6 2.2 4 1 1
65 6.1 2.9 4.7 1.4 1
66 5.6 2.9 3.6 1.3 1
67 6.7 3.1 4.4 1.4 1
68 5.6 3 4.5 1.5 1
69 5.8 2.7 4.1 1 1
70 6.2 2.2 4.5 1.5 1
71 5.6 2.5 3.9 1.1 1
72 5.9 3.2 4.8 1.8 1
73 6.1 2.8 4 1.3 1
74 6.3 2.5 4.9 1.5 1
75 6.1 2.8 4.7 1.2 1
76 6.4 2.9 4.3 1.3 1
77 6.6 3 4.4 1.4 1
78 6.8 2.8 4.8 1.4 1
79 6.7 3 5 1.7 1
80 6 2.9 4.5 1.5 1
81 5.7 2.6 3.5 1 1
82 5.5 2.4 3.8 1.1 1
83 5.5 2.4 3.7 1 1
84 5.8 2.7 3.9 1.2 1
85 6 2.7 5.1 1.6 1
86 5.4 3 4.5 1.5 1
87 6 3.4 4.5 1.6 1
88 6.7 3.1 4.7 1.5 1
89 6.3 2.3 4.4 1.3 1
90 5.6 3 4.1 1.3 1
91 5.5 2.5 4 1.3 1
92 5.5 2.6 4.4 1.2 1
93 6.1 3 4.6 1.4 1
94 5.8 2.6 4 1.2 1
95 5 2.3 3.3 1 1
96 5.6 2.7 4.2 1.3 1
97 5.7 3 4.2 1.2 1
98 5.7 2.9 4.2 1.3 1
99 6.2 2.9 4.3 1.3 1
100 5.1 2.5 3 1.1 1
101 5.7 2.8 4.1 1.3 1
102 6.3 3.3 6 2.5 2
103 5.8 2.7 5.1 1.9 2
104 7.1 3 5.9 2.1 2
105 6.3 2.9 5.6 1.8 2
106 6.5 3 5.8 2.2 2
107 7.6 3 6.6 2.1 2
108 4.9 2.5 4.5 1.7 2
109 7.3 2.9 6.3 1.8 2
110 6.7 2.5 5.8 1.8 2
111 7.2 3.6 6.1 2.5 2
112 6.5 3.2 5.1 2 2
113 6.4 2.7 5.3 1.9 2
114 6.8 3 5.5 2.1 2
115 5.7 2.5 5 2 2
116 5.8 2.8 5.1 2.4 2
117 6.4 3.2 5.3 2.3 2
118 6.5 3 5.5 1.8 2
119 7.7 3.8 6.7 2.2 2
120 7.7 2.6 6.9 2.3 2
121 6 2.2 5 1.5 2
122 6.9 3.2 5.7 2.3 2
123 5.6 2.8 4.9 2 2
124 7.7 2.8 6.7 2 2
125 6.3 2.7 4.9 1.8 2
126 6.7 3.3 5.7 2.1 2
127 7.2 3.2 6 1.8 2
128 6.2 2.8 4.8 1.8 2
129 6.1 3 4.9 1.8 2
130 6.4 2.8 5.6 2.1 2
131 7.2 3 5.8 1.6 2
132 7.4 2.8 6.1 1.9 2
133 7.9 3.8 6.4 2 2
134 6.4 2.8 5.6 2.2 2
135 6.3 2.8 5.1 1.5 2
136 6.1 2.6 5.6 1.4 2
137 7.7 3 6.1 2.3 2
138 6.3 3.4 5.6 2.4 2
139 6.4 3.1 5.5 1.8 2
140 6 3 4.8 1.8 2
141 6.9 3.1 5.4 2.1 2
142 6.7 3.1 5.6 2.4 2
143 6.9 3.1 5.1 2.3 2
144 5.8 2.7 5.1 1.9 2
145 6.8 3.2 5.9 2.3 2
146 6.7 3.3 5.7 2.5 2
147 6.7 3 5.2 2.3 2
148 6.3 2.5 5 1.9 2
149 6.5 3 5.2 2 2
150 6.2 3.4 5.4 2.3 2
151 5.9 3 5.1 1.8 2

View File

@ -0,0 +1,6 @@
a,b,c,s
0,1,2,3
1,2,3,6
2,3,4,9
3,4,5,12
4,5,6,15
1 a b c s
2 0 1 2 3
3 1 2 3 6
4 2 3 4 9
5 3 4 5 12
6 4 5 6 15

View File

@ -0,0 +1,6 @@
a,b,c,s
0,1,2,3
1,2,3,6
2,3,4,9
3,4,5,12
4,5,6,15
1 a b c s
2 0 1 2 3
3 1 2 3 6
4 2 3 4 9
5 3 4 5 12
6 4 5 6 15

View File

@ -0,0 +1,10 @@
t,r,s
0.241,0.39,0.98
.615,0.72,1.01
1.00,1.00,1.00
1.88,1.52,1.01
11.8,5.20,0.99
29.5,9.54,1.00
84.0,19.18,1.00
165,30.06,1.00
248,39.44,1.00
1 t r s
2 0.241 0.39 0.98
3 .615 0.72 1.01
4 1.00 1.00 1.00
5 1.88 1.52 1.01
6 11.8 5.20 0.99
7 29.5 9.54 1.00
8 84.0 19.18 1.00
9 165 30.06 1.00
10 248 39.44 1.00

View File

@ -0,0 +1,5 @@
operator, arity
+,2
-,2
*,2
/,2
1 operator arity
2 + 2
3 - 2
4 * 2
5 / 2

View File

@ -0,0 +1,5 @@
operator, arity
+,2
-,2
*,2
/,2
1 operator arity
2 + 2
3 - 2
4 * 2
5 / 2

View File

@ -0,0 +1,5 @@
operator, arity
+,2
-,2
*,2
/,2
1 operator arity
2 + 2
3 - 2
4 * 2
5 / 2

View File

@ -0,0 +1,5 @@
operator, arity
+,2
-,2
*,2
/,2
1 operator arity
2 + 2
3 - 2
4 * 2
5 / 2

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,257 @@
# Karoo GP Main (desktop)
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.3
'''
A word to the newbie, expert, and brave--
Even if you are highly experienced in Genetic Programming, it is recommended that you review the 'Karoo User Guide'
before running this application. While your computer will not burst into flames nor will the sun collapse into a black
hole if you do not, you will likely find more enjoyment of this particular flavour of GP with a little understanding
of its intent and design.
KAROO GP DESKTOP
This is the Karoo GP desktop application. It presents a simple yet functional user interface for configuring each
Karoo GP run. While this can be launched on a remote server, you may find that once you get the hang of using Karoo,
and are in more of a production mode than one of experimentation, using karoo_gp_server.py is more to your liking as
it provides both a scripted and/or command-line launch vehicle.
To launch Karoo GP desktop:
$ python karoo_gp_main.py
(or from iPython)
$ run karoo_gp_main.py
If you include the path to an external dataset, it will auto-load at launch:
$ python karoo_gp_main.py /[path]/[to_your]/[filename].csv
'''
import sys # sys.path.append('modules/') to add the directory 'modules' to the current path
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
import time
#++++++++++++++++++++++++++++++++++++++++++
# User Defined Configuration |
#++++++++++++++++++++++++++++++++++++++++++
'''
Karoo GP queries the user for key parameters, some of which may be adjusted during run-time
at user invoked pauses. See the User Guide for meaning and value of each of the following parameters.
Future versions will enable all of these parameters to be configured via an external configuration file and/or
command-line arguments passed at launch.
'''
gp.karoo_banner()
print ''
menu = ['c','r','m','p','']
while True:
try:
gp.kernel = raw_input('\t Select (c)lassification, (r)egression, (m)atching, or (p)lay (default m): ')
if gp.kernel not in menu: raise ValueError()
gp.kernel = gp.kernel or 'm'; break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
if gp.kernel == 'p':
menu = ['f','g','']
while True:
try:
tree_type = raw_input('\t Select (f)ull or (g)row method (default f): ')
if tree_type not in menu: raise ValueError()
tree_type = tree_type or 'f'; break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
else:
menu = ['f','g','r','']
while True:
try:
tree_type = raw_input('\t Select (f)ull, (g)row, or (r)amped 50/50 method (default r): ')
if tree_type not in menu: raise ValueError()
tree_type = tree_type or 'r'; break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
menu = range(1,11)
while True:
try:
tree_depth_base = raw_input('\t Enter depth of the \033[3minitial\033[0;0m population of Trees (default 3): ')
if tree_depth_base not in str(menu) or tree_depth_base == '0': raise ValueError()
tree_depth_base = tree_depth_base or 3; tree_depth_base = int(tree_depth_base); break
except ValueError: print '\t\033[32m Enter a number from 1 including 10. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
if gp.kernel == 'p': # if the Play kernel is selected
gp.tree_depth_max = tree_depth_base
gp.tree_pop_max = 1
gp.display = 'm'
else: # if any other kernel is selected
if tree_type == 'f': gp.tree_depth_max = tree_depth_base
else: # if type is Full, the maximum Tree depth for the full run is equal to the initial population
menu = range(tree_depth_base,11)
while True:
try:
gp.tree_depth_max = raw_input('\t Enter maximum Tree depth (default matches \033[3minitial\033[0;0m): ')
if gp.tree_depth_max not in str(menu) or gp.tree_depth_max == '0': raise ValueError()
gp.tree_depth_max = gp.tree_depth_max or tree_depth_base; gp.tree_depth_max = int(gp.tree_depth_max); break
# gp.tree_depth_max = int(gp.tree_depth_max) - tree_depth_base; break
except ValueError: print '\t\033[32m Enter a number >= the maximum Tree depth. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
menu = range(3,101)
while True:
try:
gp.tree_depth_min = raw_input('\t Enter minimum number of nodes for any given Tree (default 3): ')
if gp.tree_depth_min not in str(menu) or gp.tree_depth_min == '0': raise ValueError()
gp.tree_depth_min = gp.tree_depth_min or 3; gp.tree_depth_min = int(gp.tree_depth_min); break
except ValueError: print '\t\033[32m Enter a number from 3 to 2^(depth + 1) - 1 including 100. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
menu = range(10,1001)
while True:
try:
gp.tree_pop_max = raw_input('\t Enter number of Trees in each population (default 100): ')
if gp.tree_pop_max not in str(menu) or gp.tree_pop_max == '0': raise ValueError()
gp.tree_pop_max = gp.tree_pop_max or 100; gp.tree_pop_max = int(gp.tree_pop_max); break
except ValueError: print '\t\033[32m Enter a number from 10 including 1000. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
menu = range(1,101)
while True:
try:
gp.generation_max = raw_input('\t Enter max number of generations (default 10): ')
if gp.generation_max not in str(menu) or gp.generation_max == '0': raise ValueError()
gp.generation_max = gp.generation_max or 10; gp.generation_max = int(gp.generation_max); break
except ValueError: print '\t\033[32m Enter a number from 1 including 100. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
menu = ['i','g','m','s','db','']
while True:
try:
gp.display = raw_input('\t Display (i)nteractive, (g)eneration, (m)iminal, (s)ilent, or (d)e(b)ug (default m): ')
if gp.display not in menu: raise ValueError()
gp.display = gp.display or 'm'; break
except ValueError: print '\t\033[32m Select from the options given. Try again ...\n\033[0;0m'
except KeyboardInterrupt: sys.exit()
# define the ratio between types of mutation, where all sum to 1.0; can be adjusted in 'i'nteractive mode
gp.evolve_repro = int(0.1 * gp.tree_pop_max) # quantity of a population generated through Reproduction
gp.evolve_point = int(0.0 * gp.tree_pop_max) # quantity of a population generated through Point Mutation
gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generated through Branch Mutation
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
gp.tourn_size = 10 # qty of individuals entered into each tournament (standard 10); can be adjusted in 'i'nteractive mode
gp.precision = 10 # the number of floating points for the round function in 'fx_fitness_eval'; hard coded
#++++++++++++++++++++++++++++++++++++++++++
# Construct First Generation of Trees |
#++++++++++++++++++++++++++++++++++++++++++
'''
Karoo GP constructs the first generation of Trees. All subsequent generations evolve from priors, with no new Trees
constructed from scratch. All parameters which define the Trees were set by the user in the previous section.
If the user has selected 'Play' mode, this is the only generation to be constructed, and then GP Karoo terminates.
'''
start = time.time() # start the clock for the timer
filename = '' # temp place holder
gp.fx_karoo_data_load(tree_type, tree_depth_base, filename)
gp.generation_id = 1 # set initial generation ID
gp.population_a = ['Karoo GP by Kai Staats, Generation ' + str(gp.generation_id)] # an empty list which will store all Tree arrays, one generation at a time
gp.fx_karoo_construct(tree_type, tree_depth_base) # construct the first population of Trees
if gp.kernel != 'p': print '\n We have constructed a population of', gp.tree_pop_max,'Trees for Generation 1\n'
else: # EOL for Play mode
gp.fx_display_tree(gp.tree) # print the current Tree
gp.fx_archive_tree_write(gp.population_a, 'a') # save this one Tree to disk
sys.exit()
#++++++++++++++++++++++++++++++++++++++++++
# Evaluate First Generation of Trees |
#++++++++++++++++++++++++++++++++++++++++++
'''
Karoo GP evaluates the first generation of Trees. This process flattens each GP Tree into a standard
equation by means of a recursive algorithm and subsequent processing by the SymPy library which
simultaneously evaluates the Tree for its results, returns null for divide by zero, reorganises
and then rewrites the expression in its simplest form.
If the user has defined only 1 generation, then this is the end of the run. Else, Karoo GP
continues into multi-generational evolution.
'''
if gp.display != 's':
print ' Evaluate the first generation of Trees ...'
if gp.display == 'i': gp.fx_karoo_pause(0)
gp.fx_fitness_gym(gp.population_a) # generate expression, evaluate fitness, compare fitness
gp.fx_archive_tree_write(gp.population_a, 'a') # save the first generation of Trees to disk
# no need to continue if only 1 generation or fewer than 10 Trees were designated by the user
if gp.tree_pop_max < 10 or gp.generation_max == 1:
gp.fx_archive_params_write('Desktop') # save run-time parameters to disk
gp.fx_karoo_eol()
sys.exit()
#++++++++++++++++++++++++++++++++++++++++++
# Evolve Multiple Generations |
#++++++++++++++++++++++++++++++++++++++++++
'''
Karoo GP moves into multi-generational evolution.
In the following four evolutionary methods, the global list of arrays 'gp.population_a' is repeatedly recycled as
the prior generation from which the local list of arrays 'gp.population_b' is created, one array at a time. The ratio of
invocation of the four evolutionary processes for each generation is set by the parameters in the 'User Defined
Configuration' (top).
'''
for gp.generation_id in range(2, gp.generation_max + 1): # loop through 'generation_max'
print '\n Evolve a population of Trees for Generation', gp.generation_id, '...'
gp.population_b = ['GP Tree by Kai Staats, Evolving Generation'] # initialise population_b to host the next generation
gp.fx_fitness_gene_pool() # generate the viable gene pool (compares against gp.tree_depth_min)
gp.fx_karoo_reproduce() # method 1 - Reproduction
gp.fx_karoo_point_mutate() # method 2 - Point Mutation
gp.fx_karoo_branch_mutate() # method 3 - Branch Mutation
gp.fx_karoo_crossover() # method 4 - Crossover Reproduction
gp.fx_eval_generation() # evaluate all Trees in a single generation
gp.population_a = gp.fx_evolve_pop_copy(gp.population_b, ['GP Tree by Kai Staats, Generation ' + str(gp.generation_id)])
#++++++++++++++++++++++++++++++++++++++++++
# "End of line, man!" --CLU |
#++++++++++++++++++++++++++++++++++++++++++
print '\n \033[36m Karoo GP has an ellapsed time of \033[0;0m\033[31m%f\033[0;0m' % (time.time() - start), '\033[0;0m'
gp.fx_archive_tree_write(gp.population_b, 'f') # save the final generation of Trees to disk
gp.fx_karoo_eol()

View File

@ -0,0 +1,89 @@
# Karoo GP Server
# Use Genetic Programming for Classification and Symbolic Regression
# by Kai Staats, MSc; see LICENSE.md
# Thanks to Emmanuel Dufourq and Arun Kumar for support during 2014-15 devel; TensorFlow support provided by Iurii Milovanov
# version 1.0.3
'''
A word to the newbie, expert, and brave--
Even if you are highly experienced in Genetic Programming, it is recommended that you review the 'Karoo User Guide'
before running this application. While your computer will not burst into flames nor will the sun collapse into a black
hole if you do not, you will likely find more enjoyment of this particular flavour of GP with a little understanding
of its intent and design.
KAROO GP SERVER
This is the Karoo GP server application. It can be internally scripted, fully command-line configured, or a combination
of both. If this is your first time using Karoo GP, please run the desktop application karoo_gp_main.py first in order
that you come to understand the full functionality of this particular Genetic Programming platform.
To launch Karoo GP server:
$ python karoo_gp_server.py
(or from iPython)
$ run karoo_gp_server.py
Without any arguments, Karoo GP relies entirely upon the scripted settings and the datasets located in karoo_gp/files/.
If you include the path to an external dataset, it will auto-load at launch:
$ python karoo_gp_server.py /[path]/[to_your]/[filename].csv
You can include a number of additional arguments which override the default values, as follows:
-ker [r,c,m] fitness function: (r)egression, (c)lassification, or (m)atching
-typ [f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half
-bas [3...10] maximum Tree depth for the initial population
-max [3...10] maximum Tree depth for the entire run
-min [3...100] minimum number of nodes
-pop [10...1000] maximum population
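-gen [1...100] number of generations
-tor [1...max pop] tournament size
-fil [path] path to an external dataset: /[path]/[to_your]/[filename].csv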
Note that if you include any of the above flags, then you must also include a flag to load an external dataset:
$ python karoo_gp_server.py -ker c -typ r -bas 4 -fil /[path]/[to_your]/[filename].csv
'''
import sys # sys.path.append('modules/') to add the directory 'modules' to the current path
import argparse
import karoo_gp_base_class; gp = karoo_gp_base_class.Base_GP()
# Parse the command line. Every flag is optional; whatever is omitted falls back to the scripted default given here.
default_dataset = 'files/data_MATCH.csv'

ap = argparse.ArgumentParser(description = 'Karoo GP Server')
ap.add_argument('-ker', action = 'store', dest = 'kernel', default = 'm', help = '[c,r,m] fitness function: (r)egression, (c)lassification, or (m)atching')
ap.add_argument('-typ', action = 'store', dest = 'type', default = 'r', help = '[f,g,r] Tree type: (f)ull, (g)row, or (r)amped half/half')
# type = int lets argparse reject a non-numeric value with a usage message in place of a ValueError traceback
ap.add_argument('-bas', action = 'store', dest = 'depth_base', type = int, default = 5, help = '[3...10] maximum Tree depth for the initial population')
ap.add_argument('-max', action = 'store', dest = 'depth_max', type = int, default = 5, help = '[3...10] maximum Tree depth for the entire run')
ap.add_argument('-min', action = 'store', dest = 'depth_min', type = int, default = 3, help = '[3...100] minimum number of nodes')
ap.add_argument('-pop', action = 'store', dest = 'pop_max', type = int, default = 100, help = '[10...1000] maximum population')
ap.add_argument('-gen', action = 'store', dest = 'gen_max', type = int, default = 30, help = '[1...100] number of generations')
ap.add_argument('-tor', action = 'store', dest = 'tor_size', type = int, default = 10, help = '[1...max pop] tournament size')
ap.add_argument('-fil', action = 'store', dest = 'filename', default = None, help = '/path/to_your/[data].csv (default ' + default_dataset + ')')
# The usage notes document launching with a bare dataset path and no flag; without this optional positional
# argument argparse rejects that launch with 'unrecognized arguments'.
ap.add_argument('path', nargs = '?', default = None, help = '/path/to_your/[data].csv given without the -fil flag')

args = ap.parse_args()

# pass the argparse defaults and/or user inputs to the required variables
gp.kernel = str(args.kernel)
tree_type = str(args.type)
tree_depth_base = int(args.depth_base)
gp.tree_depth_max = int(args.depth_max)
gp.tree_depth_min = int(args.depth_min)
gp.tree_pop_max = int(args.pop_max)
gp.generation_max = int(args.gen_max)
filename = str(args.filename or args.path or default_dataset) # an explicit -fil takes precedence over the bare path

gp.display = 's' # display mode is set to (s)ilent
gp.evolve_repro = int(0.1 * gp.tree_pop_max) # quantity of a population generated through Reproduction
gp.evolve_point = int(0.0 * gp.tree_pop_max) # quantity of a population generated through Point Mutation
gp.evolve_branch = int(0.2 * gp.tree_pop_max) # quantity of a population generated through Branch Mutation
gp.evolve_cross = int(0.7 * gp.tree_pop_max) # quantity of a population generated through Crossover
gp.tourn_size = int(args.tor_size) # qty of individuals entered into each tournament; can be adjusted in 'i'nteractive mode
gp.precision = 4 # the number of floating points for the round function in 'fx_fitness_eval'

# run Karoo GP
gp.karoo_gp(tree_type, tree_depth_base, filename)

View File

@ -1,77 +0,0 @@
# Karoo Iris Plot
# by Kai Staats, MSc UCT / AIMS and Arun Kumar, PhD
# version 0.9.2.1
import sys
import numpy as np
import matplotlib.pyplot as mpl
from mpl_toolkits.mplot3d import Axes3D
np.set_printoptions(linewidth = 320) # set the terminal to print 320 characters before line-wrapping in order to view Trees
'''
THIS SCRIPT IS NOT YET COMPLETE!
This is a functional yet *not* complete script designed to help you visualise your 2D or 3D data against a
function generated by Karoo GP. The script currently uses a simple plot of evenly spaced data, not the real data from
the Iris dataset.
Once complete, by default, this script will plot a Karoo GP derived function against a scatter plot of one of the Iris
datasets included with this package: karoo_gp/files/Iris_dataset/data_IRIS_virginica-vs-setosa_3-col_PLOT.csv
If you are new to plotting, https://www.youtube.com/channel/UCfzlCWGWYyIQ0aLC5w48gBQ provides a good, visual
tutorial, as do many other web and video based guides.
'''
### USER INTERACTION ###
if len(sys.argv) == 1:
filename = '../files/Iris_dataset/data_IRIS_virginica-vs-setosa_3-col_PLOT.csv'
print '\n\t\033[31mYou have not assigned an input file, therefore "IRIS_virginica-vs-setosa_3-col_PLOT" will be used.\033[0;0m'
elif len(sys.argv) > 2: print '\n\t\033[31mERROR! You have assigned too many command line arguments. Try again ...\033[0;0m'; sys.exit()
else: filename = sys.argv[1]
### LOAD THE DATA and PREPARE AN EMPTY ARRAY ###
print '\n\t\033[36mLoading dataset:', filename, '\033[0;0m\n'
data = np.loadtxt(filename, delimiter=',', dtype = str)
data_a, data_b, data_c = [], [], []
tmp = data[:,0]
for n in range(len(tmp)):
data_a.append(float(tmp[n]))
tmp = data[:,1]
for n in range(len(tmp)):
data_b.append(float(tmp[n]))
tmp = data[:,2]
for n in range(len(tmp)):
data_c.append(float(tmp[n]))
### PREP THE FUNCTION ###
b = np.arange(2, 4, 0.25) # plot from n to m in steps o
c = np.arange(2, 4, 0.25) # plot from n to m in steps o
b, c = np.meshgrid(b, c)
# -b*c + c**2 + c - 1 # Karoo GP derived function
# -a/c - b**2 + c**2 # Karoo GP derived function
# -a - b + c**2 # Karoo GP derived function becomes a = -b + c**2
a = -b + c**2
### PLOT THE FUNCTION and DATA ###
fig = mpl.figure()
ax = fig.add_subplot(111, projection = '3d')
ax.scatter(data_a, data_b, data_c, c = 'r', marker = 'o') # 3D data
ax.plot_wireframe(a,b,c) # 3D function
ax.set_xlabel('a')
ax.set_ylabel('b')
ax.set_zlabel('c')
mpl.show()

View File

@ -1,68 +0,0 @@
# Karoo Multiclass Classifer Test
# by Kai Staats, MSc UCT / AIMS
# version 0.9.2.1
'''
This is a toy script, designed to allow you to play with multiclass classification using the same underlying function
as employed by Karoo GP. Keep in mind that a linear multiclass classifier such as this is suited only for data which
itself has a linear (eg: time series) component, else GP will struggle to force the data to fit.
'''
from numpy import arange
while True:
try:
class_type = raw_input('\t Select (i)nfinite or (f)inite wing bins (default i): ')
if class_type not in ('i','f',''): raise ValueError()
class_type = class_type or 'i'; break
except ValueError: print '\033[32mSelect from the options given. Try again ...\n\033[0;0m'
n = range(1,100)
while True:
try:
class_labels = raw_input('\t Enter the number of class labels / solutions (default 4): ')
if class_labels not in str(n) and class_labels not in '': raise ValueError()
if class_labels == '0': class_labels = 1; break
class_labels = class_labels or 4; class_labels = int(class_labels); break
except ValueError: print '\033[32m Enter a number from 3 including 100. Try again ...\n\033[0;0m'
skew = (class_labels / 2) - 1
min_val = 0 - skew - 1 # add a data point to the left
if class_labels & 1: max_val = 0 + skew + 3 # add a data point to the right if odd number of class labels
else: max_val = 0 + skew + 2 # add a data point to the right if even number of class labels
print '\n\t solutions =', range(class_labels)
print '\t results = [', min_val, '...', max_val,']'
print '\t skew =', skew, '\n'
if class_type == 'i':
for result in arange(min_val, max_val, 0.5):
for solution in range(class_labels):
if solution == 0 and result <= 0 - skew: # check for the first class
fitness = 1; print '\t\033[36m\033[1m class', solution, '\033[0;0m\033[36mas\033[1m', result, '\033[0;0m\033[36m<=', 0 - skew, '\033[0;0m'
elif solution == class_labels - 1 and result > solution - 1 - skew: # check for the last class
fitness = 1; print '\t\033[36m\033[1m class', solution, '\033[0;0m\033[36mas\033[1m', result, '\033[0;0m\033[36m>', solution - 1 - skew, '\033[0;0m'
elif solution - 1 - skew < result <= solution - skew: # check for class bins between first and last
fitness = 1; print '\t\033[36m\033[1m class', solution, '\033[0;0m\033[36mas', solution - 1 - skew, '<\033[1m', result, '\033[0;0m\033[36m<=', solution - skew, '\033[0;0m'
else: fitness = 0 #; print '\t\033[36m no match for', result, 'in class', solution, '\033[0;0m' # no class match
# print ''
if class_type == 'f':
for result in arange(min_val, max_val, .5):
for solution in range(class_labels):
if solution - 1 - skew < result <= solution - skew: # check for discrete, finite class bins
fitness = 1; print '\t\033[36m\033[1m class', solution, '\033[0;0m\033[36mas', solution - 1 - skew, '<\033[1m', result, '\033[0;0m\033[36m<=', solution - skew, '\033[0;0m'
else: fitness = 0 #; print '\t\033[36m no match for', result, 'in class', solution, '\033[0;0m' # no class match
# print ''

View File

@ -1,78 +0,0 @@
# Karoo Data Normalisation
# by Kai Staats, MSc UCT
# version 0.9.2.1
import sys
import numpy as np
np.set_printoptions(linewidth = 320) # set the terminal to print 320 characters before line-wrapping in order to view Trees
'''
This script works with a dataset to prepare a new, normalised dataset. It does so by comparing all values in each given
column, finding the maximum and minimum values, and then modifying each value to fall between a high of 1 and low of 0.
The modified values are written to a new file, the original remaining untouched.
This script can be used *after* karoo_features_sort.py, and assumes no header has yet been applied to the .csv.
'''
def normalise(array, places = None):
    '''
    Rescale a one-dimensional sequence of numbers so that its minimum maps to 0 and its maximum maps to 1.

    The formula was derived from stn.spotfire.com/spotfire_client_help/norm/norm_normalizing_columns.htm

    Arguments:
        array  - the numbers to normalise (list or 1D NumPy array)
        places - the number of decimal places to which each result is rounded; when omitted, the module-level
                 value 'fp' (entered by the user) is used, as before

    Returns a 1D NumPy array of floats of the same length as 'array'. An empty input returns an empty array. A
    sequence in which every value is identical has no spread to scale by and is returned as all zeros (this
    formerly raised ZeroDivisionError).
    '''

    if places is None: places = fp # fall back to the precision entered by the user

    values = np.asarray(array, dtype = float)
    if values.size == 0: return np.array([])

    array_min = values.min()
    spread = values.max() - array_min
    if spread == 0: return np.zeros(values.size) # constant column: avoid the divide by zero

    # the built-in round() is retained so that the rounding of each value is unchanged
    return np.array([round(float(value - array_min) / float(spread), places) for value in values])
### USER INTERACTION ###
if len(sys.argv) == 1: print '\n\t\033[31mERROR! You have not assigned an input file. Try again ...\033[0;0m'; sys.exit()
elif len(sys.argv) > 2: print '\n\t\033[31mERROR! You have assigned too many command line arguments. Try again ...\033[0;0m'; sys.exit()
else: filename = sys.argv[1]
n = range(1,9)
while True:
try:
fp = raw_input('\n\tEnter number of floating points desired in normalised data (default 4): ')
if fp not in str(n) and fp not in '': raise ValueError()
if fp == '0': fp = 1; break
fp = fp or 4; fp = int(fp); break
except ValueError: print '\n\t\033[32mEnter a number from 1 including 8. Try again ...\033[0;0m'
### LOAD THE DATA and PREPARE AN EMPTY ARRAY ###
print '\n\t\033[36mLoading dataset:', filename, '\033[0;0m\n'
data = np.loadtxt(filename, delimiter = ',') # load data
data_norm = np.zeros(shape = (data.shape[0], data.shape[1])) # build an empty dataset which matches the shape of the original
### NORMALISE THE DATA ###
for col in range(data.shape[1] - 1):
print '\tnormalising column:', col
colsum = []
for row in range(data.shape[0]):
colsum = np.append(colsum, data[row,col])
data_norm[:,col] = normalise(colsum) # add each normalised column of data
data_norm[:,data.shape[1] - 1] = data[:,data.shape[1] - 1] # add the labels again
### SAVE THE NORMALISED DATA ###
file_tmp = filename.split('.')[0]
np.savetxt(file_tmp + '-NORM.csv', data_norm, delimiter = ',')
print '\n\t\033[36mThe normlised dataset has been written to the file:', file_tmp + '-NORM.csv', '\033[0;0m'

View File

@ -1,67 +0,0 @@
# Karoo Dataset Builder
# by Kai Staats, MSc UCT / AIMS and Arun Kumar, PhD
# version 0.9.2.1
import sys
import numpy as np
np.set_printoptions(linewidth = 320) # set the terminal to print 320 characters before line-wrapping in order to view Trees
'''
In machine learning, it is often the case that your engaged dataset is derived from a larger parent. In constructing
the subset, if we grab a series of datapoints (rows in a .csv) from the larger dataset in sequential order, only from
the top, middle, or bottom, we will likely bias the new dataset and incorrectly train the machine learning algorithm.
Therefore, it is imperative that we engage a random function, guided only by the number of data points for each class.
This script can be used *before* karoo_normalise.py, and assumes no header has yet been applied to the .csv.
'''
### USER INTERACTION ###
if len(sys.argv) == 1: print '\n\t\033[31mERROR! You have not assigned an input file. Try again ...\033[0;0m'; sys.exit()
elif len(sys.argv) > 2: print '\n\t\033[31mERROR! You have assigned too many command line arguments. Try again ...\033[0;0m'; sys.exit()
else: filename = sys.argv[1]
#n = range(1,101)
#while True:
# try:
# labels = raw_input('\n\tEnter number of unique class labels, or 0 for a regression dataset (default 2): ')
# if labels not in str(n) and labels not in '': raise ValueError()
# # if labels == '0': labels = 1; break
# labels = labels or 2; labels = int(labels); break
# except ValueError: print '\n\t\033[32mEnter a number from 0 including 100. Try again ...\033[0;0m'
n = range(10,10001)
while True:
try:
samples = raw_input('\n\tEnter number of desired datapoints per class (default 100): ')
if samples not in str(n) and samples not in '': raise ValueError()
if samples == '0': samples = 10; break
samples = samples or 100; samples = int(samples); break
except ValueError: print '\n\t\033[32mEnter a number from 10 including 10000. Try again ...\033[0;0m'
### LOAD THE ORIGINAL DATASET ###
print '\n\t\033[36m\n\tLoading dataset:', filename, '\033[0;0m\n'
data = np.loadtxt(filename, delimiter = ',') # load data
data_sort = np.empty(shape = [0, data.shape[1]]) # build an empty array of the proper dimensions
### SORT DATA by LABEL ###
labels = len(np.unique(data[:,-1]))
for label in range(labels):
data_list = np.where(data[:,-1] == label) # build a list of all rows which end in the current label
data_select = np.random.choice(data_list[0], samples, replace = False) # select user defined 'samples' from list
print data_select
data_sort = np.append(data_sort, data[data_select], axis = 0)
### SAVE THE SORTED DATASET ###
file_tmp = filename.split('.')[0]
np.savetxt(file_tmp + '-SORT.csv', data_sort, delimiter = ',')
print '\n\t\033[36mThe sorted dataset has been written to the file:', file_tmp + '-SORT.csv', '\033[0;0m'