# coding=utf-8
"""
.. moduleauthor:: Shuhua Gao
The module :mod:`entity` defines the core data structures used in GEP, including the gene, the chromosome, the
K-expression and the expression tree. We use primitives (functions and terminals) to build genes and then compose
a chromosome with one or multiple genes. Refer to the :mod:`geppy.core.symbol` module for information on primitives.
A chromosome composed of only one gene is called a *monogenic* chromosome, while one composed of more than one genes
is named a *multigenic* chromosome. A multigenic chromosome can be assigned a linking function to combine the results
from multiple genes into a single result.
"""
import copy
from ..tools.generator import *
from ..core.symbol import Function, RNCTerminal, Terminal
_DEBUG = False
[docs]class KExpression(list):
"""
Class representing the K-expression, or the open reading frame (ORF), of a gene in GEP. A K-expression is actually
a linear form of an expression tree obtained by level-order traversal.
"""
[docs] def __init__(self, content):
"""
Initialize a K-expression.
:param content: iterable, each element is a primitive (function or terminal)
"""
list.__init__(self, content)
[docs] def __str__(self):
"""
Get a string representation of this expression by joining the name of each primitive.
:return:
"""
return '[' + ', '.join(ele.name for ele in self) + ']'
def __repr__(self):
return str(self.__class__) + '[{}]'.format(', '.join(repr(p) for p in self))
[docs]class ExpressionTree:
"""
Class representing an expression tree (ET) in GEP, which may be obtained by translating a K-expression, a
gene, or a chromosome, i.e., genotype-phenotype mapping.
"""
[docs] def __init__(self, root):
"""
Initialize a tree with the given *root* node.
:param root: :class:`ExpressionTree.Node`, the root node
"""
self._root = root
@property
def root(self):
"""
Get the root node of this expression tree.
"""
return self._root
[docs] class Node:
"""
Class representing a node in the expression tree. Each node has a variable number of children, depending on
the arity of the primitive at this node.
"""
[docs] def __init__(self, name):
self._children = []
self._name = name
@property
def children(self):
"""
Get the children of this node.
"""
return self._children
@property
def name(self):
"""
Get the name (label) of this node.
"""
return self._name
[docs] @classmethod
def from_genotype(cls, genome):
"""
Create an expression tree by translating *genome*, which may be a K-expression, a gene, or a chromosome.
:param genome: :class:`KExpression`, :class:`Gene`, or :class:`Chromosome`, the genotype of an individual
:return: :class:`ExpressionTree`, an expression tree
"""
if isinstance(genome, Gene):
return cls._from_kexpression(genome.kexpression)
elif isinstance(genome, KExpression):
return cls._from_kexpression(genome)
elif isinstance(genome, Chromosome):
if len(genome) == 1:
return cls._from_kexpression(genome[0].kexpression)
sub_trees = [cls._from_kexpression(gene.kexpression) for gene in genome]
# combine the sub_trees with the linking function
root = cls.Node(genome.linker.__name__)
root.children[:] = sub_trees
return cls(root)
raise TypeError('Only an argument of type KExpression, Gene, and Chromosome is acceptable. The provided '
'genome type is {}.'.format(type(genome)))
@classmethod
def _from_kexpression(cls, expr):
"""
Create an expression tree from a K-expression.
:param expr: a K-expression
:return: :class:`ExpressionTree`, an expression tree
"""
if len(expr) == 0:
return None
# first build a node for each primitive
nodes = [cls.Node(p.name) for p in expr]
# connect each node to its children if any
i = 0
j = 0
while i < len(nodes):
for _ in range(expr[i].arity):
j += 1
nodes[i].children.append(nodes[j])
i += 1
return cls(nodes[0])
[docs]class Gene(list):
"""
A single gene in GEP, which is a fixed-length linear sequence composed of functions and terminals.
"""
[docs] def __init__(self, pset, head_length):
"""
Instantiate a gene.
:param head_length: length of the head domain
:param pset: a primitive set including functions and terminals for genome construction.
Supposing the maximum arity of functions in *pset* is *max_arity*, then the tail length is automatically
determined to be ``tail_length = head_length * (max_arity - 1) + 1``. The genome, i.e., list of symbols in
the instantiated gene is formed randomly from *pset*.
"""
self._head_length = head_length
genome = generate_genome(pset, head_length)
list.__init__(self, genome)
[docs] @classmethod
def from_genome(cls, genome, head_length):
"""
Build a gene directly from the given *genome*.
:param genome: iterable, a list of symbols representing functions and terminals
:param head_length: length of the head domain
:return: :class:`Gene`, a gene
"""
g = super().__new__(cls)
super().__init__(g, genome)
g._head_length = head_length
return g
[docs] def __str__(self):
"""
Return the expression in a human readable string, which is also a legal python code that can be evaluated.
:return: string form of the expression
"""
expr = self.kexpression
i = len(expr) - 1
while i >= 0:
if expr[i].arity > 0: # a function
f = expr[i]
args = []
for _ in range(f.arity):
ele = expr.pop()
if isinstance(ele, str):
args.append(ele)
else: # a terminal or an ephemeral class
args.append(ele.format())
expr[i] = f.format(*reversed(args)) # replace the operator with its result (str)
i -= 1
# the final result is at the root. It may happen that the K-expression contains only one terminal at its root.
return expr[0] if isinstance(expr[0], str) else expr[0].format()
@property
def kexpression(self):
"""Get the K-expression of type :class:`KExpression` represented by this gene.
"""
# get the level-order K expression
expr = KExpression([self[0]])
i = 0
j = 1
while i < len(expr):
for _ in range(expr[i].arity):
expr.append(self[j])
j += 1
i += 1
return expr
@property
def head_length(self):
"""
Get the length of the head domain.
"""
return self._head_length
@property
def tail_length(self):
"""
Get the length of the tail domain.
"""
return len(self) - self.head_length
@property
def max_arity(self):
"""
Get the max arity of the functions in the primitive set with which the gene is built.
"""
return (len(self) - 1) // self.head_length
@property
def head(self):
"""
Get the head domain of the gene.
"""
return self[0: self.head_length]
@property
def tail(self):
"""
Get the tail domain of the gene.
"""
return self[self.head_length: self.head_length + self.tail_length]
def __repr__(self):
"""
Return a string representation of a gene including all the symbols. Mainly for debugging purpose.
:return: a string
"""
info = [str(p) for p in self]
return '{} ['.format(self.__class__) + ', '.join(info) + ']'
[docs]class GeneDc(Gene):
"""
Class represents a gene with an additional Dc domain to handle numerical constants in GEP.
The basic :class:`Gene`
has two domains, a head and a tail, while this :class:`GeneDc` class introduces another domain called *Dc*
after the tail. The length of the Dc domain is equal to the length of the tail domain. The Dc domain only stores
the indices of numbers present in a separate array :meth:`rnc_array`, which collects a group of candidate
random numerical constants (RNCs). Thus, each :class:`GeneDc` instance comes with a *rnc_array*, which are
generally different among different instances.
In addition to the operators of the basic gene expression algorithm (mutation, inversion, transposition, and
recombination), Dc-specific operators in GEP-RNC are also created to better evolve the Dc domain and to manipulate
the attached set of constants *rnc_array*. Such operators are all suffixed with '_dc' in
:mod:`~geppy.tools.crossover` and :mod:`~geppy.tools.mutation` modules, for example, the method
:func:`~geppy.tools.mutation.invert_dc`.
The *rnc_array* associated with each :class:`GeneDc` instance can be provided explicitly when creating
a :class:`GeneDc` instance. See :meth:`from_genome`. Or more generally, a
random number generator *rnc_gen* can be provided, with which a specified number of RNCs are generated during the
creation of gene instances.
A special terminal of type :class:`~geppy.core.symbol.TerminalRNC` is used internally to represent the RNCs.
.. note::
To create an effective :class:`GeneDc` gene, the primitive set should contain at least one
:class:`~geppy.core.symbol.TerminalRNC` terminal. See :meth:`~geppy.core.symbol.PrimitiveSet.add_rnc`
for details.
Refer to Chapter 5 of [FC2006]_ for more knowledge about GEP-RNC.
"""
[docs] def __init__(self, pset, head_length, rnc_gen, rnc_array_length):
"""
Initialize a gene with a Dc domain.
:param head_length: length of the head domain
:param pset: a primitive set including functions and terminals for genome construction.
:param rnc_gen: callable, which should generate a random number when called by ``rnc_gen()``.
:param rnc_array_length: int, number of random numerical constant candidates associated with this gene,
usually 10 is enough
Supposing the maximum arity of functions in *pset* is *max_arity*, then the tail length is automatically
determined to be ``tail_length = head_length * (max_arity - 1) + 1``. The genome, i.e., list of symbols in
the instantiated gene is formed randomly from *pset*. The length of Dc domain is equal to *tail_length*, i.e.,
the tail domain and the Dc domain share the same length.
"""
# first generate the gene without Dc
super().__init__(pset, head_length)
t = head_length * (pset.max_arity - 1) + 1
d = t
# complement it with a Dc domain
self._rnc_gen = rnc_gen
dc = generate_dc(rnc_array_length, dc_length=d)
self.extend(dc)
# generate the rnc array
self._rnc_array = [self._rnc_gen() for _ in range(rnc_array_length)]
[docs] @classmethod
def from_genome(cls, genome, head_length, rnc_array):
"""
Build a gene directly from the given *genome* and the random numerical constant (RNC) array *rnc_array*.
:param genome: iterable, a list of symbols representing functions and terminals (especially the special RNC
terminal of type :class:`~geppy.core.symbol.TerminalRNC`). This genome should have three
domains: head, tail and Dc. The Dc domain should be composed only of integers representing indices into the
*rnc_array*.
:param head_length: length of the head domain. The length of the tail and Dc domain should follow the rule and
can be determined from the head_length: ``dc_length = tail_length = (len(genome) - head_length) / 2``.
:param rnc_array: the RNC array associated with the gene, which contains random constant candidates
:return: :class:`GeneDc`, a gene
"""
g = super().from_genome(genome, head_length) # the genome is copied and the head-length is fixed
g._rnc_array = copy.deepcopy(rnc_array)
return g
@property
def dc_length(self):
"""
Get the length of the Dc domain, which is equal to :meth:`GeneDc.tail_length`
"""
return self.tail_length
@property
def rnc_array(self):
"""
Get the random numerical array (RNC) associated with this gene.
"""
return self._rnc_array
@property
def tail_length(self):
"""
Get the tail domain length.
"""
return (len(self) - self.head_length) // 2
@property
def max_arity(self):
"""
Get the max arity of functions in the primitive set used to build this gene.
"""
# determine from the head-tail-Dc length relations
return (len(self) - 2 + self.head_length) // (2 * self.head_length)
@property
def dc(self):
"""
Get the Dc domain of this gene.
:return:
"""
return self[self.head_length + self.tail_length: self.head_length + self.tail_length + self.dc_length]
# def __deepcopy__(self, memodict):
# return self.__class__.from_genome(self, self.head_length, self.rnc_array)
[docs] def __str__(self):
"""
Return the expression in a human readable string, which is also a legal python code that can be evaluated.
The special terminal representing a RNC will be replaced by their true values retrieved from the array
:meth:`rnc_array`.
:return: string form of the expression
"""
expr = self.kexpression
n_total_rncs = sum(1 if isinstance(p, RNCTerminal) else 0 for p in expr) # how many RNCs in total?
n_encountered_rncs = 0 # how many RNCs we have encountered in reverse order?
i = len(expr) - 1
while i >= 0:
if expr[i].arity > 0: # a function
f = expr[i]
args = []
for _ in range(f.arity):
ele = expr.pop()
if isinstance(ele, str):
args.append(ele)
else: # a terminal or a RNC terminal
if isinstance(ele, RNCTerminal):
# retrieve its true value
which = n_total_rncs - n_encountered_rncs - 1
index = self.dc[which]
value = self.rnc_array[index]
args.append(str(value))
n_encountered_rncs += 1
else: # a normal terminal
args.append(ele.format())
expr[i] = f.format(*reversed(args)) # replace the operator with its result (str)
i -= 1
# the final result is at the root
if isinstance(expr[0], str):
return expr[0]
if isinstance(expr[0], RNCTerminal): # only contains a single RNC, let's retrieve its value
return str(self.rnc_array[self.dc[0]])
return expr[0].format() # only contains a normal terminal
@property
def kexpression(self):
"""
Get the K-expression of type :class:`KExpression` represented by this gene. The involved RNC terminal will be
replaced by a constant terminal with its value retrived from the :meth:`rnc_array` according to the GEP-RNC
algorithm.
"""
n_rnc = 0
def convert_RNC(p):
nonlocal n_rnc
if isinstance(p, RNCTerminal):
index = self.dc[n_rnc]
value = self.rnc_array[index]
n_rnc += 1
t = Terminal(str(value), value)
return t
return p
# level-order
expr = KExpression([convert_RNC(self[0])])
i = 0
j = 1
while i < len(expr):
for _ in range(expr[i].arity):
expr.append(convert_RNC(self[j]))
j += 1
i += 1
return expr
def __repr__(self):
return super().__repr__() + ', rnc_array=[' + ', '.join(str(num) for num in self.rnc_array) + ']'
[docs]class Chromosome(list):
"""
Individual representation in gene expression programming (GEP), which may contain one or more genes. Note that in a
multigenic chromosome, all genes must share the same head length and the same primitive set. Each element of
:class:`Chromosome` is of type :class:`Gene`.
"""
[docs] def __init__(self, gene_gen, n_genes, linker=None):
"""
Initialize an individual in GEP.
:param gene_gen: callable, a gene generator, i.e., ``gene_gen()`` should return a gene
:param n_genes: number of genes
:param linker: callable, a linking function
.. note:
If a linker is specified, then it must accept *n_genes* arguments, each produced by one gene. If the
*linker* parameter is the default value ``None``, then for a monogenic chromosome, no linking is applied,
while for a multigenic chromosome, the ``tuple`` function is used.
"""
list.__init__(self, (gene_gen() for _ in range(n_genes)))
self._linker = linker
[docs] @classmethod
def from_genes(cls, genes, linker=None):
"""
Create a chromosome instance by providing a list of genes directly. The number of genes is implicitly specified
by ``len(genes)``.
:param genes: iterable, a list of genes
:param linker: callable, a linking function. Refer to :meth:`Chromosome.__init__` for more details.
:return: :class:`Chromosome`, a chromosome
.. caution:
Every gene of type :class:`Gene` in *genes* should be an independent object. That is, for any two genes
*g1* and *g2*, the assertion ``g1 is g1`` should return ``False``. Otherwise, unexpected results may happen
in following evolution.
"""
c = super().__new__(cls)
super().__init__(c, genes)
c._linker = linker
return c
@property
def head_length(self):
"""
Get the length of the head domain. All genes in a chromosome should have the same head length.
"""
return self[0].head_length
@property
def tail_length(self):
"""
Get the length of the tail domain. All genes in a chromosome should have the same tail length.
"""
return self[0].tail_length
@property
def max_arity(self):
"""
Get the max arity of the functions in the primitive set with which all genes are built. Note that all genes
in a chromosome should share the same primitive set.
"""
return self[0].max_arity
[docs] def __str__(self):
"""
Return the expressions in a human readable string.
"""
if self.linker is not None:
return '{0}(\n\t{1}\n)'.format(self.linker.__name__, ',\n\t'.join(str(g) for g in self))
else:
assert len(self) == 1
return str(self[0])
@property
def kexpressions(self):
"""
Get a list of K-expressions for all the genes in this chromosome.
"""
return [g.kexpression for g in self]
@property
def linker(self):
"""
Get the linking function.
"""
if self._linker is None and len(self) > 1:
return tuple
return self._linker
def __repr__(self):
"""
Return a string representation of the chromosome including all the symbols in all the genes.
Mainly for debugging purpose.
:return: a string
"""
return '{}[\n\t'.format(self.__class__) + ',\n\t'.join(repr(g) for g in self) + \
'\n], linker={}'.format(self.linker)