Reputation: 45
I'm looking for effective means of adding or omitting code in order to help my genetic algorithm program return faster results. The goal of the program is to accept a string and create other strings that match as closely as possible. Whichever newly made strings match the closest(the top 5) mate with others and produces offspring(some of which have mutations that put a new random number into the string without affecting the length). It all works fine, but it takes an unfathomable amount of generations to get some of the longer strings(4 and up) to perfectly match. Sorry about the tl;dr length, but here's my current code. Criticize away!
#include "stdio.h"
#include "fstream"
#include "ctime"
#include "iostream"
#include "string"
#include "windows.h"
#define CHARACTERS 16
#define STRINGS 100
/*
Enter String(max 16 chars)
Generate 100 words of the same length
Check for Fitness(how close each word is to the string)
Every generation: display top 5
Clone the top 5
Top 20 reproduce(mix each other's chars)
1/1000 chance the children might mutate(each newly mixed string or char might have a completely random number)
*/
typedef struct _stringHolder
{
char randString[CHARACTERS];
int fitness;
}StringHolder;
//Randomly generate 100 words
void generate(char *myString, StringHolder *SH)
{
unsigned seed = time(0);
srand(seed);
//int i = 0;
int j = 0;
char randChar;
//char showString[CHARACTERS];
for(int i=0; i<STRINGS; i++)
{
for(int j=0; j<strlen(myString); j++)
{
randChar = ('a' + (rand() %26));
SH[i].randString[j] = randChar;
}
//limiter so that it doesn't crash
SH[i].randString[strlen(myString)] = 0;
}
}
//Check the similarity of the random strings to the original string.
void getFitness(char *myString, StringHolder *SH)
{
for(int i=0; i<STRINGS; i++)
{
for(int j=0; j<strlen(myString); j++)
{
if(SH[i].randString[j] == myString[j])
{ SH[i].fitness++; }
}
}
}
//Sort the strings
void sortByFitness(char *myString, StringHolder *SH)
{
bool swapped = 1;
while(swapped)
{
swapped = 0;
for(int a=0; a<STRINGS-1; a++)
{
if(SH[a].fitness < SH[a+1].fitness)
{
swapped = 1;
StringHolder temp[STRINGS];
temp[a] = SH[a+1]/*.randString[i]*/;
SH[a+1]/*.randString[i]*/ = SH[a]/*.randString[i]*/;
SH[a]/*.randString[i]*/ = temp[a];
/*if(SH[a].fitness < SH[a+1].fitness)
{ swapped = 0; }*/
}
}
}//while
}
//Clone the Top 5 strings
void cloneTopFive(char *myString, StringHolder *SH, StringHolder *cloneString)
{
for(int i=0; i<5; i++)
{
cloneString[i]/*.randString[j]*/ = SH[i]/*.randString[j]*/;
//printf("cloneString[%d] now holds %s.\n", i, SH[i].randString);
}
}
//Reproduce the Top 20 strings by mixing and matching elements between strings
void reproduceTopTwenty(char *myString, StringHolder *SH /*char *cloneString*/)
{
/*for(int h=5; h<95; h++)
{*/
for(int i=0; i<20; i++)
{
for(int j=0; j<strlen(myString)-1; j++)
{
//char temp[16];
//temp[i] =
SH[i].randString[j] = SH[1 + (rand() %20)].randString[1 + (rand() %strlen(myString)-1)];
int randomNumber;
randomNumber = (1 +(rand() %100));
if(randomNumber == 7)
{
SH[i].randString[1 + (rand() %strlen(myString)-1)] = ('a' + (rand() %26));
}
}
}
}
//Randomize the other 75 numbers and place the cloned Top 5 at the end of the String Holder(SH)
void randomizeOther75(char *myString, StringHolder *SH, StringHolder *cloneString)
{
for(int i=20; i<STRINGS; i++)
{
for(int j=0; j<strlen(myString); j++)
{
SH[i].randString[j] = ('a' + (rand() %26));
}
}
for(int i=0; i<5; i++)
{
for(int j=0; j<strlen(myString); j++)
{
int v = i + 94;
SH[v].randString[j] = cloneString[i].randString[j];
}
}
}
void printGen(char *myString, StringHolder *SH)
{
for(int i=0; i<5; i++)
{
if(SH[i].fitness == strlen(myString))
{ printf("%s has %d fitness. Perfection!\n", SH[i].randString, SH[i].fitness); }
else
printf("%s has %d fitness.\n", SH[i].randString, SH[i].fitness);
}
}
void main()
{
char myString[CHARACTERS];
StringHolder cloneString[5];
StringHolder SH[STRINGS];
for(int i=0; i<STRINGS; i++)
{ SH[i].fitness = 0; }
printf("Enter your name(no whitespaces): ");
scanf("%s", myString);
/*while(strlen(myString) >= CHARACTERS)
{
printf("Please type a string with less than 16 characters\n");
scanf("%s", myString);
}*/
//printf("%s\n", myString);
//first generation
generate(myString, SH);
int gen = 0;
while(1)
{
char x = ' ';
/* printf("Insert something. Anything!");
scanf(&x);*/
/*char newString[CHARACTERS];
for(int i=0; i<5; i++)
{
for( int j=0; j< strlen(myString); j++)
{
newString[j] = SH[i].randString[j];
}
newString[strlen(myString)] = 0;
printf("%s has %d fitness.\n", newString, SH[i].fitness);
}*/
printf("\n");
while(x==' ')
{
printf("Generation %d: \n", gen);
getFitness(myString, SH);
sortByFitness(myString, SH);
printGen(myString, SH);
for(int i=0; i<STRINGS; i++)
{ SH[i].fitness = 0; }
cloneTopFive(myString, SH, cloneString);
reproduceTopTwenty(myString, SH);
randomizeOther75(myString, SH, cloneString);
/*getFitness(myString, SH);
sortByFitness(myString, SH);
for(int i=0; i<5; i++)
{
printf("%s has %d fitness.\n", SH[i].randString, SH[i].fitness);
}
printf("\n");*/
//printf("\nInsert ' ' to continue!\n");
//scanf("%c",&x);
gen++;
}
}
Upvotes: 1
Views: 4546
Reputation: 51
One of the big reasons for the GA to converge poorly is your fitness function. Disregarding potential coding errors in other parts of the programm, what you do is rewarding only perfectly matched letters. The fitness landscape is like this (fear my ASCII art!):
___________ ___________ | | |_| a b c d e f G h i j k l m
Where G is the desired letter. The algorithm has no clue how to find G but through sheer luck. You've basically implemented a randomized letter-wise brute-force search.
Make the fitness function reward "closeness" to the correct solution and convergence will be much faster. Also tweak population parameters, mutation, crossover etc.
Upvotes: 5
Reputation: 296
You need to look at the parameters of your GA. Your population is far too small for such simple computations. You shouldn't have any trouble bumping it up to at least 1000, if not 10 or 100K. You simply don't have enough solutions in the pool to converge to a good result quickly.
In addition your elitism (the number of candidates you clone for the next generation) is rather high. You generally don't want to go above 2% for elitism.
You might also look at how you're doing your crossover function. You generally want to perform crossover for the whole population, rather than just the top 20%. Pass all 95 of your non-cloned values into your crossover function and you will see more diversity in your population.
As Cameron said, your issues likely lie in your parameters rather than your code, and that is a completely different issue, but this should help you on your way. Good luck!
Upvotes: 0
Reputation: 54326
Unfortunately, the nature of genetic algorithms means that sometimes you just need to tweak parameters and see if you can make it find a solution faster. Try cloning the top 10 individuals, or the top 7, or the top 3. Change your top 20 to (e.g.) 50. Increase or decrease the mutation rate.
Sadly we do not yet understand enough about the GA to be able to determine "correct" parameters without this kind of tweaking.
Code optimisation is a whole separate question which could make each generation run faster, but I suspect the problem you're having is that it is taking too many generations so I won't speak to that.
Upvotes: 0