// Arthur the Chatterbot

/* VERSION 1 19 MAY 1998*/

// IOANNIS KARYDIS, SIMON STONE, MATHEW SCOINES, KEITH PASSMORE @ Kingston University

#include

/* Words of the incoming sentence, one per row; filled in by main(). */
char strings[20][100];

/* Shared index: main() stores the index of the last word here for parse(). */
int counter;

/* Result buffer of clean(); search_database() copies it back into the
   sentence.  main() passes a 100-byte sentence through clean(), so this
   buffer has to be at least that large (it used to be 40 and overflowed). */
char sentence2[100];

/* Argument count (argc) as recorded by main(); used by response() as the
   upper bound when walking the words in strings[]. */
int no_args;

/* Reply text assembled by temp_man() and printed by response(). */
char outp[100]="\0";

void display(char *);

/* Append src to the NUL-terminated text in dst without ever writing past
   dst[cap - 1]; whatever does not fit is silently dropped. */
static void outp_append(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (src == NULL || used + 1 >= cap)
        return;

    strncat(dst, src, cap - used - 1);
}

/*
 * temp_man(): merge a reply template with a generated response and append
 * the result to the global output buffer outp.
 *
 *   templ    - template line; the text before the first '[' is emitted
 *              first, the text after it is emitted after the response
 *   response - text to insert between the two template halves
 *
 * Output appended to outp:  <before '['> " " <response> " " <after '['>
 *
 * A template with no '[' (or with nothing after it) simply contributes no
 * second half; previously this passed a NULL pointer to strcat().  All
 * appends are bounded by sizeof(outp), so over-long text is truncated
 * instead of overrunning the buffer.
 */
void temp_man(char *templ, char *response)
{
    char qtempl[100] = "";
    char *part1;
    char *part2;

    if (templ == NULL)
        return;

    /* strtok() writes into the string it splits, so work on a private copy */
    strncpy(qtempl, templ, sizeof(qtempl) - 1);
    qtempl[sizeof(qtempl) - 1] = '\0';

    /* first token: text up to the square bracket */
    part1 = strtok(qtempl, "[");

    /* an empty delimiter set makes strtok() return the whole remainder */
    part2 = (part1 != NULL) ? strtok(NULL, "") : NULL;

    outp_append(outp, sizeof(outp), part1);
    outp_append(outp, sizeof(outp), " ");
    outp_append(outp, sizeof(outp), response);
    outp_append(outp, sizeof(outp), " ");
    outp_append(outp, sizeof(outp), part2);
}

/*
 * templ_database(): look up a reply template for a question type in the
 * file "kbase.tpl" and pass it, together with the generated sentence, to
 * temp_man() (which appends the finished reply to the global outp).
 *
 *   sentence - response text to be inserted into the chosen template
 *   ident    - question identity, 1..26 (1 = "what" ... 26 = "like")
 *
 * File layout as read by this function: a line starting with '*' marks a
 * section, the next line is the keyword, the line after that starts with
 * the number of template lines, and that many template lines follow.
 *
 * Returns the fixed text "I don't understand life..." in every case, as the
 * original did.
 *
 * Fixes over the original:
 *   - an unknown ident no longer compares against an uninitialised buffer
 *   - a missing/unreadable file or an over-long line ends the scan instead
 *     of looping forever (the loop used to test eof() only)
 *   - the template count is clamped to the table size and a count of zero
 *     no longer causes a division by zero in rand() % n
 *   - a local loop index is used instead of the global `counter`
 */
char * templ_database(char *sentence, int ident)
{
    /* keyword for every question identity; slot 0 is unused */
    static const char *const keywords[] = {
        "",
        "what",  "who",   "where", "which", "when",   "why",   "how",
        "are",   "am",    "were",  "is",    "was",    "would", "can",
        "could", "do",    "does",  "did",   "have",   "has",   "had",
        "must",  "might", "shall", "should", "like"
    };
    /* writable storage so that a char * can legally be returned */
    static char fallback[] = "I don't understand life...";

    const int keyword_count = sizeof(keywords) / sizeof(keywords[0]);

    fstream tpl;                 //file stream for the template file
    char buffer[100] = "";
    char templates[20][100];
    const int max_templates = sizeof(templates) / sizeof(templates[0]);
    int no_r = 0;                //number of templates announced in the file
    int loaded = 0;              //number of templates actually read

    if (ident < 1 || ident >= keyword_count)
        return fallback;

    const char *identt = keywords[ident];

    //open kbase tpl file for input
    tpl.open("kbase.tpl", ios::in);
    if (!tpl)
        return fallback;

    while (tpl.good())
    {
        //skip forward to the next line that starts with an asterisk
        while (buffer[0] != '*' && tpl.good())
        {
            tpl.getline(buffer, sizeof(buffer));
        }

        //the line after the asterisk holds the keyword of this section
        tpl.getline(buffer, sizeof(buffer));

        if (strcmp(buffer, identt) != 0)
            continue;

        //number of candidate templates, written beneath the keyword
        if (!(tpl >> no_r))
            break;

        //>> leaves the rest of that line unread; consume it
        tpl.getline(buffer, sizeof(buffer));

        if (no_r > max_templates)
            no_r = max_templates;

        //collect the template lines, stopping early if the file runs out
        for (loaded = 0; loaded < no_r; loaded++)
        {
            if (!tpl.getline(templates[loaded], sizeof(templates[loaded])))
                break;
        }

        //pick one of the templates at random and build the output from it
        if (loaded > 0)
            temp_man(templates[rand() % loaded], sentence);

        break;
    }

    tpl.close();

    return fallback;
}

/*
 * clean(): normalise a sentence in place and leave a copy in the global
 * sentence2.
 *
 *   sentence - NUL-terminated, writable text; modified in place
 *
 * Steps:
 *   1. every character is passed through removed() and then lower-cased
 *      (removed() is defined outside this section; presumably it replaces
 *      non-alphabetic characters - confirm against its definition)
 *   2. trailing spaces are cut off (index 0 is never touched)
 *   3. the result is copied into sentence2
 *
 * Fixes over the original: the closing brace of the trimming loop was
 * missing, which left the function unterminated and put the copy inside
 * the loop; the copy into sentence2 is now bounded by its size; tolower()
 * is given an unsigned char value as the C library requires.
 */
void clean(char *sentence)
{
    int counter;

    if (sentence == NULL)
        return;

    /* strlen() is deliberately re-evaluated each pass: removed() is opaque
       here and could shorten the string */
    for (counter = 0; counter < (int) strlen(sentence); counter++)
    {
        sentence[counter] = removed(sentence[counter]);
        sentence[counter] = tolower((unsigned char) sentence[counter]);
    }

    // Remove trailing spaces
    for (counter = (int) strlen(sentence) - 1; counter > 0; counter--)
    {
        if (sentence[counter] != ' ')
            break;

        sentence[counter] = '\0';
    }

    //copy sentence into sentence2, never writing past the end of sentence2
    strncpy(sentence2, sentence, sizeof(sentence2) - 1);
    sentence2[sizeof(sentence2) - 1] = '\0';
}

/*
 * display(): write the complete CGI reply to standard output - the
 * Content-Type header, the fixed page header with the credits and the
 * ISINDEX prompt, then the reply text, then the closing tags.
 *
 *   output - reply text placed on its own line before </body>
 */
void display(char *output)
{
    /* everything that precedes the reply text */
    static const char page_top[] =
        "Content-Type: text/HTML \n\n"
        "<HTML>\n"
        "<HEAD>\n"
        "<TITLE>Response</TITLE>\n"
        "</HEAD>\n"
        "<body bgcolor=\"#FFFFFF\">\n"
        "<img src=\"led.gif\" width=\"378\" height=\"64\">\n"
        "A ChatterBot by:\n"
        "<a href=\"mailto:[email protected]\">Keith\n"
        "Passmore</a> \n"
        "<a href=\"mailto:[email protected]\">Simon Stone</a> \n"
        "<a href=\"mailto:[email protected]\">Ioannis Karydis</a> \n"
        "<a href=\"mailto:[email protected]\">Mathew Scoines</a>\n"
        "<a href=\"http://www.kingston.ac.uk/~k967325/arthur.htm\">CLICK HERE TO READ HOW ARTHUR WAS DEVELOPED</a>\n"
        "<ISINDEX prompt='Please type your question: '>\n\n";

    /* line break after the reply followed by the closing tags */
    static const char page_bottom[] =
        "\n"
        "</body>\n"
        "</html>\n";

    cout << page_top << output << page_bottom;
}

/*
 * search_database(): keyword search of "kbase.dat".
 *
 *   sentence - incoming sentence; it is run through clean() and overwritten
 *              with the cleaned text
 *
 * File layout as read by this function: a line starting with '*' marks an
 * entry, the next line is the keyword, the line after that starts with the
 * number of responses, and that many response lines follow.  An entry
 * matches when its keyword occurs anywhere in the cleaned sentence.
 *
 * Returns 0 after printing a randomly chosen response of the first matching
 * entry via display(); returns 1 when nothing matched (the caller then
 * falls back to parse()).
 *
 * Fixes over the original:
 *   - buffer is initialised before its first character is tested
 *   - a missing/unreadable file or an over-long line ends the scan instead
 *     of looping forever (the loop used to test eof() only)
 *   - the response count is clamped to the table size and a count of zero
 *     no longer causes a division by zero in rand() % n
 *   - response lines are read with the size of the response row, not the
 *     size of the (smaller) scratch buffer
 */
int search_database(char *sentence)
{
    fstream db;                  //database file
    int no_r = 0;                //number of responses announced in the file
    int loaded = 0;              //number of responses actually read
    char responses[20][100];
    const int max_responses = sizeof(responses) / sizeof(responses[0]);
    char buffer[80] = "";

    clean(sentence); //clean the sentence

    //clean() leaves its result in sentence2; copy it back into sentence
    strcpy(sentence, sentence2);

    db.open("kbase.dat", ios::in); /*open the database */
    if (!db)
        return 1;

    while (db.good())
    {
        //skip forward to the next line that starts with an asterisk
        while (buffer[0] != '*' && db.good())
        {
            db.getline(buffer, sizeof(buffer));

            /* a blank line is replaced by 'EEEE' so that the buffer is
               never an empty string */
            if (buffer[0] == '\0') strcpy(buffer, "EEEE");
        }

        //the line after the asterisk holds the keyword of this entry
        db.getline(buffer, sizeof(buffer));

        /* an empty keyword would match every sentence in strstr() below,
           so it is replaced by 'EEEE' as well */
        if (buffer[0] == '\0') strcpy(buffer, "EEEE");

        //look for the keyword anywhere in the sentence
        if (strstr(sentence, buffer) == NULL)
            continue;

        //number of responses available for this keyword
        if (!(db >> no_r))
            break;

        //>> leaves the rest of that line unread; consume it
        db.getline(buffer, sizeof(buffer));

        if (no_r > max_responses)
            no_r = max_responses;

        //collect the responses, stopping early if the file runs out
        for (loaded = 0; loaded < no_r; loaded++)
        {
            if (!db.getline(responses[loaded], sizeof(responses[loaded])))
                break;
        }

        if (loaded == 0)
            break;

        //randomly select one of the responses and print it
        display(responses[rand() % loaded]);

        db.close(); //close the database

        return 0;
    }

    db.close();

    return 1;
}

/*
 * response(): build a reply fragment (rsentence) from the tagged words of
 * the incoming sentence, pass it to templ_database() and print the global
 * outp through display().
 *
 *   pattern - string of per-word tag characters produced by parse(); only
 *             patterns beginning "643" or "43" are examined here
 *
 * Reads the globals strings[] and no_args, and uses pronoun[], verb[],
 * prepositional[], article[], NO_VERBS and the tag / question-identity
 * constants (Interrogative, Verb, Pro, What, Are, ...), none of which are
 * declared in this section of the file.
 *
 * NOTE(review): the braces in this function do not balance (more '{' than
 * '}' between the signature and the final '}'), and several
 * "} while (...);" lines have no visible "do" although the comments speak
 * of do-loops.  Text appears to have been lost; restore from a clean copy
 * before relying on the nesting shown here.
 */
void response(char *pattern)

{

char rsentence[255];

rsentence[0]='\0'; //create an array for response sentence called rsentence

//declare counter, counter2 and 'i' as integers

int counter=0, counter2=0, i=0;

//create a flag to use later in the program to signal an effect

int ok = 0;

//allow for an integer value for the identity of the question

// NOTE(review): not initialised; it is only assigned when one of the
// strcmp() tests below matches, but it is read unconditionally afterwards

int ident_question;

//declare rand_num as an integer

// NOTE(review): rand_num is not referenced again in this function

int rand_num;

// rpattern is never modified; it is only used through strlen(rpattern)
// as a loop bound

char rpattern[20] = "000000000000000000";

/*initialize a pattern for responses called rpattern to zero.

Compare pattern from parse() to check for valid pattern.

In this case; interrogative, verb, pronoun as the first three digits */

ok = 0;

if (strncmp(pattern, "643", 3) ==0)

{

// set counter to count to the length of the pattern

for (counter=0;counter<strlen(pattern);counter++)

{

/* look for a match between the integer value of the pattern at that

point in the pattern indicated by the counter and the integer value

of an interrogative defined in the header file */

if (pattern[counter]==Interrogative)

{

/* if there is a match to the interrogative,look for a match to the individual

interrogative */

if (strcmp(strings[counter],"what")==0) ident_question=What;

if (strcmp(strings[counter],"when")==0) ident_question=When;

if (strcmp(strings[counter],"where")==0) ident_question=Where;

if (strcmp(strings[counter],"why")==0) ident_question=Why;

if (strcmp(strings[counter],"which")==0) ident_question=Which;

if (strcmp(strings[counter],"who")==0) ident_question=Who;

if (strcmp(strings[counter],"how")==0) ident_question=How;

}

//define the rules for the response to the type of questions listed below

// (When is assigned above but is not part of this test)

if (ident_question == What || ident_question == How

|| ident_question == Who || ident_question == Why

|| ident_question == Where || ident_question == Which)

{

for (counter=0;counter<strlen(pattern);counter++)

//count through the pattern using the integer values for pronouns

// NOTE(review): the inner loop below reuses `counter`, the same index as
// the enclosing loops

{ for (counter = 0; counter < strlen(rpattern); counter++)

// NOTE(review): without braces this `if` guards only the first strcmp()
// test that follows; the remaining tests run for every word

{if (pattern[counter]==Pro)

/*if the pattern is a pronoun, look through the list of pronouns,

compare the strings(words)as they are counted through the

sentence.

pronoun 1 is 'you' if the sentence contains 'you'. The rsentence would

contain 'I' */

if ((strcmp(strings[counter],pronoun[1]))==0)

strcat(rsentence,"I ");

// if the pronoun is 'I', the return sentence will contain 'you'

if((strcmp(strings[counter],pronoun[0]))==0)

strcat(rsentence,"you ");

//if the pronoun is 'he' the return sentence (rsentence) will contain 'he'

if ((strcmp(strings[counter],pronoun[3]))==0)

strcat(rsentence,"he ");

//if the pronoun is 'she' the return sentence will contain 'she'

if ((strcmp(strings[counter],pronoun[4]))==0)

strcat(rsentence,"she ");

//if the pronoun is 'it' the return sentence will contain 'it'

if ((strcmp(strings[counter], pronoun[6]))==0)

strcat(rsentence,"it ");

//if the pronoun is 'they' the return sentence will contain 'they'

if ((strcmp(strings[counter],pronoun[2]))==0)

strcat(rsentence,"they ");

//if the pronoun is 'we' return sentence will contain 'you'

if ((strcmp(strings[counter],pronoun[8]))==0)

strcat(rsentence,"you ");

}

for (counter=0;counter<strlen(rpattern);counter++)

{

/* this line breaks the program out of the loop if the verb is 'does' or 'do'.

The program resumes at the line if pattern[0]=='6' */

if (strcmp(strings[counter], verb[19]) == 0 || strcmp(strings[counter], verb[7])==0)

{

break;

}

/* if the pronoun in the incoming sentence is 'he', 'she' or 'it' the

return sentence will be appended with 'is' */

if (strcmp(strings[counter],pronoun[3])==0 || strcmp(strings[counter],pronoun[4])==0

|| strcmp(strings[counter], pronoun[6])==0)

{

strcat(rsentence,"is ");

}

/* if the pronoun in the incoming sentence is 'they' or 'we', then append the

return sentence with 'are' */

if (strcmp(strings[counter], pronoun[2])==0 || strcmp(strings[counter], pronoun[8])==0)

{

strcat(rsentence, "are ");

}

/* if the incoming sentence pronoun is 'you' then append the return sentence

with 'am' */

if (strcmp(strings[counter], pronoun[1])==0)

{

strcat(rsentence, "am ");

}

/* if the pronoun in the incoming sentence is 'i' then append the return

sentence with 'are' */

if (strcmp(strings[counter], pronoun[0]) == 0)

{

strcat(rsentence, "are ");

}

strcat(rsentence, " ");

//if the first digit of the pattern in the incoming sentence is '6' or '4'

if (pattern[0]=='6' || pattern[0]=='4')

{

//and the length of the pattern is greater than 3 digits

if (strlen(pattern) > 3)

{

//set counter to zero and do the following

counter = 0;

/* this do statement is repeated until all words in the pattern

have been compared */

// NOTE(review): no `do` keyword precedes this block, see the matching
// "} while (counter < strlen(pattern));" further down

{

/* if the word at position [counter] is 'does' then set counter2 to counter +1.

This means that only words in the sentence that come after the word 'does'

will be looked at for comparison */

if ((strcmp(strings[counter],verb[19]))==0)

{

counter2 = counter + 1;

//do this while counter2 is less than the number of verbs in the header file

// NOTE(review): no `do` keyword precedes this block either

{

//if another word in the pattern is a verb

if (pattern[counter2] == Verb)

{

//add the verb to the sentence

strcat(rsentence, strings[counter2]);

if (strcmp(strings[counter2], verb[7]) != 0

&& strcmp(strings[counter2], verb[20])!=0)

//if the verb is not 'do' or 'go' add an 's'

strcat(rsentence, "s");

//else add 'es'

else strcat (rsentence, "es");

//add an empty space

strcat(rsentence, " ");

//break out of the loop

ok = 1;//this flag is now set to one

break;

}

//increment counter2

counter2++;

} while (counter2 < NO_VERBS - 1);

}//this brace closes the 'if the verb is does' if statement

//set a loop to count through the number of verbs

for(i=0; i < NO_VERBS - 1; i++)

{

//this loop deals with all the second verbs in a sentence if the first verb

// is not 'does'

//find the first verb in a sentence

if((strcmp(strings[counter],verb[i]))==0 )

{

//set counter2 so that all the rest of the word after that are looked at

counter2 = counter + 1;

//do all this while counter two is less than the number of verbs in the header

//file

{

//if a second verb is found

if (pattern[counter2] == Verb)

{

//if that verb is 'shall', 'should', 'could', or 'can'

if (i==17 || i==16 || i==9 || i==10)

{

//add verb to rsentence and add space to rsentence

strcat(rsentence, verb[i]);

strcat(rsentence," ");

}

//if not, if flag is not set to '1' add the next verb encountered

if (ok!=1)

{

strcat(rsentence, strings[counter2]);

}

strcat(rsentence, " ");

//set counter2 to 500 in order to break loop

counter2 = 500;

break;

}

counter2++;

} while (counter2 < NO_VERBS - 1);

}

counter++;

} while (counter < strlen(pattern));

}

//this code is used to identify a prepositional or article in the

//incoming sentence. It then appends the remainder of the incoming sentence to the

//response sentence after the rules pertaining to the response have been applied

//to the pronoun and verb and possibly interrogative.

for (counter = 0; counter < 8; counter++)

{

for (i = 0; i < 8; i++)

{

if (strcmp(strings[counter], prepositional[i]) == 0)

{

strcat(rsentence, prepositional[i]);

//this counter starts at the word after the prepositional and counts

//to the end of the incoming sentence

for (counter2 = counter + 1; counter2 < no_args - 1; counter2++)

{

strcat(rsentence, " ");

strcat(rsentence, strings[counter2]);

}

counter = 10; //these values are inserted into these counters

//in order to break the loops for i and counter respectively

i = 10;

break;//break is used to break the last loop using counter2

}

else if (strcmp(strings[counter], article[i]) == 0)

{

strcat(rsentence, article[i]);

for (counter2 = counter + 1; counter2 < no_args - 1; counter2++)

{

strcat(rsentence, " ");

strcat(rsentence, strings[counter2]);

}

counter = 10;

i = 10;

break;

}

//this code is for sentences starting verb, pronoun.

// NOTE(review): as written this `else if` continues the
// prepositional/article chain above rather than the "643" test at the top
// of the function - presumably a consequence of the lost braces

else if (strncmp(pattern, "43",2) == 0)

{

for (counter=0;counter<strlen(pattern);counter++)

//this code identifies all the verbs that can be used to start questions

//in this pattern

{

if (pattern[counter]==Verb)

{

if (strcmp(strings[counter],"are")==0) ident_question=Are;

if (strcmp(strings[counter],"am")==0) ident_question=Am;

if (strcmp(strings[counter],"were")==0) ident_question=Were;

if (strcmp(strings[counter],"is")==0) ident_question=Is;

if (strcmp(strings[counter],"was")==0) ident_question=Was;

if (strcmp(strings[counter],"would")==0) ident_question=Would;

if (strcmp(strings[counter],"can")==0) ident_question=Can;

if (strcmp(strings[counter],"could")==0) ident_question=Could;

if (strcmp(strings[counter],"do")==0) ident_question=Do;

if (strcmp(strings[counter],"does")==0) ident_question=Does;

if (strcmp(strings[counter],"did")==0) ident_question=Did;

if (strcmp(strings[counter],"have")==0) ident_question=Have;

if (strcmp(strings[counter],"has")==0) ident_question=Has;

if (strcmp(strings[counter],"had")==0) ident_question=Had;

if (strcmp(strings[counter],"must")==0) ident_question=Must;

if (strcmp(strings[counter],"might")==0) ident_question=Might;

if (strcmp(strings[counter],"shall")==0) ident_question=Shall;

if (strcmp(strings[counter],"should")==0) ident_question=Should;

}

if(ident_question == Are ||ident_question == Am

||ident_question == Were ||ident_question == Is

||ident_question == Was ||ident_question == Would

||ident_question == Can ||ident_question == Could

||ident_question == Do ||ident_question == Does

||ident_question == Did ||ident_question == Have

||ident_question == Has ||ident_question == Had

||ident_question == Must ||ident_question == Might

||ident_question == Shall ||ident_question == Should)

{

for (counter=0;counter<strlen(pattern);counter++)

//sort incoming pronouns to the appropriate output responses in the same way

//as above

{ for (counter = 0; counter < strlen(rpattern); counter++)

// NOTE(review): as above, this `if` guards only the first strcmp() test

{ if (pattern[counter]==Pro)

if ((strcmp(strings[counter],pronoun[1]))==0)

strcat(rsentence,"I ");

if((strcmp(strings[counter],pronoun[0]))==0)

strcat(rsentence,"you ");

if ((strcmp(strings[counter],pronoun[3]))==0)

strcat(rsentence,"he ");

if ((strcmp(strings[counter],pronoun[4]))==0)

strcat(rsentence,"she ");

if ((strcmp(strings[counter], pronoun[6]))==0)

strcat(rsentence,"it ");

if ((strcmp(strings[counter],pronoun[2]))==0)

strcat(rsentence,"they ");

if ((strcmp(strings[counter],pronoun[8]))==0)

strcat(rsentence,"you ");

}

// same code as above for sorting responses

// here the first word is compared with verb[0], verb[1] and verb[5] and
// the second word with the pronoun table

if (strcmp(strings[0], verb[0])==0 || strcmp(strings[0], verb[1])==0

|| strcmp(strings[0], verb[5]) == 0)

{

if (strcmp(strings[1], pronoun[3]) == 0 || strcmp(strings[1], pronoun[4]) == 0

|| strcmp(strings[1], pronoun[6]) == 0)

{

strcat (rsentence, "is ");

}

if (strcmp(strings[1], pronoun[2]) == 0 || strcmp(strings[1], pronoun[8]) == 0)

{

strcat(rsentence, "are ");

}

if (strcmp(strings[1], pronoun[1]) == 0)

{

strcat(rsentence, "am ");

}

if (strcmp(strings[1], pronoun[0]) == 0)

{

strcat(rsentence, "are ");

}

// NOTE(review): this else pairs with the pronoun[0] test directly above,
// so the first word is appended whenever the second word is not pronoun[0]

else

{

strcat (rsentence, strings[0]);

}

//take results of the response sentence to be inserted into the templates

// the return value of templ_database() is not used; its result reaches
// display() through the global outp, which temp_man() appends to

templ_database(rsentence,ident_question);

display(outp);

}

/*
 * parse(): tag every word of the incoming sentence and dispatch on the
 * resulting pattern.
 *
 * Reads the global strings[] (the words) and the global counter (index of
 * the last word, set by main()).  Each word is looked up in the lexical
 * tables adverb[], noun[], interrogative[], verb[], pronoun[], adjective[]
 * and possesive[] - all declared outside this section - and the matching
 * tag character is stored in pattern[] at the word's position.  When a word
 * occurs in several tables the table checked last wins; a word found in no
 * table keeps the initial '0'.
 *
 * If the pattern starts with "643" or "43" it is passed to response();
 * otherwise one of the four error_resp[] messages is printed at random.
 *
 * Fixes over the original:
 *   - every lookup loop was missing its closing brace
 *   - the word index is clamped so that pattern[] (20 bytes) cannot be
 *     overrun by a long sentence
 *   - the "his" fix-up no longer reads pattern[20]
 *   - the unused output buffer is gone
 */
void parse()
{
    int counter2, counter3;
    char pattern[20]="000000000000000000\0";

    /* index of the last word to tag; pattern[last + 1] must still be inside
       pattern[], and -1 means "no words" */
    int last = counter;
    const int max_last = (int) sizeof(pattern) - 2;

    if (last > max_last)
        last = max_last;
    if (last < -1)
        last = -1;

    //breakdown incoming sentence into tag characters for the pattern;
    //the upper limit of each inner loop is the number of words in the
    //corresponding lexical array
    for (counter2 = 0; counter2 <= last; counter2++)
    {
        for (counter3 = 0; counter3 < 3; counter3++)
        {
            if (strcmp(adverb[counter3], strings[counter2]) == 0)
                pattern[counter2] = Adv;
        }

        for (counter3 = 0; counter3 < 11; counter3++)
        {
            if (strcmp(noun[counter3], strings[counter2]) == 0)
                pattern[counter2] = Noun;
        }

        for (counter3 = 0; counter3 < 7; counter3++)
        {
            if (strcmp(interrogative[counter3], strings[counter2]) == 0)
                pattern[counter2] = Interrogative;
        }

        // NOTE(review): response() walks verb[] with "< NO_VERBS - 1" while
        // this loop uses "< NO_VERBS + 1"; bound kept as found - confirm
        // against the declaration of verb[]
        for (counter3 = 0; counter3 < NO_VERBS + 1; counter3++)
        {
            if (strcmp(verb[counter3], strings[counter2]) == 0)
                pattern[counter2] = Verb;
        }

        for (counter3 = 0; counter3 < 10; counter3++)
        {
            if (strcmp(pronoun[counter3], strings[counter2]) == 0)
                pattern[counter2] = Pro;
        }

        for (counter3 = 0; counter3 < 24; counter3++)
        {
            if (strcmp(adjective[counter3], strings[counter2]) == 0)
                pattern[counter2] = Adj;
        }

        for (counter3 = 0; counter3 <= 10; counter3++)
        {
            if (strcmp(possesive[counter3], strings[counter2]) == 0)
                pattern[counter2] = Possesive;
        }
    }

    pattern[last + 1] = '\0'; //ends the pattern

    //"his" directly in front of a noun is treated as an adjective;
    //stop one short of the end so that pattern[counter2 + 1] stays in range
    for (counter2 = 0; counter2 < (int) sizeof(pattern) - 1; counter2++)
    {
        if (strcmp(strings[counter2], "his") == 0 && pattern[counter2 + 1] == Noun)
            pattern[counter2] = Adj;
    }

    //if the pattern is well formed i.e. it matches the two patterns that we
    //have defined so far send it to response()
    if (strncmp(pattern, "643", 3) == 0 || strncmp(pattern, "43", 2) == 0)
    {
        response(pattern);
        return;
    }

    //otherwise print one of the stock "not understood" messages
    display(error_resp[rand() % 4]);
}

/* Append src to the NUL-terminated text in dst without ever writing past
   dst[cap - 1]; whatever does not fit is silently dropped. */
static void sent_append(char *dst, size_t cap, const char *src)
{
    size_t used = strlen(dst);

    if (src == NULL || used + 1 >= cap)
        return;

    strncat(dst, src, cap - used - 1);
}

/*
 * main(): CGI entry point.  The words of the question arrive as command
 * line arguments.
 *
 *   - with no arguments the empty page with the input prompt is printed
 *   - otherwise the words are joined into one sentence for the keyword
 *     search, and each word is also cleaned and stored in strings[] for
 *     parse(); parse() is only called when search_database() found nothing
 *
 * Globals written: no_args, counter (index of the last word), strings[].
 *
 * Fixes over the original:
 *   - the missing closing braces of the word loops are restored
 *   - the arguments come from the web client, so every copy is now bounded:
 *     the sentence is truncated to fit sent[], each word to fit its row in
 *     strings[], and words beyond the capacity of strings[] are ignored
 *   - display() is given a writable buffer instead of a string literal
 *
 * Always returns 0.
 */
int main(int argc, char * argv[])
{
    time_t t;//time value used as the seed for randomising
    int counter, counter2, i;
    int words;
    char sent[100] = "";
    char blank[] = " ";
    const int max_words = sizeof(strings) / sizeof(strings[0]);

    //copy argc to the global variable no_args
    no_args = argc;

    //initialise the random number generator using the system's time as seed
    srand((unsigned) time(&t));

    if (argc > 1 && argv[1])
    {
        //number of words that fit into strings[]
        words = argc - 1;
        if (words > max_words)
            words = max_words;

        //response() uses no_args - 1 as the word count; keep it in step
        no_args = words + 1;

        //join all arguments into one sentence, each followed by a space
        for (counter = 1; counter < argc; counter++)
        {
            sent_append(sent, sizeof(sent), argv[counter]);
            sent_append(sent, sizeof(sent), " ");
        }

        //index of the last word, used by parse()
        ::counter = words - 1;

        //clean each word and build the strings array used by parse()
        for (counter = 0; counter < words; counter++)
        {
            strncpy(strings[counter], argv[counter + 1], sizeof(strings[counter]) - 1);
            strings[counter][sizeof(strings[counter]) - 1] = '\0';

            /* strlen() is deliberately re-evaluated each pass: removed() is
               defined elsewhere and could shorten the string */
            for (i = 0; i < (int) strlen(strings[counter]); i++)
            {
                strings[counter][i] = removed(strings[counter][i]);
                strings[counter][i] = tolower((unsigned char) strings[counter][i]);
            }

            //every space after the first character ends the word
            for (counter2 = (int) strlen(strings[counter]) - 1; counter2 > 0; counter2--)
            {
                if (strings[counter][counter2] == ' ')
                {
                    strings[counter][counter2] = '\0';
                }
            }
        }

        //if the keyword search finds nothing, fall back to parse()
        if (search_database(sent) != 0)
            parse();
    }
    //if there are no arguments display the empty HTML page with the prompt
    else
    {
        display(blank);
    }

    return 0;
}