/*
// usage ... command-line:
// ~: ./strplot 'to be or not to be' 'to be or not to be'
// token based dotplot:
// [+][ ][ ][ ][+][ ]
// [ ][+][ ][ ][ ][+]
// [ ][ ][+][ ][ ][ ]
// [ ][ ][ ][+][ ][ ]
// [+][ ][ ][ ][+][ ]
// [ ][+][ ][ ][ ][+]
//
// character based distance:
// levenshtein distance: 0
// one : to be or not to be
// two : to be or not to be
*/
#include
#include
#include
#define MAXTOKENS 256
#define MAXLINE 1024
char **split(char *string, char *delim);
int levenshtein(char *stra, char *strb);
int main(int argc, char *argv[]) {
char *delim = ".,:;`'\"+-_(){}[]<>*&^%$#@!?~/|\\= \t\r\n1234567890";
char *str1 = NULL;
char *str2 = NULL;
char **stok1 = NULL;
char **stok2 = NULL;
int i = 0, j = 0;
if(argc != 3)
return 1;
else if(strlen(argv[1]) < 5 || strlen(argv[2]) < 5)
return 1;
else {
str1 = argv[1];
str2 = argv[2];
}
stok1 = split(str1, delim);
stok2 = split(str2, delim);
printf("token based dotplot:\n");
for(i = 0; stok1[i] != NULL; i++) {
for(j = 0; stok2[j] != NULL; j++) {
if(strcmp(stok1[i], stok2[j]) == 0)
printf("[+]");
else
printf("[ ]");
}
printf("\n");
}
printf("\n");
printf("character based distance:\n");
printf("levenshtein distance: %d\n", levenshtein(str1, str2));
printf("one : %s\n", str1);
printf("two : %s\n", str2);
for(i = 0; stok1[i] != NULL; i++)
free(stok1[i]);
free(stok1);
for(i = 0; stok2[i] != NULL; i++)
free(stok2[i]);
free(stok2);
return 0;
}
/* calculate levenshtein distance */
int levenshtein(char *stra, char *strb) {
int i, j, k, l, m1, m2, m3, laa, r;
int lengtha, lengthb, lengthmin;
int **d = 0;
char *a = 0;
i = j = k = l = m1 = m2 = m3 = laa = r = 0;
lengtha = lengthb = lengthmin = 0;
lengtha = strlen(stra);
lengthb = strlen(strb);
lengthmin = lengtha;
if(lengthb > lengthmin)
lengthmin = lengthb;
a = (char *)malloc((lengthmin + 2) * sizeof(char));
d = (int **)malloc((lengthmin + 2) * sizeof(int *));
for(i = 0; i < lengthmin + 2; i++)
d[i] = (int *)malloc((lengthmin + 2) * sizeof(int));
for(i = 0; i <= lengtha; i++)
a[i] = stra[i];
laa = lengtha;
d[0][0] = 0;
for(i = 1; i <= lengtha; i++)
d[i][0] = d[i-1][0]+1;
for(j = 1; j <= lengthb; j++)
d[0][j] = d[0][j-1]+1;
for(i = 1; i <= laa; i++) {
for(j = 1;j <= lengthb; j++) {
r = 0;
if(a[i-1] != strb[j-1]) {
r = 1;
a[i-1] = strb[j-1];
}
m1 = d[i-1][j-1]+r;
for(k = lengtha; k >= i-1; k--) a[k+1] = a[k];
a[j-1] = strb[j-1];
lengtha++;
m2 = d[i][j-1]+1;
for(k = i-1; k < lengtha; k++) a[k] = a[k+1];
lengtha--;
m3 = d[i-1][j]+1;
r = m1;
if( m2 < r) r = m2;
if(m3 < r) r = m3;
d[i][j] = r;
lengtha = laa;
for(l = 0; l <= lengtha; l++) a[l] = stra[l];
}
}
r = d[lengtha][lengthb];
free(a);
for(i = 0; i < lengthmin+2; i++)
free(d[i]);
free(d);
return(r);
}
/* split string into tokens, return token array */
char **split(char *string, char *delim) {
char **tokens = NULL;
char *working = NULL;
char *token = NULL;
int idx = 0;
tokens = malloc(sizeof(char *) * MAXTOKENS);
if(tokens == NULL)
return NULL;
working = malloc(sizeof(char) * strlen(string) + 1);
if(working == NULL)
return NULL;
strcpy(working, string);
for(idx = 0; idx < MAXTOKENS; idx++)
tokens[idx] = NULL;
token = strtok(working, delim);
idx = 0;
/* always keep the last entry NULL terminated */
while((idx < (MAXTOKENS - 1)) && (token != NULL)) {
tokens[idx] = malloc(sizeof(char) * strlen(token) + 1);
if(tokens[idx] != NULL) {
strcpy(tokens[idx], token);
idx++;
token = strtok(NULL, delim);
}
}
free(working);
return tokens;
}