]> mj.ucw.cz Git - misc.git/commitdiff
Fixed ukkonen.c
authorMartin Mares <mj@ucw.cz>
Sat, 24 Mar 2018 11:17:27 +0000 (12:17 +0100)
committerMartin Mares <mj@ucw.cz>
Sat, 24 Mar 2018 11:17:27 +0000 (12:17 +0100)
ukkonen.c

index 7e597cb5c57db154c99e3ca1da197ed62a09f760..8b332744edf9cef9ea2a53cd362c8054b438fcd6 100644 (file)
--- a/ukkonen.c
+++ b/ukkonen.c
@@ -1,8 +1,14 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <assert.h>
 
 
-#define INFTY 1000000
+#define MAXN 1000000
+char x[MAXN+1];
+
+#define INFTY 100000000
+
+#define DEBUG
 
 struct node {
        struct node *first_son;
 
 struct node {
        struct node *first_son;
@@ -10,32 +16,49 @@ struct node {
        struct node *backlink;          /* We use them only for internal nodes */
        char *label;                    /* Label of edge above this node */
        int lablen;
        struct node *backlink;          /* We use them only for internal nodes */
        char *label;                    /* Label of edge above this node */
        int lablen;
+#ifdef DEBUG
+       int id;
+#endif
 };
 
 };
 
+#ifdef DEBUG
 static void dump_tree(struct node *n, int level, char *strend)
 {
 static void dump_tree(struct node *n, int level, char *strend)
 {
+       printf("%-3d ", n->id);
        for (int i=0; i<level; i++)
                putchar('.');
        for (int i=0; i<level; i++)
                putchar('.');
-       printf("<%.*s%s> [%p] {%p}\n",
-               (n->lablen >= INFTY/2 ? strend-n->label : n->lablen), n->label,
-               (n->lablen >= INFTY/2 ? "*" : ""), n, n->backlink);
+       printf("<%.*s%s> {%d}\n",
+               (n->lablen >= INFTY/2 ? (int)(strend-n->label) : n->lablen), n->label,
+               (n->lablen >= INFTY/2 ? "*" : ""), n->backlink ? n->backlink->id : -1);
        for (n=n->first_son; n; n=n->next_sibling)
                dump_tree(n, level+1, strend);
 }
        for (n=n->first_son; n; n=n->next_sibling)
                dump_tree(n, level+1, strend);
 }
+#endif
 
 static struct node *new_node(void)
 {
        struct node *n = malloc(sizeof(*n));
        bzero(n, sizeof(*n));
 
 static struct node *new_node(void)
 {
        struct node *n = malloc(sizeof(*n));
        bzero(n, sizeof(*n));
+#ifdef DEBUG
+       static int ncount;
+       n->id = ++ncount;
+#endif
        return n;
 }
 
        return n;
 }
 
-static struct node *lookup_edge(struct node *x, int c)
+static struct node *lookup_edge(struct node *x, int c, struct node ***ptr_from)
 {
 {
-       x = x->first_son;
-       while (x && x->label[0] != c)
-               x = x->next_sibling;
-       return x;
+       struct node **f = &x->first_son;
+       for (;;) {
+               x = *f;
+               if (!x)
+                       return NULL;
+               if (x->label[0] == c) {
+                       *ptr_from = f;
+                       return x;
+               }
+               f = &x->next_sibling;
+       }
 }
 
 static struct node *build_tree(char *string)
 }
 
 static struct node *build_tree(char *string)
@@ -44,13 +67,13 @@ static struct node *build_tree(char *string)
        struct node *l = root;          /* Currently longest nested suffix as a canonical reference pair (node, label) */
        char *llabel = string;
        int llablen = 0;
        struct node *l = root;          /* Currently longest nested suffix as a canonical reference pair (node, label) */
        char *llabel = string;
        int llablen = 0;
-       struct node *x, *y, *z, *trash, **blp;
+       struct node *x, *y, *z, *trash, **blp, **from;
 
        for (int i=0; string[i]; i++) {
                blp = &trash;
                for(;;) {
                        while (llablen) {                               /* Canonicalize the reference pair */
 
        for (int i=0; string[i]; i++) {
                blp = &trash;
                for(;;) {
                        while (llablen) {                               /* Canonicalize the reference pair */
-                               x = lookup_edge(l, llabel[0]);
+                               x = lookup_edge(l, llabel[0], &from);
                                if (x->lablen > llablen)
                                        break;
                                l = x;
                                if (x->lablen > llablen)
                                        break;
                                l = x;
@@ -60,7 +83,7 @@ static struct node *build_tree(char *string)
                        if (!llablen) {                                 /* LNS is an internal node or root */
                                *blp = l;
                                blp = &trash;
                        if (!llablen) {                                 /* LNS is an internal node or root */
                                *blp = l;
                                blp = &trash;
-                               if (lookup_edge(l, string[i])) {                /* Found LNS for the new string */
+                               if (lookup_edge(l, string[i], &from)) {         /* Found LNS for the new string */
                                        llabel = string+i;
                                        llablen = 1;
                                        break;
                                        llabel = string+i;
                                        llablen = 1;
                                        break;
@@ -69,24 +92,24 @@ static struct node *build_tree(char *string)
                                z->next_sibling = l->first_son;
                                l->first_son = z;
                        } else {                                        /* LNS is in the middle of an edge */
                                z->next_sibling = l->first_son;
                                l->first_son = z;
                        } else {                                        /* LNS is in the middle of an edge */
-                               x = lookup_edge(l, llabel[0]);
+                               x = lookup_edge(l, llabel[0], &from);
                                if (x->label[llablen] == string[i]) {           /* found LNS for the new string */
                                        llablen++;
                                        break;
                                }
                                if (x->label[llablen] == string[i]) {           /* found LNS for the new string */
                                        llablen++;
                                        break;
                                }
-                               y = new_node();                         /* New interior node which gets all sons of x */
+                               y = new_node();                         /* New interior node inside the edge */
                                z = new_node();                         /* New leaf connected below x */
                                z = new_node();                         /* New leaf connected below x */
-                               y->first_son = x->first_son;
-                               y->next_sibling = NULL;
-                               y->backlink = x->backlink;
-                               y->label = x->label + llablen;
-                               y->lablen = x->lablen - llablen;
-                               x->first_son = z;
-                               x->backlink = NULL;
-                               *blp = x;
-                               blp = &x->backlink;                     /* Backlink will be filled in later */
-                               x->lablen = llablen;
-                               z->next_sibling = y;
+                               *from = y;
+                               y->next_sibling = x->next_sibling;
+                               y->first_son = z;
+                               z->next_sibling = x;
+                               x->next_sibling = NULL;
+                               y->label = x->label;
+                               y->lablen = llablen;
+                               x->label += llablen;
+                               x->lablen -= llablen;
+                               *blp = y;
+                               blp = &y->backlink;                     /* Backlink will be filled in later */
                        }
                        z->label = string+i;
                        z->lablen = INFTY;
                        }
                        z->label = string+i;
                        z->lablen = INFTY;
@@ -104,11 +127,17 @@ static struct node *build_tree(char *string)
        return root;
 }
 
        return root;
 }
 
-int main(int argc, char **argv)
+int main(void)
 {
 {
-       char *string = argv[1];
-       struct node *root = build_tree(string);
-       puts("Result:");
-       dump_tree(root, 0, string+strlen(string));
+       scanf("%s", x);
+       int len = strlen(x);
+       x[len++] = '$';
+       x[len] = 0;
+
+       struct node *root = build_tree(x);
+#ifdef DEBUG
+       dump_tree(root, 0, x+strlen(x));
+#endif
+
        return 0;
 }
        return 0;
 }