]> mj.ucw.cz Git - misc.git/blob - ukkonen.c
Merge branch 'master' of git+ssh://git.ucw.cz/home/mj/GIT/misc
[misc.git] / ukkonen.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <assert.h>
5
6 #define MAXN 1000000
7 char x[MAXN+1];
8
9 #define INFTY 100000000
10
11 #define DEBUG
12
13 struct node {
14         struct node *first_son;
15         struct node *next_sibling;
16         struct node *backlink;          /* We use them only for internal nodes */
17         char *label;                    /* Label of edge above this node */
18         int lablen;
19 #ifdef DEBUG
20         int id;
21 #endif
22 };
23
24 #ifdef DEBUG
25 static void dump_tree(struct node *n, int level, char *strend)
26 {
27         printf("%-3d ", n->id);
28         for (int i=0; i<level; i++)
29                 putchar('.');
30         printf("<%.*s%s> {%d}\n",
31                 (n->lablen >= INFTY/2 ? (int)(strend-n->label) : n->lablen), n->label,
32                 (n->lablen >= INFTY/2 ? "*" : ""), n->backlink ? n->backlink->id : -1);
33         for (n=n->first_son; n; n=n->next_sibling)
34                 dump_tree(n, level+1, strend);
35 }
36 #endif
37
38 static struct node *new_node(void)
39 {
40         struct node *n = malloc(sizeof(*n));
41         bzero(n, sizeof(*n));
42 #ifdef DEBUG
43         static int ncount;
44         n->id = ++ncount;
45 #endif
46         return n;
47 }
48
49 static struct node *lookup_edge(struct node *x, int c, struct node ***ptr_from)
50 {
51         struct node **f = &x->first_son;
52         for (;;) {
53                 x = *f;
54                 if (!x)
55                         return NULL;
56                 if (x->label[0] == c) {
57                         *ptr_from = f;
58                         return x;
59                 }
60                 f = &x->next_sibling;
61         }
62 }
63
64 static struct node *build_tree(char *string)
65 {
66         struct node *root = new_node();
67         struct node *l = root;          /* Currently longest nested suffix as a canonical reference pair (node, label) */
68         char *llabel = string;
69         int llablen = 0;
70         struct node *x, *y, *z, *trash, **blp, **from;
71
72         for (int i=0; string[i]; i++) {
73                 blp = &trash;
74                 for(;;) {
75                         while (llablen) {                               /* Canonicalize the reference pair */
76                                 x = lookup_edge(l, llabel[0], &from);
77                                 if (x->lablen > llablen)
78                                         break;
79                                 l = x;
80                                 llabel += x->lablen;
81                                 llablen -= x->lablen;
82                         }
83                         if (!llablen) {                                 /* LNS is an internal node or root */
84                                 *blp = l;
85                                 blp = &trash;
86                                 if (lookup_edge(l, string[i], &from)) {         /* Found LNS for the new string */
87                                         llabel = string+i;
88                                         llablen = 1;
89                                         break;
90                                 }
91                                 z = new_node();
92                                 z->next_sibling = l->first_son;
93                                 l->first_son = z;
94                         } else {                                        /* LNS is in the middle of an edge */
95                                 x = lookup_edge(l, llabel[0], &from);
96                                 if (x->label[llablen] == string[i]) {           /* found LNS for the new string */
97                                         llablen++;
98                                         break;
99                                 }
100                                 y = new_node();                         /* New interior node inside the edge */
101                                 z = new_node();                         /* New leaf connected below x */
102                                 *from = y;
103                                 y->next_sibling = x->next_sibling;
104                                 y->first_son = z;
105                                 z->next_sibling = x;
106                                 x->next_sibling = NULL;
107                                 y->label = x->label;
108                                 y->lablen = llablen;
109                                 x->label += llablen;
110                                 x->lablen -= llablen;
111                                 *blp = y;
112                                 blp = &y->backlink;                     /* Backlink will be filled in later */
113                         }
114                         z->label = string+i;
115                         z->lablen = INFTY;
116
117                         if (l == root) {                                /* Cut the first character of the LNS */
118                                 if (!llablen)
119                                         break;
120                                 llabel++;
121                                 llablen--;
122                         } else
123                                 l = l->backlink;
124                 }
125                 *blp = root;
126         }
127         return root;
128 }
129
130 int main(void)
131 {
132         scanf("%s", x);
133         int len = strlen(x);
134         x[len++] = '$';
135         x[len] = 0;
136
137         struct node *root = build_tree(x);
138 #ifdef DEBUG
139         dump_tree(root, 0, x+strlen(x));
140 #endif
141
142         return 0;
143 }