2 * UCW Library -- Universal Sorter: Governing Routines
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include "lib/fastbuf.h"
12 #include "lib/mempool.h"
13 #include "lib/sorter/common.h"
16 sorter_alloc(struct sort_context *ctx, uns size)
18 return mp_alloc_zero(ctx->pool, size);
22 sbuck_new(struct sort_context *ctx)
24 return sorter_alloc(ctx, sizeof(struct sort_bucket));
28 sbuck_drop(struct sort_bucket *b)
40 sbuck_can_read(struct sort_bucket *b)
46 sbuck_open_read(struct sort_bucket *b)
48 /* FIXME: These functions should handle buckets with no fb and only name. */
54 sbuck_open_write(struct sort_bucket *b)
57 b->fb = bopen_tmp(sorter_stream_bufsize);
62 sbuck_close_read(struct sort_bucket *b)
72 sbuck_close_write(struct sort_bucket *b)
76 b->size = btell(b->fb);
82 sorter_alloc_buf(struct sort_context *ctx)
86 u64 bs = MAX(sorter_bufsize/2, 1);
87 bs = ALIGN_TO(bs, (u64)CPU_PAGE_SIZE);
88 ctx->big_buf = big_alloc(2*bs);
89 ctx->big_buf_size = 2*bs;
90 ctx->big_buf_half = ((byte*) ctx->big_buf) + bs;
91 ctx->big_buf_half_size = bs;
92 SORT_XTRACE("Allocated sorting buffer (%jd bytes)", (uintmax_t) bs);
96 sorter_free_buf(struct sort_context *ctx)
100 big_free(ctx->big_buf, ctx->big_buf_size);
102 SORT_XTRACE("Freed sorting buffer");
105 static int sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
107 /* FIXME: Mode with no presorting (mostly for debugging) */
108 sorter_alloc_buf(ctx);
109 if (in->flags & SBF_CUSTOM_PRESORT)
111 struct fastbuf *f = sbuck_open_write(out);
112 return ctx->custom_presort(f, ctx->big_buf, ctx->big_buf_size); // FIXME: out_only optimization?
114 return ctx->internal_sort(ctx, in, out, out_only);
117 static inline struct sort_bucket *
118 sbuck_join_to(struct sort_bucket *b)
120 struct sort_bucket *out = (struct sort_bucket *) b->n.prev; // Such bucket is guaranteed to exist
121 return (out->flags & SBF_FINAL) ? out : NULL;
125 sorter_join(struct sort_bucket *b)
127 struct sort_bucket *join = sbuck_join_to(b);
130 // FIXME: What if the final bucket doesn't contain any file yet?
132 SORT_TRACE("Copying %jd bytes to output file", (uintmax_t) b->size);
133 struct fastbuf *src = sbuck_open_read(b);
134 struct fastbuf *dest = sbuck_open_write(join);
135 bbcopy(src, dest, ~0U);
140 sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
142 struct sort_bucket *ins[3], *outs[3];
143 struct sort_bucket *join = sbuck_join_to(b);
145 SORT_TRACE("Presorting");
146 ins[0] = sbuck_new(ctx);
148 if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
153 clist_insert_after(&ins[0]->n, &b->n);
158 ins[1] = sbuck_new(ctx);
161 while (sorter_presort(ctx, b, ins[i], ins[i]))
164 sbuck_close_write(ins[0]);
165 sbuck_close_write(ins[1]);
167 SORT_TRACE("Main sorting");
169 if (ins[0]->runs == 1 && ins[1]->runs == 1 && join) // FIXME: Debug switch for disabling joining optimizations
171 // This is guaranteed to produce a single run, so join if possible
174 ctx->twoway_merge(ctx, ins, outs);
175 ASSERT(outs[0]->runs == 2);
177 SORT_TRACE("Pass done (joined final run)");
181 outs[0] = sbuck_new(ctx);
182 outs[1] = sbuck_new(ctx);
184 ctx->twoway_merge(ctx, ins, outs);
185 sbuck_close_write(outs[0]);
186 sbuck_close_write(outs[1]);
187 SORT_TRACE("Pass done (%d+%d runs, %jd+%jd bytes)", outs[0]->runs, outs[1]->runs, (uintmax_t) outs[0]->size, (uintmax_t) outs[1]->size);
190 memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
191 } while (ins[1]->size);
194 clist_insert_after(&ins[0]->n, &b->n);
199 sorter_run(struct sort_context *ctx)
201 ctx->pool = mp_new(4096);
202 clist_init(&ctx->bucket_list);
204 /* FIXME: There should be a way how to detect size of the input file */
205 /* FIXME: Remember to test sorting of empty files */
207 // Create bucket containing the source
208 struct sort_bucket *bin = sbuck_new(ctx);
209 bin->flags = SBF_SOURCE;
210 if (ctx->custom_presort)
211 bin->flags |= SBF_CUSTOM_PRESORT;
213 bin->fb = ctx->in_fb;
216 bin->hash_bits = ctx->hash_bits;
217 clist_add_tail(&ctx->bucket_list, &bin->n);
219 // Create bucket for the output
220 struct sort_bucket *bout = sbuck_new(ctx);
221 bout->flags = SBF_FINAL;
222 bout->fb = ctx->out_fb;
225 clist_add_head(&ctx->bucket_list, &bout->n);
227 struct sort_bucket *b;
228 while (b = clist_next(&ctx->bucket_list, &bout->n))
232 else if (b->runs == 1)
235 sorter_twoway(ctx, b);
238 sorter_free_buf(ctx);
239 sbuck_close_write(bout);
240 SORT_XTRACE("Final size: %jd", (uintmax_t) bout->size);
241 ctx->out_fb = sbuck_open_read(bout);