2 * UCW Library -- Universal Sorter: Governing Routines
4 * (c) 2007 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU Lesser General Public License.
11 #include "lib/fastbuf.h"
12 #include "lib/mempool.h"
13 #include "lib/sorter/common.h"
18 sorter_alloc(struct sort_context *ctx, uns size)
20 return mp_alloc_zero(ctx->pool, size);
24 sbuck_new(struct sort_context *ctx)
26 struct sort_bucket *b = sorter_alloc(ctx, sizeof(struct sort_bucket));
32 sbuck_drop(struct sort_bucket *b)
36 ASSERT(!(b->flags & SBF_DESTROYED));
41 b->flags = SBF_DESTROYED;
46 sbuck_size(struct sort_bucket *b)
48 if ((b->flags & SBF_OPEN_WRITE) && !(b->flags & SBF_SWAPPED_OUT))
55 sbuck_have(struct sort_bucket *b)
57 return b && sbuck_size(b);
61 sbuck_has_file(struct sort_bucket *b)
63 return (b->fb || (b->flags & SBF_SWAPPED_OUT));
67 sbuck_swap_in(struct sort_bucket *b)
69 if (b->flags & SBF_SWAPPED_OUT)
71 b->fb = bopen(b->filename, O_RDWR, sorter_stream_bufsize);
72 if (b->flags & SBF_OPEN_WRITE)
73 bseek(b->fb, 0, SEEK_END);
74 bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 1);
75 b->flags &= ~SBF_SWAPPED_OUT;
76 SORT_XTRACE("Swapped in %s", b->filename);
81 sbuck_read(struct sort_bucket *b)
84 if (b->flags & SBF_OPEN_READ)
86 else if (b->flags & SBF_OPEN_WRITE)
88 b->size = btell(b->fb);
89 b->flags = (b->flags & ~SBF_OPEN_WRITE) | SBF_OPEN_READ;
98 sbuck_write(struct sort_bucket *b)
101 if (b->flags & SBF_OPEN_WRITE)
105 ASSERT(!(b->flags & (SBF_OPEN_READ | SBF_DESTROYED)));
106 b->fb = bopen_tmp(sorter_stream_bufsize);
107 b->flags |= SBF_OPEN_WRITE;
108 b->filename = mp_strdup(b->ctx->pool, b->fb->name);
114 sbuck_swap_out(struct sort_bucket *b)
116 if ((b->flags & (SBF_OPEN_READ | SBF_OPEN_WRITE)) && b->fb)
118 if (b->flags & SBF_OPEN_WRITE)
119 b->size = btell(b->fb);
120 bconfig(b->fb, BCONFIG_IS_TEMP_FILE, 0);
123 b->flags |= SBF_SWAPPED_OUT;
124 SORT_XTRACE("Swapped out %s", b->filename);
129 sorter_alloc_buf(struct sort_context *ctx)
133 u64 bs = MAX(sorter_bufsize/2, 1);
134 bs = ALIGN_TO(bs, (u64)CPU_PAGE_SIZE);
135 ctx->big_buf = big_alloc(2*bs);
136 ctx->big_buf_size = 2*bs;
137 ctx->big_buf_half = ((byte*) ctx->big_buf) + bs;
138 ctx->big_buf_half_size = bs;
139 SORT_XTRACE("Allocated sorting buffer (%jd bytes)", (uintmax_t) bs);
143 sorter_free_buf(struct sort_context *ctx)
147 big_free(ctx->big_buf, ctx->big_buf_size);
149 SORT_XTRACE("Freed sorting buffer");
152 static int sorter_presort(struct sort_context *ctx, struct sort_bucket *in, struct sort_bucket *out, struct sort_bucket *out_only)
154 /* FIXME: Mode with no presorting (mostly for debugging) */
155 sorter_alloc_buf(ctx);
156 if (in->flags & SBF_CUSTOM_PRESORT)
158 struct fastbuf *f = sbuck_write(out);
159 return ctx->custom_presort(f, ctx->big_buf, ctx->big_buf_size); // FIXME: out_only optimization?
161 return ctx->internal_sort(ctx, in, out, out_only);
164 static inline struct sort_bucket *
165 sbuck_join_to(struct sort_bucket *b)
167 if (sorter_debug & SORT_DEBUG_NO_JOIN)
170 struct sort_bucket *out = (struct sort_bucket *) b->n.prev; // Such bucket is guaranteed to exist
171 return (out->flags & SBF_FINAL) ? out : NULL;
175 sorter_join(struct sort_bucket *b)
177 struct sort_bucket *join = (struct sort_bucket *) b->n.prev;
178 ASSERT(join->flags & SBF_FINAL);
179 ASSERT(b->runs == 1);
181 if (!sbuck_has_file(join))
183 // The final bucket doesn't have any file associated yet, so replace
184 // it with the new bucket.
185 SORT_XTRACE("Replaced final bucket");
186 b->flags |= SBF_FINAL;
191 SORT_TRACE("Copying %jd bytes to output file", (uintmax_t) sbuck_size(b));
192 struct fastbuf *src = sbuck_read(b);
193 struct fastbuf *dest = sbuck_write(join);
194 bbcopy(src, dest, ~0U);
200 sorter_twoway(struct sort_context *ctx, struct sort_bucket *b)
202 struct sort_bucket *ins[3] = { NULL }, *outs[3] = { NULL };
203 cnode *list_pos = b->n.prev;
204 struct sort_bucket *join = sbuck_join_to(b);
206 if (!(sorter_debug & SORT_DEBUG_NO_PRESORT) || (b->flags & SBF_CUSTOM_PRESORT))
208 SORT_TRACE("Presorting");
209 ins[0] = sbuck_new(ctx);
210 if (!sorter_presort(ctx, b, ins[0], join ? : ins[0]))
212 SORT_XTRACE("Sorted in memory");
216 clist_insert_after(&ins[0]->n, list_pos);
221 ins[1] = sbuck_new(ctx);
223 while (sorter_presort(ctx, b, ins[i], ins[i]))
229 SORT_TRACE("Skipped presorting");
233 SORT_TRACE("Main sorting");
235 if (ins[0]->runs == 1 && ins[1]->runs == 1 && join)
237 // This is guaranteed to produce a single run, so join if possible
240 ctx->twoway_merge(ctx, ins, outs);
241 ASSERT(outs[0]->runs == 2);
243 SORT_TRACE("Pass done (joined final run)");
248 outs[0] = sbuck_new(ctx);
249 outs[1] = sbuck_new(ctx);
251 ctx->twoway_merge(ctx, ins, outs);
252 SORT_TRACE("Pass done (%d+%d runs, %jd+%jd bytes)", outs[0]->runs, outs[1]->runs, (uintmax_t) sbuck_size(outs[0]), (uintmax_t) sbuck_size(outs[1]));
255 memcpy(ins, outs, 3*sizeof(struct sort_bucket *));
256 } while (sbuck_have(ins[1]));
259 clist_insert_after(&ins[0]->n, list_pos);
263 sorter_run(struct sort_context *ctx)
265 ctx->pool = mp_new(4096);
266 clist_init(&ctx->bucket_list);
268 /* FIXME: There should be a way how to detect size of the input file */
269 /* FIXME: Remember to test sorting of empty files */
271 // Create bucket containing the source
272 struct sort_bucket *bin = sbuck_new(ctx);
273 bin->flags = SBF_SOURCE | SBF_OPEN_READ;
274 if (ctx->custom_presort)
275 bin->flags |= SBF_CUSTOM_PRESORT;
277 bin->fb = ctx->in_fb;
280 bin->hash_bits = ctx->hash_bits;
281 clist_add_tail(&ctx->bucket_list, &bin->n);
283 // Create bucket for the output
284 struct sort_bucket *bout = sbuck_new(ctx);
285 bout->flags = SBF_FINAL;
286 bout->fb = ctx->out_fb;
289 clist_add_head(&ctx->bucket_list, &bout->n);
291 struct sort_bucket *b;
292 while (bout = clist_head(&ctx->bucket_list), b = clist_next(&ctx->bucket_list, &bout->n))
296 else if (b->runs == 1)
299 sorter_twoway(ctx, b);
302 sorter_free_buf(ctx);
303 sbuck_write(bout); // Force empty bucket to a file
304 SORT_XTRACE("Final size: %jd", (uintmax_t) sbuck_size(bout));
305 ctx->out_fb = sbuck_read(bout);