2 * Netgrind -- TCP Layer Analyser
4 * (c) 2003 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
14 #include "netgrind/pkt.h"
15 #include "netgrind/netgrind.h"
20 #include <netinet/in.h>
21 #include <netinet/ip.h>
22 #include <netinet/tcp.h>
/* Packet counters of the TCP layer; each one is passed to pkt_account() from tcp_got_packet(). */
26 struct pkt_stats stat_tcp_in, stat_tcp_invalid, stat_tcp_badsum, stat_tcp_unmatched,
27 stat_tcp_on_closed, stat_tcp_bad_state;
/*
 * Behaviour switches read by tcp_got_packet():
 *   tcp_wait_for_ack  -- tested before both delivery loops: non-zero on the
 *                        path that hands over queued data once it has been
 *                        acknowledged, zero on the path that hands it over as
 *                        soon as it is in sequence.
 *   tcp_arrival_times -- when zero, a delivered packet gets its timestamp
 *                        replaced by that of the packet which triggered the
 *                        delivery.
 */
29 uns tcp_arrival_times, tcp_wait_for_ack;
/* Application hooks installed as f->appl on every newly created flow. */
31 struct appl_hooks *tcp_default_appl;
/* Names printed by DBG(), indexed by a pipe's state value (presumably in the order of the FLOW_* state constants -- TODO confirm). */
34 static byte *pipe_state_names[] = { "IDLE", "SYNSENT", "SYNACK", "ESTAB", "FINSENT", "FINISH" };
/* num_flows: flows currently stored; max_flows: number of hash buckets (hash values are taken modulo max_flows). */
37 static uns num_flows, max_flows;
/* Hash table of flows; each bucket is a list chained through hash_next. */
38 static struct flow **flow_hash;
/* Heap of flows keyed by timeout (see FLOW_HEAP_LESS); slots 1..num_flows are used and each flow records its slot in heap_pos. */
39 static struct flow **flow_heap;
/*
 * Map a connection 4-tuple to a bucket index in the range 0..max_flows-1.
 * The function is direction-sensitive: swapping source and destination
 * generally yields a different bucket.
 * NOTE(review): max_flows must be non-zero when this is called (modulo below).
 */
41 static uns flow_calc_hash(u32 saddr, u32 daddr, u32 sport, u32 dport)
/* Swap the two 16-bit halves of the source address (assuming a 32-bit u32). */
43 saddr = (saddr >> 16) | (saddr << 16);
/* Rotate the destination address right by 8 bits (assuming a 32-bit u32). */
44 daddr = (daddr >> 8) | (daddr << 24);
/* Sum the four components and reduce modulo the bucket count. */
47 return (saddr + daddr + sport + dport) % max_flows;
/*
 * Callbacks passed to the HEAP_* macros operating on the timeout heap of flows.
 *
 * FLOW_HEAP_LESS(a,b)     -- true when flow a times out earlier than flow b.
 * FLOW_HEAP_SWAP(h,a,b,t) -- exchange slots a and b of heap h, using t as the
 *                            temporary, and refresh heap_pos of both flows so
 *                            that it again matches their slot.
 *
 * Every argument is parenthesised in the expansion, so arbitrary expressions
 * (conditionals, pointer arithmetic, ...) can be passed safely.  Arguments are
 * still evaluated more than once, so they must be free of side effects.
 */
#define FLOW_HEAP_LESS(a,b) ((a)->timeout < (b)->timeout)
#define FLOW_HEAP_SWAP(h,a,b,t) do { (t)=(h)[a]; (h)[a]=(h)[b]; (h)[b]=(t); (h)[a]->heap_pos=(a); (h)[b]->heap_pos=(b); } while(0)
/*
 * Enlarge the flow hash table and the timeout heap and re-insert all flows.
 * NOTE(review): the definition of omax, the walk along each old bucket chain
 * and any release of the old arrays are not visible here; the comments below
 * describe only the statements that are.
 */
53 static void flow_rehash(void)
/* Remember the old bucket array so that its flows can be moved over. */
56 struct flow **ohash = flow_hash;
/* New bucket count: nextprime() of twice the old one. */
61 max_flows = nextprime(2*max_flows);
64 // DBG("Rehashing to %d buckets\n", max_flows);
/* Fresh zero-filled arrays; the heap gets one extra slot because its entries start at index 1. */
65 flow_hash = xmalloc_zero(sizeof(struct flow *) * max_flows);
66 flow_heap = xmalloc_zero(sizeof(struct flow *) * (max_flows+1));
/* Visit every old bucket. */
68 for (uns i=0; i<omax; i++)
70 struct flow *f = ohash[i];
/* Save the successor first: f->hash_next is overwritten when f is linked into its new bucket. */
73 struct flow *n = f->hash_next;
74 uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
75 f->hash_next = flow_hash[h];
/* Append the flow to the heap array (presumably num_flows was reset before the loop -- TODO confirm); order is restored by HEAP_INIT below. */
77 flow_heap[++num_flows] = f;
78 f->heap_pos = num_flows;
/* Re-establish the heap property over all re-inserted flows. */
84 HEAP_INIT(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
/*
 * Search the hash table for a flow whose four address/port fields all equal
 * the given ones.  The match is direction-sensitive; tcp_got_packet() calls
 * this a second time with source and destination swapped.
 * NOTE(review): the statements returning the match / the not-found result are
 * not visible here.
 */
87 static struct flow *flow_lookup(u32 saddr, u32 daddr, u32 sport, u32 dport)
89 uns h = flow_calc_hash(saddr, daddr, sport, dport);
/* Walk the chain of the selected bucket. */
90 for (struct flow *f = flow_hash[h]; f; f=f->hash_next)
91 if (f->saddr == saddr && f->daddr == daddr &&
92 f->sport == sport && f->dport == dport)
/*
 * Allocate a zero-filled flow for the given 4-tuple and register it in both
 * the hash table and the timeout heap.
 * NOTE(review): the action taken when the table is full, the initialisation
 * of the address/port fields, the store into flow_hash[h] and the return
 * statement are not visible here.
 */
97 static struct flow *flow_create(u32 saddr, u32 daddr, u32 sport, u32 dport)
/* Table full: as many flows as buckets (presumably triggers flow_rehash() -- TODO confirm). */
99 if (num_flows >= max_flows)
/* The bucket index is computed after the check above, so it uses the current max_flows. */
101 uns h = flow_calc_hash(saddr, daddr, sport, dport);
102 struct flow *f = xmalloc_zero(sizeof(struct flow));
/* Link the new flow in front of the bucket's chain. */
108 f->hash_next = flow_hash[h];
/* Append to the heap array and remember the slot. */
110 flow_heap[++num_flows] = f;
111 f->heap_pos = num_flows;
/*
 * Change the timeout of flow f and restore heap order.  Callers pass `when`
 * as a flow_now() value plus an offset.
 * NOTE(review): the statement storing `when` (presumably into f->timeout) is
 * not visible here -- TODO confirm.
 */
116 static void flow_set_timeout(struct flow *f, u32 when)
/* Move the flow's heap slot up or down as required by its new key. */
119 HEAP_CHANGE(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP, f->heap_pos);
122 static uns flow_now(struct pkt *p)
124 return p->timestamp >> 20;
/*
 * Convert a flow_now() tick count back to a timestamp.
 * NOTE(review): some statements preceding the return are not visible here
 * (possibly special handling of particular `now` values -- TODO confirm).
 */
127 static u64 flow_now_to_time(uns now)
/* Inverse of the shift in flow_now(); widen first so the shift is done in 64 bits. */
132 return (u64)now << 20;
135 static inline int tcp_seq_le(u32 a, u32 b)
137 return ((b - a) < 0x80000000);
140 static inline int tcp_seq_lt(u32 a, u32 b)
142 return (a != b && tcp_seq_le(a, b));
/*
 * Expire every flow whose timeout is not later than `now` (in flow_now()
 * ticks).
 * NOTE(review): the loop that locates the flow in its hash chain, the unlink
 * itself and the release of the flow are only partly visible here.
 */
145 static void tcp_time_step(uns now)
/* The heap root is the flow with the smallest timeout. */
147 while (num_flows && flow_heap[1]->timeout <= now)
149 struct flow *f = flow_heap[1];
150 HEAP_DELMIN(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
151 DBG("TIMEOUT for flow %p(%s/%s)\n", f, pipe_state_names[f->pipe[0].state], pipe_state_names[f->pipe[1].state]);
/* Notify the application unless both directions are already finished; now == ~0U is reported as CAUSE_DOOMSDAY instead of CAUSE_TIMEOUT. */
152 if (f->pipe[0].state != FLOW_FINISHED || f->pipe[1].state != FLOW_FINISHED)
153 f->appl->close(f, (now == ~0U) ? CAUSE_DOOMSDAY : CAUSE_TIMEOUT, flow_now_to_time(now));
/* Find the flow's bucket; gg points at the link being examined and is advanced along the chain. */
154 uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
155 struct flow **gg = &flow_hash[h];
164 gg = &(*gg)->hash_next;
/*
 * Merge the data of packet p into the queue of pipe b, which is kept ordered
 * by sequence number.  Parts of p that duplicate data already present are
 * dropped, and p is split where it overlaps a queued packet.
 * NOTE(review): the loop header, several branch bodies (frees/returns) and
 * the declaration of last_seq are not visible here; comments describe only
 * the visible statements.
 */
170 static void tcp_enqueue_data(struct pipe *b, struct pkt *p)
172 struct pkt *q, *prev, *new;
/* Refuse data that starts 0x40000 or more sequence numbers past the last acknowledged one. */
176 if (tcp_seq_lt(b->last_acked_seq, p->seq) && p->seq - b->last_acked_seq >= 0x40000)
178 DBG(" OUT OF WINDOW (last-ack=%u)\n", b->last_acked_seq);
/* Treat the list head as a pseudo-packet so that &prev->n is a valid position from the start (presumably relies on n being the first member of struct pkt -- TODO confirm). */
182 prev = (struct pkt *) &b->queue.head;
/* last_seq: end of the data already accounted for up to the current position. */
183 last_seq = b->last_acked_seq;
/* p starts inside data we already have ... */
186 if (tcp_seq_lt(p->seq, last_seq))
/* ... and also ends inside it, so it carries nothing new. */
188 if (tcp_seq_le(p->seq + pkt_len(p), last_seq))
/*
 * NOTE(review): the length passed here is the size of the part of p lying
 * beyond last_seq.  If pkt_pop() strips bytes from the front of the packet,
 * the already-seen prefix would be last_seq - p->seq bytes instead -- confirm
 * against pkt_pop()'s contract.
 */
194 pkt_pop(p, p->seq + pkt_len(p) - last_seq);
198 q = list_next(&b->queue, &prev->n);
199 if (q && tcp_seq_le(q->seq, p->seq))
201 /* next packet starts before us => skip it */
203 last_seq = q->seq + pkt_len(q);
208 if (q && tcp_seq_lt(q->seq, p->seq + pkt_len(p)))
210 /* overlap with next packet => split */
/* keeplen: bytes of p preceding q; newlen: the remaining tail, moved into a fresh packet that starts at q->seq. */
212 uns keeplen = q->seq - p->seq;
213 uns newlen = pkt_len(p) - keeplen;
214 new = pkt_new(0, newlen);
215 memcpy(pkt_append(new, newlen), pkt_unappend(p, newlen), newlen);
216 new->seq = p->seq + keeplen;
/* Link p into the queue right after prev and advance last_seq past it. */
219 list_insert(&p->n, &prev->n);
221 last_seq = p->seq + pkt_len(p);
/*
 * Entry point of the TCP layer: validate one TCP segment carried by IP header
 * iph, find or create the flow it belongs to, advance the state of the flow's
 * two pipes and pass reassembled data to the application hooks.
 *
 * In the code below `a` is the pipe of the direction this packet travels in
 * and `b` the pipe of the opposite direction.
 * NOTE(review): many lines (declarations, braces, labels, gotos, several
 * conditions such as the RST/SYN/ACK/FIN tests) are not visible here; the
 * comments describe only the visible statements and hedge where the missing
 * context matters.
 */
227 void tcp_got_packet(struct iphdr *iph, struct pkt *p)
/* Current time in flow_now() ticks; all flow timeouts below are relative to it. */
238 uns now = flow_now(p);
242 pkt_account(&stat_tcp_in, p);
/* The fixed TCP header must be present, and the data offset must cover at least that header without exceeding the packet. */
243 if (!(tcph = pkt_peek(p, sizeof(*tcph))))
245 uns hdrlen = 4*tcph->doff;
246 if (hdrlen < sizeof(*tcph) || hdrlen > pkt_len(p))
/* Build the pseudo-header and checksum it together with the whole segment. */
248 fakehdr.src = iph->saddr;
249 fakehdr.dst = iph->daddr;
251 fakehdr.proto = IPPROTO_TCP;
252 fakehdr.len = htons(pkt_len(p));
253 uns sum = tcpip_calc_checksum(&fakehdr, sizeof(fakehdr), 0);
254 sum = tcpip_calc_checksum(p->data, pkt_len(p), sum);
255 if (!tcpip_verify_checksum(sum))
257 pkt_account(&stat_tcp_badsum, p);
260 /* XXX: Check TCP options? */
/* Sequence and acknowledgement numbers in host byte order. */
262 u32 seq = ntohl(tcph->seq);
263 u32 ack = ntohl(tcph->ack_seq);
264 DBG("TCP %08x %08x %04x %04x seq=%u len=%u end=%u ack=%u%s%s%s%s%s%s\n",
265 ntohl(iph->saddr), ntohl(iph->daddr), ntohs(tcph->source), ntohs(tcph->dest),
266 seq, pkt_len(p) - hdrlen, seq + pkt_len(p) - hdrlen, ack,
267 (tcph->fin ? " FIN" : ""),
268 (tcph->syn ? " SYN" : ""),
269 (tcph->rst ? " RST" : ""),
270 (tcph->psh ? " PSH" : ""),
271 (tcph->ack ? " ACK" : ""),
272 (tcph->urg ? " URG" : ""));
/* Look the flow up in the packet's own direction first, then in the reverse one (addresses and ports are used in network byte order). */
276 if (f = flow_lookup(iph->saddr, iph->daddr, tcph->source, tcph->dest))
281 else if (f = flow_lookup(iph->daddr, iph->saddr, tcph->dest, tcph->source))
288 /* Flow not found, if it's a SYN packet, go create it */
289 if (tcph->syn && !tcph->ack && !tcph->rst && !tcph->fin)
291 f = flow_create(iph->saddr, iph->daddr, tcph->source, tcph->dest);
292 f->appl = tcp_default_appl;
293 f->appl->open(f, p->timestamp);
/* The opener's pipe starts in SYN_SENT with its sequence bookkeeping set to the SYN's number; the opposite pipe is idle. */
296 list_init(&a->queue);
297 a->syn_or_fin_seq = a->last_acked_seq = seq;
298 a->state = FLOW_SYN_SENT;
299 list_init(&b->queue);
300 b->state = FLOW_IDLE;
301 DBG("\t%p NEW\n", f);
302 pkt_account(&f->stat_raw, p);
/* Packet belongs to no known flow and does not open one. */
305 DBG("\tUnmatched\n");
306 pkt_account(&stat_tcp_unmatched, p);
309 pkt_account(&f->stat_raw, p);
312 DBG("\t%p %s (%s/%s) ", f, (a == &f->pipe[0] ? "A->B" : "B->A"), pipe_state_names[f->pipe[0].state], pipe_state_names[f->pipe[1].state]);
/* Both directions already finished: only count the packet. */
313 if (a->state == FLOW_FINISHED && b->state == FLOW_FINISHED)
316 pkt_account(&stat_tcp_on_closed, p);
/* Reset handling (the condition is not visible; presumably tcph->rst): report the close, finish both pipes and keep the flow for 120 more ticks. */
323 f->appl->close(f, CAUSE_RESET, p->timestamp);
324 a->state = b->state = FLOW_FINISHED;
325 flow_set_timeout(f, now + 120);
329 flow_set_timeout(f, now + 3000); /* Somewhat arbitrary timeout */
/* NOTE(review): presumably part of the SYN handling -- a SYN carrying FIN or payload; confirm against the lines not shown. */
333 if (tcph->fin || pkt_len(p))
/* This packet acknowledges the other side's SYN: record both initial sequence positions and move this pipe to SYN_SENT_ACK. */
337 if (b->state == FLOW_SYN_SENT && b->syn_or_fin_seq+1 == ack)
340 a->last_acked_seq = ack;
341 a->syn_or_fin_seq = seq;
342 a->queue_start_seq = ack;
343 a->state = FLOW_SYN_SENT_ACK;
344 b->last_acked_seq = seq;
347 else if (b->state == FLOW_ESTABLISHED)
353 goto dup; /* otherwise SYN on already existing connection gets ignored */
/* Acknowledgement that does not advance past what this pipe has already recorded. */
358 if (tcp_seq_le(ack, a->last_acked_seq))
362 a->last_acked_seq = ack;
363 if (tcp_wait_for_ack)
/* Hand over every packet at the head of a->queue that ends at or before the acknowledged position. */
365 while ((q = list_head(&a->queue)) && tcp_seq_le(q->seq+pkt_len(q), ack))
368 a->queue_start_seq = q->seq + pkt_len(q);
/* Unless original arrival times are wanted, stamp the data with the time of the acknowledging packet. */
369 if (!tcp_arrival_times)
370 q->timestamp = p->timestamp;
371 DBG("data(%Ld-%Ld), ", a->stat.bytes, a->stat.bytes+pkt_len(q)-1);
372 pkt_account(&a->stat, q);
/* The second argument is non-zero when `a` is pipe[0]. */
373 f->appl->input(f, (a == &f->pipe[0]), q);
/* Acknowledgement of the other side's SYN sent in SYN_SENT_ACK state: the connection is established. */
376 if (b->state == FLOW_SYN_SENT_ACK && b->syn_or_fin_seq+1 == ack)
378 a->state = b->state = FLOW_ESTABLISHED;
379 a->queue_start_seq = ack;
/* Acknowledgement of the other side's FIN: that direction is finished. */
382 else if (b->state == FLOW_FIN_SENT && b->syn_or_fin_seq+1 == ack)
384 b->state = FLOW_FINISHED;
385 if (a->state == FLOW_FINISHED)
387 DBG("CLOSED BOTH WAYS\n");
388 f->appl->close(f, CAUSE_CLOSE, p->timestamp);
389 flow_set_timeout(f, now + 300); /* FIXME */
393 DBG("CLOSED ONE-WAY, ");
/* The acknowledgement lies beyond the start of the queued data, i.e. it covers data that was not captured. */
395 else if (tcp_seq_lt(a->queue_start_seq, ack))
397 DBG("DAMNED, ACK FOR UNCAUGHT DATA!\n");
400 else if (b->state == FLOW_SYN_SENT_ACK || b->state == FLOW_SYN_SENT)
/* FIN handling (the tcph->fin test is not visible): remember the sequence number just past this segment as the FIN position. */
407 if (a->state == FLOW_ESTABLISHED)
409 a->state = FLOW_FIN_SENT;
410 a->syn_or_fin_seq = seq + pkt_len(p);
411 DBG("FIN SENT, waiting for FIN ACK, ");
413 else if (a->state == FLOW_FIN_SENT)
/* Payload is accepted while the opposite pipe is established or closing; it is queued on pipe b. */
425 if (b->state == FLOW_ESTABLISHED || b->state == FLOW_FIN_SENT || b->state == FLOW_FINISHED)
/* Saved before enqueueing, which may trim, split or release p. */
427 u64 arrival = p->timestamp;
429 tcp_enqueue_data(b, p);
430 if (!tcp_wait_for_ack)
/* Hand over queued packets for as long as the head continues exactly at the expected sequence number. */
432 while ((q = list_head(&b->queue)) && q->seq == b->queue_start_seq)
435 if (!tcp_arrival_times)
436 q->timestamp = arrival;
437 DBG(", data(%Ld-%Ld)", b->stat.bytes, b->stat.bytes+pkt_len(q)-1);
438 pkt_account(&b->stat, q);
439 b->queue_start_seq += pkt_len(q);
/* The second argument is non-zero when `b` is pipe[1], i.e. when this packet's own pipe `a` is pipe[0]. */
440 f->appl->input(f, (b == &f->pipe[1]), q);
/* Error exits: packet not acceptable in the flow's current state / malformed packet. */
459 pkt_account(&stat_tcp_bad_state, p);
465 pkt_account(&stat_tcp_invalid, p);
474 void tcp_cleanup(void)