2 * Netgrind -- TCP Layer Analyser
4 * (c) 2003 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
14 #include "netgrind/pkt.h"
15 #include "netgrind/netgrind.h"
20 #include <netinet/in.h>
21 #include <netinet/ip.h>
22 #include <netinet/tcp.h>
/* Per-category packet statistics: accepted input, malformed packets,
 * checksum failures, packets matching no known flow, packets arriving
 * on an already-closed flow, and packets illegal in the current state. */
26 struct pkt_stats stat_tcp_in, stat_tcp_invalid, stat_tcp_badsum, stat_tcp_unmatched,
27 stat_tcp_on_closed, stat_tcp_bad_state;
/* Flow counters: total flows seen and a histogram of close causes
 * (indexed by the CAUSE_* constants, see flow_cause_names below). */
28 uns cnt_tcp_flows, cnt_tcp_causes[CAUSE_MAX];
/* Tunables (presumably set from the command line -- TODO confirm against main):
 * tcp_arrival_times: stamp data with its arrival time instead of delivery time;
 * tcp_wait_for_ack: deliver queued data only after it has been ACKed. */
29 uns tcp_arrival_times, tcp_wait_for_ack;
/* Application-layer hooks (open/input/close) attached to each new flow. */
31 struct appl_hooks *tcp_default_appl;
/* Human-readable names for the FLOW_* states and CAUSE_* close reasons. */
33 byte *flow_state_names[] = { "IDLE", "SYNSENT", "SYNACK", "ESTAB", "FINSENT", "FINISH" };
34 byte *flow_cause_names[] = { "Close", "Reset", "Timeout", "Doomsday", "Corrupted" };
35 byte *flow_cause_names_short[] = { "OK", "RE", "TO", "DD", "CO" };
/* Flow table state: current/maximum flow count, a chained hash table keyed
 * by the 4-tuple, and a 1-based binary min-heap ordered by flow timeout. */
37 uns tcp_num_flows, tcp_max_flows;
38 static struct flow **flow_hash;
39 static struct flow **flow_heap;
/* Hash the connection 4-tuple (source/destination address and port) into a
 * bucket index of the flow hash table.  The addresses are rotated by
 * different amounts before summing so that symmetric or similar tuples do
 * not collide trivially.  Result is reduced modulo the current table size,
 * so it must be recomputed after every flow_rehash(). */
41 static uns flow_calc_hash(u32 saddr, u32 daddr, u32 sport, u32 dport)
43 saddr = (saddr >> 16) | (saddr << 16);
44 daddr = (daddr >> 8) | (daddr << 24);
47 return (saddr + daddr + sport + dport) % tcp_max_flows;
/* Comparison and swap callbacks for the generic HEAP_* macros: the heap is a
 * min-heap on flow timeout, and each swap also updates the flows' back
 * pointers (heap_pos) so HEAP_CHANGE can find them later. */
50 #define FLOW_HEAP_LESS(a,b) (a->timeout < b->timeout)
51 #define FLOW_HEAP_SWAP(h,a,b,t) do { t=h[a]; h[a]=h[b]; h[b]=t; h[a]->heap_pos=a; h[b]->heap_pos=b; } while(0)
/* Grow the flow hash table (and the parallel timeout heap) to the next prime
 * above twice the current size, then re-insert every existing flow and
 * rebuild the heap.  Called when tcp_num_flows reaches tcp_max_flows.
 * NOTE(review): some lines of this function are elided in this view
 * (e.g. the initial-allocation path and freeing of the old arrays). */
53 static void flow_rehash(void)
55 uns omax = tcp_max_flows;
56 struct flow **ohash = flow_hash;
61 tcp_max_flows = nextprime(2*tcp_max_flows);
64 // DBG("Rehashing to %d buckets\n", tcp_max_flows);
/* Heap is 1-based, hence the +1 on its allocation. */
65 flow_hash = xmalloc_zero(sizeof(struct flow *) * tcp_max_flows);
66 flow_heap = xmalloc_zero(sizeof(struct flow *) * (tcp_max_flows+1));
/* Walk every old bucket chain and move each flow to its new bucket. */
68 for (uns i=0; i<omax; i++)
70 struct flow *f = ohash[i];
/* Remember the chain successor before f->hash_next is overwritten. */
73 struct flow *n = f->hash_next;
74 uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
75 f->hash_next = flow_hash[h];
/* Append to the heap array; HEAP_INIT below restores heap order. */
77 flow_heap[++tcp_num_flows] = f;
78 f->heap_pos = tcp_num_flows;
84 HEAP_INIT(struct flow *, flow_heap, tcp_num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
/* Find the flow exactly matching the given 4-tuple (addresses and ports in
 * network byte order, matching how callers pass them straight from the
 * headers).  Walks the hash bucket chain; presumably returns the match or
 * NULL -- the return statements are elided in this view. */
87 static struct flow *flow_lookup(u32 saddr, u32 daddr, u32 sport, u32 dport)
89 uns h = flow_calc_hash(saddr, daddr, sport, dport);
90 for (struct flow *f = flow_hash[h]; f; f=f->hash_next)
91 if (f->saddr == saddr && f->daddr == daddr &&
92 f->sport == sport && f->dport == dport)
/* Allocate a new zero-initialized flow for the 4-tuple and link it into both
 * the hash table and the timeout heap.  Rehashes first if the table is full
 * (the flow_rehash() call after the size check is elided in this view, as
 * are the field assignments storing the 4-tuple into the new flow). */
97 static struct flow *flow_create(u32 saddr, u32 daddr, u32 sport, u32 dport)
99 if (tcp_num_flows >= tcp_max_flows)
/* Hash must be computed after any rehash, since it depends on tcp_max_flows. */
101 uns h = flow_calc_hash(saddr, daddr, sport, dport);
102 struct flow *f = xmalloc_zero(sizeof(struct flow));
108 f->hash_next = flow_hash[h];
/* New flow goes to the end of the 1-based heap; caller sets its timeout
 * later via flow_set_timeout(), which restores heap order. */
110 flow_heap[++tcp_num_flows] = f;
111 f->heap_pos = tcp_num_flows;
/* Change a flow's expiry time and restore the min-heap invariant from its
 * current position.  (The assignment of `when` to f->timeout is elided in
 * this view; it presumably precedes the HEAP_CHANGE -- TODO confirm.) */
116 static void flow_set_timeout(struct flow *f, u32 when)
119 HEAP_CHANGE(struct flow *, flow_heap, tcp_num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP, f->heap_pos);
/* Convert a 64-bit packet timestamp to the coarse 32-bit "now" clock used
 * for flow timeouts, by dropping the low 20 bits (i.e. units of 2^20 of the
 * original resolution -- roughly seconds if timestamps are in microseconds,
 * though the timestamp unit is not visible here). */
122 static uns timestamp_to_now(u64 timestamp)
124 return timestamp >> 20;
/* Inverse of timestamp_to_now(): expand a coarse clock value back to the
 * 64-bit timestamp scale (low 20 bits become zero, so precision is lost). */
127 static u64 now_to_timestamp(uns now)
129 return (u64)now << 20;
/* Modulo-2^32 sequence number comparison: true iff a <= b in TCP sequence
 * space, i.e. b is less than half the sequence space ahead of a
 * (standard serial-number arithmetic, cf. RFC 793 / RFC 1982). */
132 static inline int tcp_seq_le(u32 a, u32 b)
134 return ((b - a) < 0x80000000);
/* Strict variant of tcp_seq_le(): true iff a < b in TCP sequence space. */
137 static inline int tcp_seq_lt(u32 a, u32 b)
139 return (a != b && tcp_seq_le(a, b));
/* Expire flows whose timeout has passed (or ALL flows when `doomsday` is
 * set, used by tcp_cleanup at end of capture).  For each expired flow:
 * report a close cause to the application hook if either direction is not
 * cleanly FINISHED, flush its packet queues, and unlink it from the hash
 * chain.  NOTE(review): several lines are elided in this view (the
 * rst_seen-style condition between CORRUPT and DOOMSDAY causes, the hash
 * unlink loop body, and the final free of the flow -- TODO confirm). */
142 static void tcp_time_step(uns now, uns doomsday)
/* The heap root (index 1) always holds the flow expiring soonest. */
144 while (tcp_num_flows && (flow_heap[1]->timeout <= now || doomsday))
146 struct flow *f = flow_heap[1];
147 HEAP_DELMIN(struct flow *, flow_heap, tcp_num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
148 DBG("%s for flow %p(%s/%s) now=%d\n", (doomsday ? "DOOMSDAY" : "TIMEOUT"), f,
149 flow_state_names[f->pipe[0].state], flow_state_names[f->pipe[1].state], now);
/* Only flows that did not close cleanly in both directions get an
 * abnormal close cause reported here. */
150 if (f->pipe[0].state != FLOW_FINISHED || f->pipe[1].state != FLOW_FINISHED)
153 if (f->cnt_unexpected)
154 cause = CAUSE_CORRUPT;
156 cause = CAUSE_DOOMSDAY;
158 cause = CAUSE_TIMEOUT;
159 cnt_tcp_causes[cause]++;
160 f->appl->close(f, cause, now_to_timestamp(now));
/* Discard any data still queued for reassembly in either direction. */
162 for (uns i=0; i<2; i++)
163 pkt_flush_queue(&f->pipe[i].queue);
/* Unlink the flow from its hash bucket chain. */
164 uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
165 struct flow **gg = &flow_hash[h];
174 gg = &(*gg)->hash_next;
/* Insert packet `p` into pipe `b`'s reassembly queue, which is kept sorted
 * by sequence number with overlaps trimmed away.  Handles: data entirely
 * outside the receive window, retransmissions of already-ACKed data
 * (fully or partially duplicate), packets overlapped by queued successors,
 * and overlap with the next queued packet (split into a new trailing
 * packet).  NOTE(review): the loop/brace structure and some branches
 * (e.g. dropping a fully-duplicate packet) are elided in this view. */
180 static void tcp_enqueue_data(struct pipe *b, struct pkt *p)
182 struct pkt *q, *prev, *new;
/* Reject data starting more than 256K (0x40000) past the queue start --
 * treated as out-of-window garbage rather than queued indefinitely. */
186 if (tcp_seq_lt(b->queue_start_seq, p->seq) && p->seq - b->queue_start_seq >= 0x40000)
188 DBG(" OUT OF WINDOW (q-start=%u)", b->queue_start_seq);
/* Scan from the list head; `last_seq` tracks the highest sequence number
 * already covered by ACKed data or packets we have skipped past. */
192 prev = (struct pkt *) &b->queue.head;
193 last_seq = b->last_acked_seq;
/* Packet (partially) precedes already-ACKed data: drop the duplicate
 * prefix, or presumably the whole packet if fully covered (branch elided). */
196 if (tcp_seq_lt(p->seq, last_seq))
198 if (tcp_seq_le(p->seq + pkt_len(p), last_seq))
204 pkt_pop(p, p->seq + pkt_len(p) - last_seq);
208 q = list_next(&b->queue, &prev->n);
209 if (q && tcp_seq_le(q->seq, p->seq))
211 /* next packet starts before us => skip it */
213 last_seq = q->seq + pkt_len(q);
218 if (q && tcp_seq_lt(q->seq, p->seq + pkt_len(p)))
220 /* overlap with next packet => split */
222 uns keeplen = q->seq - p->seq;
223 uns newlen = pkt_len(p) - keeplen;
/* Carve the overlapping tail off into a fresh packet and recurse/continue
 * with it; pkt_unappend shrinks p to `keeplen` bytes. */
224 new = pkt_new(0, newlen);
225 memcpy(pkt_append(new, newlen), pkt_unappend(p, newlen), newlen);
226 new->seq = p->seq + keeplen;
/* Finally link the (possibly trimmed) packet after `prev`. */
229 list_insert(&p->n, &prev->n);
231 last_seq = p->seq + pkt_len(p);
/* Main TCP packet entry point: validate the segment, match or create its
 * flow, drive the per-direction connection state machine, deliver in-order
 * data to the application hooks, and refresh the flow timeout.
 *
 * `iph` is the enclosing IP header (fields in network byte order); `p` is
 * the packet payload starting at the TCP header, owned by this function.
 *
 * NOTE(review): this is a sampled view -- opening/closing braces, several
 * `else` branches, gotos and labels (`dup`, the invalid/bad-state exits),
 * and the declarations of `tcph`, `fakehdr`, `a`, `b`, `q` are elided.
 * Comments below describe only what the visible lines establish. */
237 void tcp_got_packet(struct iphdr *iph, struct pkt *p)
248 uns now = timestamp_to_now(p->timestamp);
249 struct flow *f = NULL;
/* Expire flows that timed out before this packet's arrival. */
251 tcp_time_step(now, 0);
253 pkt_account(&stat_tcp_in, p);
/* --- Validation: header present, sane data offset, checksum --- */
254 if (!(tcph = pkt_peek(p, sizeof(*tcph))))
256 uns hdrlen = 4*tcph->doff;
257 if (hdrlen < sizeof(*tcph) || hdrlen > pkt_len(p))
/* Build the TCP pseudo-header for checksum verification. */
259 fakehdr.src = iph->saddr;
260 fakehdr.dst = iph->daddr;
262 fakehdr.proto = IPPROTO_TCP;
263 fakehdr.len = htons(pkt_len(p));
264 uns sum = tcpip_calc_checksum(&fakehdr, sizeof(fakehdr), 0);
265 sum = tcpip_calc_checksum(p->data, pkt_len(p), sum);
266 if (!tcpip_verify_checksum(sum))
268 pkt_account(&stat_tcp_badsum, p);
271 /* XXX: Check TCP options? */
273 u32 seq = ntohl(tcph->seq);
274 u32 ack = ntohl(tcph->ack_seq);
275 DBG("TCP %08x %08x %04x %04x seq=%u len=%u end=%u ack=%u%s%s%s%s%s%s now=%d\n",
276 ntohl(iph->saddr), ntohl(iph->daddr), ntohs(tcph->source), ntohs(tcph->dest),
277 seq, pkt_len(p) - hdrlen, seq + pkt_len(p) - hdrlen, ack,
278 (tcph->fin ? " FIN" : ""),
279 (tcph->syn ? " SYN" : ""),
280 (tcph->rst ? " RST" : ""),
281 (tcph->psh ? " PSH" : ""),
282 (tcph->ack ? " ACK" : ""),
283 (tcph->urg ? " URG" : ""),
/* --- Flow matching: try the tuple in both directions; `a` is presumably
 * the sender's pipe and `b` the receiver's (assignments elided). --- */
287 if (f = flow_lookup(iph->saddr, iph->daddr, tcph->source, tcph->dest))
292 else if (f = flow_lookup(iph->daddr, iph->saddr, tcph->dest, tcph->source))
299 /* Flow not found, if it's a SYN packet, go create it */
300 if (tcph->syn && !tcph->ack && !tcph->rst && !tcph->fin)
302 f = flow_create(iph->saddr, iph->daddr, tcph->source, tcph->dest);
303 f->appl = tcp_default_appl;
304 f->appl->open(f, p->timestamp);
/* Initialize the initiator pipe in SYN_SENT and the peer pipe idle. */
307 list_init(&a->queue);
308 a->syn_or_fin_seq = a->last_acked_seq = seq;
309 a->state = FLOW_SYN_SENT;
310 list_init(&b->queue);
311 b->state = FLOW_IDLE;
312 DBG("\t%p NEW\n", f);
313 pkt_account(&f->stat_raw, p);
/* Non-SYN packet with no matching flow: count it and bail out. */
316 DBG("\tUnmatched\n");
317 pkt_account(&stat_tcp_unmatched, p);
320 pkt_account(&f->stat_raw, p);
323 DBG("\t%p %s (%s/%s) ", f, (a == &f->pipe[0] ? "A->B" : "B->A"), flow_state_names[f->pipe[0].state], flow_state_names[f->pipe[1].state]);
/* --- Packets on a flow already closed in both directions --- */
324 if (a->state == FLOW_FINISHED && b->state == FLOW_FINISHED)
326 /* XXX: Here we probably should handle SYN (fast recycling of ports) */
328 pkt_account(&stat_tcp_on_closed, p);
329 f = NULL; /* Avoid shifting flow timeout */
/* --- RST handling (condition elided): close the flow immediately. --- */
336 cnt_tcp_causes[CAUSE_RESET]++;
337 f->appl->close(f, CAUSE_RESET, p->timestamp);
338 a->state = b->state = FLOW_FINISHED;
344 if (tcph->fin || pkt_len(p))
/* --- SYN handling: a SYN ACKing the peer's SYN completes the handshake
 * half (SYN_SENT -> SYN_SENT_ACK on the responder's pipe). --- */
348 if (b->state == FLOW_SYN_SENT && b->syn_or_fin_seq+1 == ack)
351 a->last_acked_seq = ack;
352 a->syn_or_fin_seq = seq;
353 a->queue_start_seq = ack;
354 a->state = FLOW_SYN_SENT_ACK;
355 b->last_acked_seq = seq;
358 else if (b->state == FLOW_ESTABLISHED)
364 goto dup; /* otherwise SYN on already existing connection gets ignored */
/* --- ACK processing: ignore stale ACKs; otherwise advance last_acked_seq
 * and, in wait-for-ack mode, release now-acknowledged queued data. --- */
369 if (tcp_seq_le(ack, a->last_acked_seq))
373 a->last_acked_seq = ack;
374 if (tcp_wait_for_ack)
/* Deliver every queued packet fully covered by this ACK. */
376 while ((q = list_head(&a->queue)) && tcp_seq_le(q->seq+pkt_len(q), ack))
379 a->queue_start_seq = q->seq + pkt_len(q);
/* Unless arrival-time mode is on, stamp data with the ACK's timestamp
 * (the moment delivery is confirmed). */
380 if (!tcp_arrival_times)
381 q->timestamp = p->timestamp;
382 DBG("data(%Ld-%Ld), ", a->stat.bytes, a->stat.bytes+pkt_len(q)-1);
383 pkt_account(&a->stat, q);
384 f->appl->input(f, (a == &f->pipe[0]), q);
/* ACK completing the three-way handshake: both pipes established. */
387 if (b->state == FLOW_SYN_SENT_ACK && b->syn_or_fin_seq+1 == ack)
389 a->state = b->state = FLOW_ESTABLISHED;
390 a->queue_start_seq = ack;
/* ACK of the peer's FIN: that direction is finished; deliver the
 * (presumably empty FIN marker) packet to the application. */
393 else if (b->state == FLOW_FIN_SENT && b->syn_or_fin_seq+1 == ack)
395 b->state = FLOW_FINISHED;
397 q->timestamp = p->timestamp;
398 f->appl->input(f, (b == &f->pipe[0]), q);
/* If our own direction already finished too, the flow closed cleanly. */
399 if (a->state == FLOW_FINISHED)
401 DBG("CLOSED BOTH WAYS\n");
402 cnt_tcp_causes[CAUSE_CLOSE]++;
403 f->appl->close(f, CAUSE_CLOSE, p->timestamp);
407 DBG("CLOSED ONE-WAY, ");
/* ACK for data we never captured -- the trace is missing packets. */
409 else if (tcp_seq_lt(a->queue_start_seq, ack))
411 DBG("DAMNED, ACK FOR UNCAUGHT DATA!\n");
414 else if (b->state == FLOW_SYN_SENT_ACK || b->state == FLOW_SYN_SENT)
/* --- FIN handling: record the FIN's sequence and await its ACK. --- */
421 if (a->state == FLOW_ESTABLISHED)
423 a->state = FLOW_FIN_SENT;
424 a->syn_or_fin_seq = seq + pkt_len(p);
425 DBG("FIN SENT, waiting for FIN ACK, ");
427 else if (a->state == FLOW_FIN_SENT)
/* --- Data handling: queue the payload for reassembly and, unless in
 * wait-for-ack mode, deliver whatever is now contiguous at queue start. --- */
439 if (b->state == FLOW_ESTABLISHED || b->state == FLOW_FIN_SENT || b->state == FLOW_FINISHED)
441 u64 arrival = p->timestamp;
443 tcp_enqueue_data(b, p);
444 if (!tcp_wait_for_ack)
446 while ((q = list_head(&b->queue)) && q->seq == b->queue_start_seq)
449 if (!tcp_arrival_times)
450 q->timestamp = arrival;
451 DBG(", data(%Ld-%Ld)", b->stat.bytes, b->stat.bytes+pkt_len(q)-1);
452 pkt_account(&b->stat, q);
453 b->queue_start_seq += pkt_len(q);
454 f->appl->input(f, (b == &f->pipe[1]), q);
/* --- Timeout refresh: setup=60s, post-close linger=30s, data phase=900s.
 * Skipped when f was NULLed above (packet on an already-closed flow). --- */
471 int timeout = 60; /* Connection setup timeout */
472 if (f->pipe[0].state == FLOW_FINISHED && f->pipe[1].state == FLOW_FINISHED)
473 timeout = 30; /* After connection close; later the packets are just unmatched */
474 else if ((f->pipe[0].state == FLOW_ESTABLISHED || f->pipe[0].state == FLOW_FIN_SENT || f->pipe[0].state == FLOW_FINISHED) &&
475 (f->pipe[1].state == FLOW_ESTABLISHED || f->pipe[1].state == FLOW_FIN_SENT || f->pipe[1].state == FLOW_FINISHED))
476 timeout = 900; /* Data phase timeout */
477 DBG("timeout at %d\n", now+timeout);
478 flow_set_timeout(f, now + timeout);
/* --- Error exits (labels elided): packet illegal for current state,
 * and structurally invalid packet. --- */
486 pkt_account(&stat_tcp_bad_state, p);
492 pkt_account(&stat_tcp_invalid, p);
/* End-of-capture teardown: run the timeout machinery in "doomsday" mode so
 * every remaining flow is expired, reported to its application hook, and
 * freed, regardless of its actual timeout. */
501 void tcp_cleanup(u64 timestamp)
503 tcp_time_step(timestamp_to_now(timestamp), 1);