-/*** CHECKSUMMING ***/
-
-static uns tcpip_calc_checksum(void *data, uns len, uns csum)
-{
- byte *x = data;
-
- while (len >= 2)
- {
- csum += (x[0] << 8) | x[1];
- if (csum & 0xffff0000)
- {
- csum &= 0x0000ffff;
- csum++;
- }
- x += 2;
- len -= 2;
- }
- if (len)
- {
- csum += x[0];
- if (csum & 0xffff0000)
- {
- csum &= 0x0000ffff;
- csum++;
- }
- }
- return csum;
-}
-
-static inline uns tcpip_verify_checksum(uns csum)
-{
- return (csum == 0xffff);
-}
-
-/*** FLOW ANALYSIS ***/
-
-enum close_cause {
- CAUSE_CLOSE,
- CAUSE_RESET,
- CAUSE_TIMEOUT,
- CAUSE_DOOMSDAY
-};
-
-struct flow;
-
-struct appl_hooks {
- void (*open)(struct flow *f);
- void (*input)(struct flow *f, int dir, struct pkt *p);
- void (*close)(struct flow *f, int cause);
-};
-
-static void sink_open(struct flow *f)
-{
-}
-
-static void sink_close(struct flow *f, int cause)
-{
-}
-
-static void sink_input(struct flow *f, int dir, struct pkt *p)
-{
-}
-
-struct appl_hooks appl_sink = {
- .open = sink_open,
- .input = sink_input,
- .close = sink_close
-};
-
-/*** TCP LAYER ***/
-
-static struct pkt_stats stat_tcp_in, stat_tcp_invalid, stat_tcp_badsum, stat_tcp_unmatched,
- stat_tcp_on_closed;
-
-struct pipe {
- list queue;
- u32 last_acked_seq;
- enum {
- FLOW_IDLE,
- FLOW_SYN_SENT, /* sent SYN, waiting for SYN ACK */
- FLOW_SYN_SENT_ACK, /* sent SYN ACK, waiting for first ACK */
- FLOW_ESTABLISHED, /* established state including waiting for ACK of SYN ACK */
- FLOW_FIN_SENT, /* sent FIN, waiting for its ACK */
- FLOW_CLOSED /* closed, ignoring further packets */
- } state;
-};
-
-struct flow {
- struct flow *hash_next;
- u32 saddr, daddr, sport, dport;
- u32 timeout;
- uns heap_pos;
- struct appl_hooks *appl;
- void *appl_data;
- struct pipe pipe[2];
-};
-
-static uns num_flows, max_flows;
-static struct flow **flow_hash;
-static struct flow **flow_heap;
-
-static uns flow_calc_hash(u32 saddr, u32 daddr, u32 sport, u32 dport)
-{
- saddr = (saddr >> 16) | (saddr << 16);
- daddr = (daddr >> 8) | (daddr << 24);
- sport <<= 7;
- dport <<= 21;
- return (saddr + daddr + sport + dport) % max_flows;
-}
-
-#define FLOW_HEAP_LESS(a,b) (a->timeout < b->timeout)
-#define FLOW_HEAP_SWAP(h,a,b,t) do { t=h[a]; h[a]=h[b]; h[b]=t; h[a]->heap_pos=a; h[b]->heap_pos=b; } while(0)
-
-static void flow_rehash(void)
-{
- uns omax = max_flows;
- struct flow **ohash = flow_hash;
-
- if (flow_heap)
- xfree(flow_heap);
- if (max_flows)
- max_flows = nextprime(2*max_flows);
- else
- max_flows = 3;
- DBG("Rehashing to %d buckets\n", max_flows);
- flow_hash = xmalloc_zero(sizeof(struct flow *) * max_flows);
- flow_heap = xmalloc_zero(sizeof(struct flow *) * (max_flows+1));
- num_flows = 0;
- for (uns i=0; i<omax; i++)
- {
- struct flow *f = ohash[i];
- while (f)
- {
- struct flow *n = f->hash_next;
- uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
- f->hash_next = flow_hash[h];
- flow_hash[h] = f;
- flow_heap[++num_flows] = f;
- f->heap_pos = num_flows;
- f = n;
- }
- }
- if (ohash)
- xfree(ohash);
- HEAP_INIT(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
-}
-
-static struct flow *flow_lookup(u32 saddr, u32 daddr, u32 sport, u32 dport)
-{
- uns h = flow_calc_hash(saddr, daddr, sport, dport);
- for (struct flow *f = flow_hash[h]; f; f=f->hash_next)
- if (f->saddr == saddr && f->daddr == daddr &&
- f->sport == sport && f->dport == dport)
- return f;
- return NULL;
-}
-
-static struct flow *flow_create(u32 saddr, u32 daddr, u32 sport, u32 dport)
-{
- if (num_flows >= max_flows)
- flow_rehash();
- uns h = flow_calc_hash(saddr, daddr, sport, dport);
- struct flow *f = xmalloc_zero(sizeof(struct flow));
- f->saddr = saddr;
- f->daddr = daddr;
- f->sport = sport;
- f->dport = dport;
- f->timeout = ~0U;
- f->hash_next = flow_hash[h];
- flow_hash[h] = f;
- flow_heap[++num_flows] = f;
- f->heap_pos = num_flows;
- return f;
-}
-
-static void flow_set_timeout(struct flow *f, u32 when)
-{
- f->timeout = when;
- HEAP_CHANGE(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP, f->heap_pos);
-}
-
-static uns flow_now(struct pkt *p)
-{
- return p->timestamp >> 20;
-}
-
-static void tcp_time_step(uns now)
-{
- while (num_flows && flow_heap[1]->timeout <= now)
- {
- struct flow *f = flow_heap[1];
- HEAP_DELMIN(struct flow *, flow_heap, num_flows, FLOW_HEAP_LESS, FLOW_HEAP_SWAP);
- DBG("TIMEOUT for flow %p(%d/%d)\n", f, f->pipe[0].state, f->pipe[1].state);
- if (f->pipe[0].state != FLOW_CLOSED || f->pipe[1].state != FLOW_CLOSED)
- f->appl->close(f, (now == ~0U) ? CAUSE_DOOMSDAY : CAUSE_TIMEOUT);
- uns h = flow_calc_hash(f->saddr, f->daddr, f->sport, f->dport);
- struct flow **gg = &flow_hash[h];
- for(;;)
- {
- ASSERT(*gg);
- if (*gg == f)
- {
- *gg = f->hash_next;
- break;
- }
- gg = &(*gg)->hash_next;
- }
- xfree(f);
- }
-}
-
-static void tcp_got_packet(struct iphdr *iph, struct pkt *p)
-{
- struct tcphdr *tcph;
- struct {
- u32 src;
- u32 dst;
- byte zero;
- byte proto;
- u16 len;
- } fakehdr;
- uns now = flow_now(p);
-
- tcp_time_step(now);
-
- pkt_account(&stat_tcp_in, p);
- if (!(tcph = pkt_peek(p, sizeof(*tcph))))
- goto invalid;
- uns hdrlen = 4*tcph->doff;
- if (hdrlen < sizeof(*tcph) || hdrlen > pkt_len(p))
- goto invalid;
- fakehdr.src = iph->saddr;
- fakehdr.dst = iph->daddr;
- fakehdr.zero = 0;
- fakehdr.proto = IPPROTO_TCP;
- fakehdr.len = htons(pkt_len(p));
- uns sum = tcpip_calc_checksum(&fakehdr, sizeof(fakehdr), 0);
- sum = tcpip_calc_checksum(p->data, pkt_len(p), sum);
- if (!tcpip_verify_checksum(sum))
- {
- pkt_account(&stat_tcp_badsum, p);
- goto drop;
- }
- /* XXX: Check TCP options? */
- pkt_pop(p, hdrlen);
-
- u32 seq = ntohl(tcph->seq);
- u32 ack = ntohl(tcph->ack_seq);
- DBG("TCP %08x %08x %04x %04x seq=%u+%u ack=%u%s%s%s%s%s%s\n",
- ntohl(iph->saddr), ntohl(iph->daddr), ntohs(tcph->source), ntohs(tcph->dest), seq, pkt_len(p), ack,
- (tcph->fin ? " FIN" : ""),
- (tcph->syn ? " SYN" : ""),
- (tcph->rst ? " RST" : ""),
- (tcph->psh ? " PSH" : ""),
- (tcph->ack ? " ACK" : ""),
- (tcph->urg ? " URG" : ""));
-
- struct flow *f;
- struct pipe *a, *b;
- if (f = flow_lookup(iph->saddr, iph->daddr, tcph->source, tcph->dest))
- {
- a = &f->pipe[0];
- b = &f->pipe[1];
- }
- else if (f = flow_lookup(iph->daddr, iph->saddr, tcph->dest, tcph->source))
- {
- a = &f->pipe[1];
- b = &f->pipe[0];
- }
- else
- {
- /* Flow not found, if it's a SYN packet, go create it */
- if (tcph->syn && !tcph->ack && !tcph->rst && !tcph->fin)
- {
- f = flow_create(iph->saddr, iph->daddr, tcph->source, tcph->dest);
- f->appl = &appl_sink;
- f->appl->open(f);
- a = &f->pipe[0];
- b = &f->pipe[1];
- list_init(&a->queue);
- a->last_acked_seq = seq;
- a->state = FLOW_SYN_SENT;
- list_init(&b->queue);
- b->state = FLOW_IDLE;
- DBG("\t%p NEW\n", f);
- goto drop;
- }
- DBG("\tUnmatched\n");
- pkt_account(&stat_tcp_unmatched, p);
- goto drop;
- }
-
- DBG("\t%p(%d/%d) ", f, a->state, b->state);
- if (a->state == FLOW_CLOSED && b->state == FLOW_CLOSED)
- {
- DBG("closed\n");
- pkt_account(&stat_tcp_on_closed, p);
- goto drop;
- }
-
- if (tcph->rst)
- {
- DBG("RESET\n");
- f->appl->close(f, CAUSE_RESET);
- a->state = b->state = FLOW_CLOSED;
- flow_set_timeout(f, now + 300); /* FIXME */
- goto drop;
- }
-
- flow_set_timeout(f, now + 600); /* FIXME */
-
- if (tcph->syn)
- {
- if (tcph->fin || pkt_len(p))
- goto inval;
- if (tcph->ack)
- { /* SYN ACK */
- if (b->state == FLOW_SYN_SENT && b->last_acked_seq+1 == ack)
- {
- DBG("SYN ACK\n");
- b->last_acked_seq = ack;
- a->state = FLOW_SYN_SENT_ACK;
- a->last_acked_seq = seq;
- goto drop;
- }
- else if (b->state == FLOW_ESTABLISHED)
- goto dup;
- else
- goto unex;
- }
- else
- goto dup; /* otherwise SYN on already existing connection gets ignored */
- }
-
- if (tcph->ack)
- {
- if (b->state == FLOW_SYN_SENT_ACK && b->last_acked_seq+1 == ack)
- {
- a->state = b->state = FLOW_ESTABLISHED;
- b->last_acked_seq = ack+1;
- DBG("ACKED SYN, ");
- }
- if (b->state == FLOW_FIN_SENT && b->last_acked_seq+1 == ack)
- {
- b->state = FLOW_CLOSED;
- if (a->state == FLOW_CLOSED)
- {
- DBG("CLOSED BOTH WAYS\n");
- f->appl->close(f, CAUSE_CLOSE);
- flow_set_timeout(f, now + 300); /* FIXME */
- goto drop;
- }
- else
- DBG("CLOSED ONE-WAY, ");
- }
- else if (b->state == FLOW_ESTABLISHED || b->state == FLOW_FIN_SENT)
- DBG("ACK, ");
- else if (b->state == FLOW_CLOSED)
- ;
- else
- goto unex;
- }
-
- if (tcph->fin)
- {
- if (a->state == FLOW_ESTABLISHED)
- {
- a->state = FLOW_FIN_SENT;
- a->last_acked_seq = seq;
- DBG("FIN SENT, waiting for FIN ACK, ");
- }
- else if (a->state == FLOW_FIN_SENT)
- ;
- else
- goto unex;
- }
-
- if (!pkt_len(p))
- {
- DBG("EMPTY\n");
- goto drop;
- }
-
- if (b->state == FLOW_ESTABLISHED || b->state == FLOW_FIN_SENT || b->state == FLOW_CLOSED)
- {
- DBG("DATA\n");
- // list_add_tail(&b->queue, &p->n); /* FIXME */
- if (pkt_len(p))
- f->appl->input(f, (a == &f->pipe[1]), p);
- return;
- }
- else
- goto unex;
-
- drop:
- pkt_free(p);
- return;
-
- dup:
- DBG("DUP\n");
- goto drop;
-
- unex:
- DBG("UNEXPECTED\n");
- goto drop;
-
- inval:
- DBG("???\n");
- invalid:
- pkt_account(&stat_tcp_invalid, p);
- goto drop;
-}
-
-/*** IP LAYER ***/
-
-static struct pkt_stats stat_ip_in, stat_ip_invalid, stat_ip_uninteresting, stat_ip_fragmented, stat_ip_badsum;
-
-static void ip_got_packet(struct pkt *p)
-{
- struct iphdr *iph;
-
- pkt_account(&stat_ip_in, p);
- if (!(iph = pkt_peek(p, sizeof(*iph))))
- goto invalid;
- if (iph->ihl < 5)
- goto invalid;
- if (iph->version != 4)
- goto invalid;
- uns hdrlen = 4*iph->ihl;
- if (pkt_len(p) < hdrlen)
- goto invalid;
- if (!tcpip_verify_checksum(tcpip_calc_checksum(p->data, hdrlen, 0)))
- {
- pkt_account(&stat_ip_badsum, p);
- goto drop;
- }
- uns len = ntohs(iph->tot_len);
- if (len < hdrlen || len > pkt_len(p))
- goto invalid;
- pkt_unappend(p, pkt_len(p) - len);
- pkt_pop(p, hdrlen);
-
- if (iph->protocol != IPPROTO_TCP)
- {
- pkt_account(&stat_ip_uninteresting, p);
- goto drop;
- }
- /* XXX: Fragmentation not supported yet, but well-behaved TCP stacks don't use it anyway */
- if (ntohs(iph->frag_off) & 0x3fff)
- {
- pkt_account(&stat_ip_fragmented, p);
- goto drop;
- }
- tcp_got_packet(iph, p);
- return;
-
- invalid:
- pkt_account(&stat_ip_invalid, p);
- drop:
- pkt_free(p);
-}
-
-/*** LINK LAYER ***/
-
-static struct pkt_stats stat_link_dwarf, stat_link_in, stat_link_unknown, stat_link_arp;
-
-static void link_eth_got_packet(struct pkt *p)
-{
- struct ether_header *eth;
- uns etype;
-
- pkt_account(&stat_link_in, p);
- if (!(eth = pkt_pop(p, sizeof(*eth))))
- {
- pkt_account(&stat_link_dwarf, p);
- return;
- }
- etype = ntohs(eth->ether_type);
- switch (etype)
- {
- case ETHERTYPE_IP:
- ip_got_packet(p);
- break;
- case ETHERTYPE_ARP:
- pkt_account(&stat_link_arp, p);
- pkt_free(p);
- break;
- default:
- // printf("Unknown ethertype: %04x\n", etype);
- pkt_account(&stat_link_unknown, p);
- pkt_free(p);
- }
-}
-