2 * Netgrind -- HTTP Analyser
4 * (c) 2003--2013 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
13 #include "lib/pools.h"
14 #include "netgrind/pkt.h"
15 #include "netgrind/netgrind.h"
21 #include <netinet/in.h>
/* Parser states stored in http_state.state (the enclosing declaration is not shown in this excerpt). */
34 HTTP_IDLE, /* initialized, waiting for request */
35 HTTP_ERROR, /* protocol error, ignoring everything else */
36 HTTP_CUT, /* unexpected EOF in one direction, ignoring everything else */
37 HTTP_REQUEST, /* parsing request */
38 HTTP_BODY_CHUNKED, /* receiving body: chunked encoding */
39 HTTP_BODY_LENGTH, /* receiving body: length given */
40 HTTP_BODY_INF, /* receiving body: till EOF */
41 HTTP_RESPONSE, /* parsing response */
42 HTTP_DONE, /* transaction finished, logging it */
43 HTTP_CONNECT, /* inside CONNECT transaction */
/* Per-connection state fields (excerpt; the enclosing struct header is not shown here). */
/* Timestamps at which the current request and its response started. */
46 u64 req_start_time, resp_start_time;
/* Packets queued by http_input, one list per direction. */
49 list tx_queue, rx_queue;
/* First line of the request / response; each is set to the buffer of a parsed line. */
51 byte *req_line, *resp_line;
/* Header lines collected by http_parse_hdr for the request and the response. */
52 list req_headers, resp_headers;
/* Source of connection ids: http_open assigns the current value to conn_id and increments it. */
66 static uns http_conn_counter;
/* Source of transaction ids: http_init_xact assigns the current value to xact_id and increments it. */
67 static uns http_xact_counter;
/*
 * Sets up HTTP state for a new flow: allocates a zero-filled http_state,
 * gives it the next connection id, initializes both packet queues and
 * records `when` as the start time of the first request.
 * Only part of the function body is visible in this excerpt.
 */
69 static void http_open(struct flow *f, u64 when)
71 struct http_state *s = xmalloc_zero(sizeof(*s));
74 s->conn_id = http_conn_counter++;
75 DBG("HTTP: %d NEW %d.%d.%d.%d:%d -> %d.%d.%d.%d:%d\n", s->conn_id,
76 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
77 list_init(&s->tx_queue);
78 list_init(&s->rx_queue);
79 s->req_start_time = when;
/*
 * Looks for a header in list `l` whose name equals `name`, ignoring case.
 * Callers in this file pass names with the trailing colon (e.g. "Host:")
 * and treat a NULL result as "header not present".
 * The loop and the return statements are not visible in this excerpt.
 */
82 static byte *http_lookup_hdr(list *l, byte *name)
84 struct http_header *h;
86 if (!strcasecmp(h->name, name))
/*
 * Checks whether `needle` occurs as a token in `hay`.
 * Tokens are separated by spaces, tabs and commas; the comparison is
 * case-insensitive (strcasecmp).
 * NOTE(review): callers pass header values that may be NULL; the NULL
 * handling is presumably on a line not shown here -- confirm.
 */
91 static uns find_token(byte *hay, byte *needle)
/* Skip separators in front of the next token. */
97 if (*hay == ' ' || *hay == '\t' || *hay == ',')
/* Advance to the end of the current token. */
102 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
106 uns found = !strcasecmp(h, needle);
/*
 * Opens the log file of the current transaction and writes its header line.
 * The file name is built from http_log_dir, the transaction id and both
 * endpoints of the flow; failure to create the file is fatal (die()).
 * Only part of the function body is visible in this excerpt.
 */
115 static void http_log_start(struct http_state *s)
120 char name[256], stamp[TIMESTAMP_LEN];
121 struct flow *f = s->flow;
/* http_log_dir has no known length bound, so format with an explicit limit instead of overrunning name[]. */
123 snprintf(name, sizeof(name), "%s/%06u-%d.%d.%d.%d:%d-%d.%d.%d.%d:%d", http_log_dir, s->xact_id,
124 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
125 if (!(s->log_file = fopen(name, "w")))
126 die("Unable to create %s: %m", name);
128 format_timestamp(stamp, s->req_start_time);
129 fprintf(s->log_file, "; [%s] From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d (req %u)\n",
130 stamp, IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport),
/* Counterpart of http_log_start. The body is not visible in this excerpt; presumably it closes s->log_file -- TODO confirm. */
134 static void http_log_end(struct http_state *s)
/*
 * Writes one request line or request header to the transaction log,
 * prefixed with "> ".
 * NOTE(review): any guard on s->log_file is on a line not shown here.
 */
142 static void http_log_req_line(struct http_state *s, byte *line)
145 fprintf(s->log_file, "> %s\n", line);
/*
 * Writes one response line or response header to the transaction log,
 * prefixed with "< ".
 * NOTE(review): any guard on s->log_file is on a line not shown here.
 */
148 static void http_log_resp_line(struct http_state *s, byte *line)
151 fprintf(s->log_file, "< %s\n", line);
/*
 * Appends `len` raw body bytes starting at `data` to the transaction log.
 * The fwrite() result is not checked on the visible line.
 */
154 static void http_log_body(struct http_state *s, byte *data, uns len)
157 fwrite(data, len, 1, s->log_file);
/*
 * Prints the one-line summary of a transaction to standard output.
 *
 * f      - flow the transaction belongs to (source of the endpoint addresses)
 * s      - HTTP state holding request/response lines, headers and timing
 * when   - time at which the transaction ended; total time is when - req_start_time
 * reason - result label supplied by the caller (e.g. "ERR", "CUT", "FIN");
 *          a visible line below replaces it with text taken from the
 *          response line, or with "???" when that text is empty
 *
 * Only part of the function body is visible in this excerpt, so the exact
 * conditions guarding several statements cannot be documented here.
 */
160 static void http_report(struct flow *f, struct http_state *s, u64 when, byte *reason)
162 byte *method, *url, *x, *y, *stat;
164 if (!(method = s->req_line))
170 /* Analyse request line */
172 while (*url && *url != ' ')
181 /* Analyse response line */
182 if (stat = s->resp_line)
184 while (*stat && *stat != ' ')
189 while (*x && *x != ' ')
196 reason = stat[0] ? stat : (byte*)"???";
198 /* Reconstruct full URL */
/* Only needed when the request carries no scheme and the method is not CONNECT. */
199 if (!strstr(url, "://") && strcasecmp(method, "CONNECT"))
201 if (!(x = http_lookup_hdr(&s->req_headers, "Host:")))
/* 7 is the length of "http://"; the final +1 is for the terminating NUL. */
204 url = alloca(7 + strlen(x) + strlen(y) + 1);
205 sprintf(url, "http://%s%s", x, y);
207 char *ffor = http_lookup_hdr(&s->req_headers, "X-Forwarded-For:");
209 /* Find out cacheability */
210 byte *rq_pragma = http_lookup_hdr(&s->req_headers, "Pragma:");
211 byte *rp_pragma = http_lookup_hdr(&s->resp_headers, "Pragma:");
212 byte *rq_cc = http_lookup_hdr(&s->req_headers, "Cache-control:");
213 byte *rp_cc = http_lookup_hdr(&s->resp_headers, "Cache-control:");
214 byte *rp_cache = http_lookup_hdr(&s->resp_headers, "X-Cache:");
/* The three flags are printed below with %c, i.e. as single characters. */
215 uns rq_cflag, rp_cflag, rp_hit;
216 if (find_token(rq_pragma, "no-cache") || find_token(rq_cc, "no-cache"))
218 else if (find_token(rq_cc, "max-age=0") || find_token(rq_cc, "must-revalidate"))
222 if (find_token(rp_pragma, "no-cache") || find_token(rp_cc, "no-cache"))
224 else if (find_token(rp_cc, "private"))
226 else if (find_token(rp_cc, "no-store"))
228 else if (find_token(rp_cc, "must-revalidate"))
/* NOTE(review): rp_cache may be NULL; the guard is presumably on a line not shown here -- confirm. */
234 else if (!strncmp(rp_cache, "HIT ", 4))
236 else if (!strncmp(rp_cache, "MISS ", 5))
/* 22 bytes hold the longest "a.b.c.d:port" string (21 characters) plus the NUL. */
241 byte stamp[TIMESTAMP_LEN], src[22], dst[22];
242 sprintf(src, "%d.%d.%d.%d:%d", IPQUAD(f->saddr), ntohs(f->sport));
243 sprintf(dst, "%d.%d.%d.%d:%d", IPQUAD(f->daddr), ntohs(f->dport));
244 format_timestamp(stamp, s->req_start_time);
245 u64 ttotal = when - s->req_start_time;
/* Time until the response started; 0 when no response line was seen. */
246 u64 tresp = (s->resp_line ? (s->resp_start_time - s->req_start_time) : 0);
/* Content type of the response, else of the request, else "-". */
247 byte *ctype = (http_lookup_hdr(&s->resp_headers, "Content-type:") ? : http_lookup_hdr(&s->req_headers, "Content-type:")) ? : (byte*)"-";
249 if (sep = strchr(ctype, ';'))
252 printf("# id timestamp source destination forwarded-for res cac que length total time wait time ctype method URL\n");
253 /* 000000 2003-06-06 22:53:38.642 81.27.194.19:1175 205.217.153.53:80 123.123.123.123 200 ... 0 14030 0.957 0.444 text/plain GET http://... */
254 printf("%06u %s %-21s %-21s %-15s %-3s %c%c%c %3d %8d %6d.%03d %6d.%03d %-12s %s %s\n",
255 s->xact_id, stamp, src, dst, (ffor ? : "-"), reason,
256 rq_cflag, rp_cflag, rp_hit,
/* Times are printed as seconds.milliseconds; the arithmetic implies microsecond timestamps -- confirm against the packet layer. */
259 (uns)(ttotal/1000000), (uns)(ttotal%1000000)/1000,
260 (uns)(tresp/1000000), (uns)(tresp%1000000)/1000,
/*
 * Handles the end of a flow: reports a transaction that is still pending
 * and flushes both packet queues.
 *
 * When `cause` is not CAUSE_CLOSE and the connection is not idle, the
 * report label is "T" followed by the short name of the cause. The other
 * visible reports use the labels "ERR", "CUT" and "FIN"; the conditions
 * selecting them are on lines not shown in this excerpt.
 */
267 static void http_close(struct flow *f, int cause, u64 when)
269 struct http_state *s = f->appl_data;
270 DBG("HTTP: %d CLOSE in state %d (cause %d)\n", s->conn_id, s->state, cause);
271 if (cause != CAUSE_CLOSE)
273 if (s->state != HTTP_IDLE)
/* NOTE(review): buf is declared on a line not shown here; confirm it can hold "T" plus the longest short cause name. */
276 sprintf(buf, "T%s", flow_cause_names_short[cause]);
277 http_report(f, s, when, buf);
284 http_report(f, s, when, "ERR");
287 http_report(f, s, when, "CUT");
290 http_report(f, s, when, "FIN");
295 pkt_flush_queue(&s->rx_queue);
296 pkt_flush_queue(&s->tx_queue);
/*
 * Assembles one text line from the packets queued on `l`.
 *
 * Bytes are accumulated in s->line across calls. A finished line is copied
 * into a new http_header allocated from s->pool, NUL-terminated, with its
 * name and value pointers cleared. A line that reaches MAXLINE-1 bytes
 * switches the connection to HTTP_ERROR.
 * Callers treat a NULL result as "no complete line available yet".
 * The end-of-line detection and the return statements are on lines not
 * shown in this excerpt.
 */
302 static struct http_header *http_get_line(struct http_state *s, list *l)
306 struct pkt *p = list_head(l);
309 while (p->data < p->stop)
/* sizeof(*h) + line_len: assumes h->buf provides room for the terminating NUL -- TODO confirm against struct http_header. */
316 struct http_header *h = mp_alloc(s->pool, sizeof(*h) + s->line_len);
317 memcpy(h->buf, s->line, s->line_len);
318 h->buf[s->line_len] = 0;
319 h->name = h->value = NULL;
323 else if (s->line_len >= MAXLINE-1)
325 DBG("HTTP: Line too long!\n");
326 s->state = HTTP_ERROR;
330 s->line[s->line_len++] = c;
/*
 * Consumes body bytes from the head packet of s->body_queue.
 *
 * At most s->body_len bytes are taken, limited by what the packet holds;
 * the consumed bytes are written to the transaction log and added to
 * s->body_total_size.
 * The surrounding loop, the bookkeeping of body_len and the return value
 * are on lines not shown in this excerpt.
 */
337 static int http_skip_body_bytes(struct http_state *s)
341 struct pkt *p = list_head(s->body_queue);
344 uns avail = pkt_len(p);
345 uns want = s->body_len;
346 uns go = MIN(avail, want);
347 http_log_body(s, p->data, go);
350 s->body_total_size += go;
/*
 * Inspects the packets queued on `l`, starting at the head of the list.
 * http_input uses the result as a boolean "is there input to process";
 * the exact test is on lines not shown in this excerpt.
 */
361 static int http_have_input(list *l)
365 struct pkt *p = list_head(l);
/*
 * Prepares the state for a new transaction: empties both header lists,
 * creates a fresh memory pool, clears the request and response lines,
 * zeroes the body size counter and assigns the next transaction id.
 * Only part of the function body is visible in this excerpt.
 */
375 static void http_init_xact(struct http_state *s)
377 list_init(&s->req_headers);
378 list_init(&s->resp_headers);
/* NOTE(review): the release of the previous transaction's pool is not visible here -- confirm it is freed elsewhere. */
382 s->pool = mp_new(4096);
383 s->req_line = s->resp_line = NULL;
385 s->body_total_size = 0;
386 s->xact_id = http_xact_counter++;
/*
 * Parses one header line and appends it to list `l`.
 * The visible scans advance to the first space or tab and then past the
 * run of spaces and tabs that follows; presumably this separates the
 * header name from its value -- the assignments are on lines not shown here.
 */
391 static void http_parse_hdr(list *l, struct http_header *h)
395 while (*x && *x != ' ' && *x != '\t')
397 while (*x == ' ' || *x == '\t')
400 list_add_tail(l, &h->n);
/*
 * Determines from the headers in `hdr` how the message body is delimited
 * and sets s->state to match:
 *   Transfer-Encoding: chunked -> HTTP_BODY_CHUNKED
 *   Content-Length: N          -> HTTP_BODY_LENGTH with s->body_len = N
 * HTTP_ERROR is also set on a path whose condition is not visible here
 * (presumably an unsupported transfer encoding -- confirm).
 * Callers use the result as a boolean; the return statements are on lines
 * not shown in this excerpt.
 */
403 static int http_ask_body(struct http_state *s, list *hdr)
406 if (x = http_lookup_hdr(hdr, "Transfer-Encoding:"))
408 DBG("\tBody encoding: %s\n", x);
409 if (!strcasecmp(x, "chunked"))
411 s->state = HTTP_BODY_CHUNKED;
416 s->state = HTTP_ERROR;
418 else if (x = http_lookup_hdr(hdr, "Content-Length:"))
/* atol() performs no validation; a malformed or negative value is stored as-is. */
420 s->body_len = atol(x);
421 DBG("\tBody length: %d\n", s->body_len);
422 s->state = HTTP_BODY_LENGTH;
/*
 * Decides what follows a completely received request header:
 *   - a request line without " HTTP/1" is a protocol error;
 *   - if the headers describe a body, http_ask_body has selected the state;
 *   - a POST without any body description is read until EOF;
 *   - anything else has no body and the response is awaited.
 * The body, if any, is read from the tx queue and followed by the response.
 * Only part of the function body is visible in this excerpt.
 */
429 static void http_parse_req(struct http_state *s)
431 if (!strstr(s->req_line, " HTTP/1"))
433 DBG("\tNot a HTTP/1.x request!\n");
434 s->state = HTTP_ERROR;
436 else if (http_ask_body(s, &s->req_headers))
/* Compare all 5 characters of "POST " so the trailing space is checked, as the HEAD/GET/CONNECT tests do. */
438 else if (!strncasecmp(s->req_line, "POST ", 5))
440 DBG("\tPOST with no request body, that smells!\n");
441 s->state = HTTP_BODY_INF;
445 DBG("\tNo request body, awaiting reply\n");
446 s->state = HTTP_RESPONSE;
448 s->body_queue = &s->tx_queue;
449 s->body_end_state = HTTP_RESPONSE;
/*
 * Decides what follows a completely received response header:
 *   - the response to a HEAD request has no body, the transaction is done;
 *   - if the headers describe a body, http_ask_body has selected the state;
 *   - a " 200 " response to a GET without any body description is read until EOF;
 *   - anything else has no body and the transaction is done.
 * The body, if any, is read from the rx queue and followed by HTTP_DONE.
 * Only part of the function body is visible in this excerpt.
 */
452 static void http_parse_resp(struct http_state *s)
454 if (!strncasecmp(s->req_line, "HEAD ", 5))
456 DBG("\tHEAD has no body :)\n");
457 s->state = HTTP_DONE;
459 else if (http_ask_body(s, &s->resp_headers))
461 else if (!strncasecmp(s->req_line, "GET ", 4) && strstr(s->resp_line, " 200 "))
463 DBG("\tGET with no response body, that smells!\n");
464 s->state = HTTP_BODY_INF;
468 DBG("\tNo response body\n");
469 s->state = HTTP_DONE;
471 s->body_queue = &s->rx_queue;
472 s->body_end_state = HTTP_DONE;
/*
 * Processes packet `p` received on flow `f` in direction `dir`.
 *
 * Input is dropped once the connection is in HTTP_ERROR or HTTP_CUT.
 * Otherwise the packet is appended to the tx queue (dir non-zero) or the
 * rx queue (dir zero) and the state machine runs: request line and
 * headers, optional request body, response line and headers, optional
 * response body, then the report.
 *
 * The switch statement, its case labels and most branch targets are on
 * lines not shown in this excerpt; the comments below describe only the
 * visible statements.
 */
475 static void http_input(struct flow *f, int dir, struct pkt *p)
477 struct http_state *s = f->appl_data;
478 struct http_header *h;
/* Whether each direction of the flow is already in state FLOW_FINISHED. */
479 int fin_tx = (f->pipe[0].state == FLOW_FINISHED);
480 int fin_rx = (f->pipe[1].state == FLOW_FINISHED);
482 // DBG("dir=%d txf=%d rxf=%d len=%d\n", dir, fin_tx, fin_rx, pkt_len(p));
483 if (s->state == HTTP_ERROR || s->state == HTTP_CUT)
485 DBG("HTTP: %d DROPPING INPUT\n", s->conn_id);
490 list_add_tail((dir ? &s->tx_queue : &s->rx_queue), &p->n);
493 DBG("HTTP: %d STATE %d\n", s->conn_id, s->state);
497 if (fin_tx || !http_have_input(&s->tx_queue))
499 s->state = HTTP_REQUEST;
/* req_start_time is reset to 0 after each reported transaction; stamp the new request with this packet's time. */
500 if (!s->req_start_time)
501 s->req_start_time = p->timestamp;
505 if (fin_tx || fin_rx)
507 if (!(h = http_get_line(s, &s->tx_queue)))
509 DBG("\t>> %s\n", h->buf);
510 http_log_req_line(s, h->buf);
515 s->req_line = h->buf;
518 http_parse_hdr(&s->req_headers, h);
522 case HTTP_BODY_LENGTH:
525 if (!http_skip_body_bytes(s))
527 DBG("\tEnd of body\n");
528 s->state = s->body_end_state;
530 case HTTP_BODY_CHUNKED:
535 if (!http_skip_body_bytes(s))
538 else if (s->body_trailer)
540 if (!(h = http_get_line(s, s->body_queue)))
544 DBG("\tEnd of chunk-encoded body\n");
545 s->state = s->body_end_state;
550 if (!(h = http_get_line(s, s->body_queue)))
/* The chunk size line is parsed as a hexadecimal number. */
552 if (sscanf(h->buf, "%x", &s->body_len) != 1)
555 s->body_len += 2; /* extra CRLF */
556 else /* last chunk */
562 http_skip_body_bytes(s);
565 DBG("\tEnd of FIN-delimited body\n");
566 s->state = s->body_end_state;
574 if (!(h = http_get_line(s, &s->rx_queue)))
576 DBG("\t<< %s\n", h->buf);
577 http_log_resp_line(s, h->buf);
582 s->resp_line = h->buf;
583 s->resp_start_time = p->timestamp;
586 http_parse_hdr(&s->resp_headers, h);
591 DBG("\tTransaction finished.\n");
592 if (!strncasecmp(s->req_line, "CONNECT ", 8))
594 s->state = HTTP_CONNECT;
/* Report the transaction with no caller-supplied reason, then return to idle and clear the request timestamp. */
597 http_report(f, s, p->timestamp, NULL);
598 s->state = HTTP_IDLE;
599 s->req_start_time = 0;
/* Point body_queue at each direction in turn and consume its queued bytes. */
603 s->body_queue = &s->rx_queue;
604 http_skip_body_bytes(s);
606 s->body_queue = &s->tx_queue;
607 http_skip_body_bytes(s);
618 DBG("HTTP: %d ERROR: PROTOCOL VIOLATION\n", s->conn_id);
619 s->state = HTTP_ERROR;
623 DBG("HTTP: %d ERROR: UNEXPECTED EOF\n", s->conn_id);
627 struct appl_hooks appl_http = {