2 * Netgrind -- HTTP Analyser
4 * (c) 2003--2013 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
13 #include "lib/pools.h"
14 #include "netgrind/pkt.h"
15 #include "netgrind/netgrind.h"
22 #include <netinet/in.h>
/*
 * Values stored in the `state' field of struct http_state.  The functions below
 * move a connection between them while the request and the response are parsed.
 */
35 HTTP_IDLE, /* initialized, waiting for request */
36 HTTP_ERROR, /* protocol error, ignoring everything else */
37 HTTP_CUT, /* unexpected EOF in one direction, ignoring everything else */
38 HTTP_REQUEST, /* parsing request */
39 HTTP_BODY_CHUNKED, /* receiving body: chunked encoding */
40 HTTP_BODY_LENGTH, /* receiving body: length given */
41 HTTP_BODY_INF, /* receiving body: till EOF */
42 HTTP_RESPONSE, /* parsing response */
43 HTTP_DONE, /* transaction finished, logging it */
44 HTTP_CONNECT, /* inside CONNECT transaction */
/* Start times of the current request and response; http_input() treats req_start_time == 0 as "not set yet". */
47 u64 req_start_time, resp_start_time;
/* Queued packets: http_input() appends to tx_queue when `dir' is non-zero, to rx_queue otherwise. */
50 list tx_queue, rx_queue;
/* Request line and response line of the current transaction; reset to NULL by http_init_xact(). */
52 byte *req_line, *resp_line;
/* Parsed headers of the request and of the response, appended by http_parse_hdr(). */
53 list req_headers, resp_headers;
/* Source of connection ids: post-incremented in http_open() to fill conn_id. */
67 static uns http_conn_counter;
/* Source of transaction ids: post-incremented in http_init_xact() to fill xact_id. */
68 static uns http_xact_counter;
/*
 * Start HTTP analysis of flow `f': allocate a struct http_state with xmalloc_zero(),
 * give it the next connection id, initialise both packet queues and record `when'
 * as the start time of the first request.
 */
70 static void http_open(struct flow *f, u64 when)
72 struct http_state *s = xmalloc_zero(sizeof(*s));
75 s->conn_id = http_conn_counter++;
76 DBG("HTTP: %d NEW %d.%d.%d.%d:%d -> %d.%d.%d.%d:%d\n", s->conn_id,
77 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
78 list_init(&s->tx_queue);
79 list_init(&s->rx_queue);
80 s->req_start_time = when;
/*
 * Search header list `l' for an entry whose name matches `name'.  The comparison
 * uses strcasecmp(), so it is case-insensitive.  Callers in this file pass the name
 * with its trailing colon (e.g. "Host:") and treat a NULL result as "no such header".
 */
83 static byte *http_lookup_hdr(list *l, byte *name)
85 struct http_header *h;
87 if (!strcasecmp(h->name, name))
/*
 * Look for `needle' among the tokens of `hay'.  Spaces, tabs and commas delimit
 * tokens; a token is compared with `needle' by strcasecmp(), i.e. case-insensitively.
 * NOTE(review): callers pass header values obtained from http_lookup_hdr(), which may
 * be NULL; the NULL handling is not visible in this excerpt -- confirm.
 */
92 static uns find_token(byte *hay, byte *needle)
/* current character is a separator */
98 if (*hay == ' ' || *hay == '\t' || *hay == ',')
/* loop condition holds while `hay' is inside a token */
103 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
107 uns found = !strcasecmp(h, needle);
/*
 * Variant of find_token() for "name=value" tokens: the token name additionally ends
 * at '=', and the name is compared with `needle' case-insensitively.
 * The caller in http_cache_report() feeds the result to atoi(), so the return value
 * is presumably the text following '=' -- confirm against the full body.
 */
116 static byte *find_token_val(byte *hay, byte *needle)
/* current character is a separator */
122 if (*hay == ' ' || *hay == '\t' || *hay == ',')
/* loop condition holds while `hay' is inside the token name (stops at '=' too) */
127 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t' && *hay != '=')
133 uns found = !strcasecmp(h, needle);
/* loop condition holds until the next separator or the end of the string */
138 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
/*
 * Open the per-transaction log file and write its header line.
 * The file is named "<http_log_dir>/<xact_id>-<src ip:port>-<dst ip:port>" and is
 * opened for writing; failure to create it is fatal (die()).  The header line carries
 * the formatted request start time and both endpoints of the flow.
 */
147 static void http_log_start(struct http_state *s)
152 char name[256], stamp[TIMESTAMP_LEN];
153 struct flow *f = s->flow;
/* Bounded formatting: http_log_dir has no visible length limit, so never write past name[]. */
155 snprintf(name, sizeof(name), "%s/%06u-%d.%d.%d.%d:%d-%d.%d.%d.%d:%d", http_log_dir, s->xact_id,
156 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
157 if (!(s->log_file = fopen(name, "w")))
158 die("Unable to create %s: %m", name);
160 format_timestamp(stamp, s->req_start_time);
161 fprintf(s->log_file, "; [%s] From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d (req %u)\n",
162 stamp, IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport),
/*
 * Counterpart of http_log_start().
 * NOTE(review): the body is not visible in this excerpt; presumably it finishes the
 * log file opened there -- confirm.
 */
166 static void http_log_end(struct http_state *s)
/* Write one line of the request to the transaction log, prefixed with "> ". */
174 static void http_log_req_line(struct http_state *s, byte *line)
177 fprintf(s->log_file, "> %s\n", line);
/* Write one line of the response to the transaction log, prefixed with "< ". */
180 static void http_log_resp_line(struct http_state *s, byte *line)
183 fprintf(s->log_file, "< %s\n", line);
/* Copy `len' raw body bytes starting at `data' to the transaction log. */
186 static void http_log_body(struct http_state *s, byte *data, uns len)
/* written as a single item of `len' bytes; the fwrite() result is not checked */
189 fwrite(data, len, 1, s->log_file);
/*
 * Match the string `a' against the date template `p'.  http_parse_date() passes
 * templates such as "Aaa, ##_Aaa_#### ##:##:##*".  Each test below detects a
 * character of `a' that falls outside the class demanded by the template; the
 * template characters that select the individual tests are on lines not shown here.
 */
195 validate_time(byte *a, byte *p)
/* decimal digit required */
208 if (*a < '0' || *a > '9')
/* lower-case ASCII letter required */
212 if (*a < 'a' || *a > 'z')
/* upper-case ASCII letter required */
216 if (*a < 'A' || *a > 'Z')
/*
 * ASCII letter of either case required: mismatch when `*a' lies outside 'a'..'z'
 * AND outside 'A'..'Z'.  (The range tests must be joined with ||; with && neither
 * range test could ever be true and the check would never reject anything.)
 */
220 if ((*a < 'a' || *a > 'z') && (*a < 'A' || *a > 'Z'))
/* space or dash required */
226 if (*a != ' ' && *a != '-')
/* accept when the current template and input characters are equal or the template character is '*' */
236 return (*p == *a || *p == '*');
/* Value of the two ASCII decimal digits p[0] p[1]; the characters are not validated here. */
245 return (p[0] - '0')*10 + p[1] - '0';
/* Value of four ASCII decimal digits, built from two twodig() halves. */
251 return twodig(p)*100 + twodig(p+2);
/* Three-letter English month abbreviations, January to December, concatenated without separators. */
254 static byte short_months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
259 byte *z = short_months;
/* exact, case-sensitive comparison of the first three characters of `p' with the table entry at `z' */
265 if (p[0] == z[0] && p[1] == z[1] && p[2] == z[2])
/*
 * Convert the date string of an HTTP header into a timestamp.
 * Layouts are recognised with validate_time(): "Aaa, ##_Aaa_#### ##:##:##*",
 * the RFC 850 form (plus a variant with a one-digit day) and the ANSI C asctime() form.
 * The literal strings "0" and "-1" yield 0.  A date that cannot be parsed is reported
 * on stderr.  On success the time_t value is multiplied by 1000000 before it is returned.
 * NOTE(review): callers pass the result of http_lookup_hdr(), which may be NULL; the
 * NULL handling is not visible in this excerpt -- confirm.
 */
272 static s64 http_parse_date(char *date)
282 bzero(&tm, sizeof(tm));
283 if (validate_time(p, "Aaa, ##_Aaa_#### ##:##:##*"))
/* the digit offsets below follow the layout of the template above */
285 tm.tm_mday = twodig(p+5);
/* struct tm counts years from 1900 */
293 tm.tm_year = y - 1900;
294 tm.tm_hour = twodig(p+17);
295 tm.tm_min = twodig(p+20);
296 tm.tm_sec = twodig(p+23);
/* the RFC 850 templates are matched from the first comma onwards */
299 if (q = strchr(p, ','))
302 if (validate_time(q, ", ##-Aaa-## ##:##:## GMT")) /* RFC 850 */
304 else if (validate_time(q, ", #-Aaa-## ##:##:## GMT")) /* Incorrectly implemented RFC 850 */
308 tm.tm_mday = twodig(q+2);
/* two-digit year taken as is at this point */
315 tm.tm_year = twodig(q+9);
318 tm.tm_hour = twodig(q+12);
319 tm.tm_min = twodig(q+15);
320 tm.tm_sec = twodig(q+18);
324 if (validate_time(p, "Aaa Aaa $# ##:##:## ####"))
325 { /* ANSI C asctime() */
330 tm.tm_mday = twodig(p+8);
331 tm.tm_hour = twodig(p+11);
332 tm.tm_min = twodig(p+14);
333 tm.tm_sec = twodig(p+17);
337 tm.tm_year = y - 1900;
340 if (!strcmp(p, "0") || !strcmp(p, "-1"))
341 return 0; /* Porcine hacks */
344 fprintf(stderr, "Unable to parse date `%s'\n", date);
/* (time_t) -1 is the error value of the conversion that produced `m' */
349 if (m == (time_t) -1)
351 return (u64) m * 1000000;
/*
 * Work out the cacheability flags of the current transaction and store them in the
 * caller-supplied `buf' (http_report() passes a 16-byte array).
 * Inputs are the Pragma and Cache-control headers of the request and of the response
 * and the Vary, X-Cache, Expires, Date, ETag and Last-Modified headers of the response.
 * When a transaction log is written, the derived expiry values are dumped to it as well.
 * Only the update of buf[1] is visible in this excerpt; the assignments of the other
 * flag characters are on lines not shown here.
 */
354 static void http_cache_report(struct http_state *s, char *buf)
356 byte *rq_pragma = http_lookup_hdr(&s->req_headers, "Pragma:");
357 byte *rp_pragma = http_lookup_hdr(&s->resp_headers, "Pragma:");
358 byte *rq_cc = http_lookup_hdr(&s->req_headers, "Cache-control:");
359 byte *rp_cc = http_lookup_hdr(&s->resp_headers, "Cache-control:");
360 byte *rp_vary = http_lookup_hdr(&s->resp_headers, "Vary:");
361 byte *rp_cache = http_lookup_hdr(&s->resp_headers, "X-Cache:");
362 s64 rp_expires = http_parse_date(http_lookup_hdr(&s->resp_headers, "Expires:"));
363 s64 rp_date = http_parse_date(http_lookup_hdr(&s->resp_headers, "Date:"));
/* reference time of the reply: its Date header if usable, else the response start time, else the request start time */
364 s64 rp_time = rp_date > 0 ? rp_date : s->resp_line ? (s64) s->resp_start_time : (s64) s->req_start_time;
366 // Cache control in request
367 if (find_token(rq_pragma, "no-cache") || find_token(rq_cc, "no-cache"))
369 else if (find_token(rq_cc, "max-age=0") || find_token(rq_cc, "must-revalidate"))
374 // HTTP/1.0 cache control in reply
375 uns expired10 = (rp_expires > 0 && rp_expires <= rp_time);
376 uns nocache10 = find_token(rp_pragma, "no-cache");
378 // Expiration of reply
379 byte *rp_maxage_arg = find_token_val(rp_cc, "max-age");
/* max-age is given in seconds; scaled by 1000000 to match the other time values */
382 rp_maxage = (s64) atoi(rp_maxage_arg) * 1000000;
385 else if (rp_expires >= rp_time)
386 rp_maxage = rp_expires - rp_time;
391 fprintf(s->log_file, "; rp_expires=%Ld rp_time=%Ld rp_maxage=%Ld expired10=%d nocache10=%d\n", rp_expires, rp_time, rp_maxage, expired10, nocache10);
394 // Cache control in reply
395 if (nocache10 || find_token(rp_cc, "no-cache"))
397 else if (find_token(rp_cc, "private"))
399 else if (find_token(rp_cc, "no-store"))
401 else if (expired10 || !rp_maxage)
405 else if (find_token(rp_cc, "must-revalidate"))
/* short-lived reply: positive max-age below 300 seconds */
407 else if (rp_maxage > 0 && rp_maxage < (s64) 300 * 1000000)
409 else if (rp_maxage > 0)
414 // Do HTTP/1.1 and HTTP/1.0 behaviour match?
415 if (buf[1] != '.' && buf[1] != 'E' && buf[1] != 'L' && !expired10 && !nocache10)
416 buf[1] |= 0x20; // Lowercase
418 // Validators in reply
419 byte *rp_etag = http_lookup_hdr(&s->resp_headers, "ETag:");
420 s64 rp_lastmod = http_parse_date(http_lookup_hdr(&s->resp_headers, "Last-Modified:"));
/* "W/" prefix marks a weak ETag.  NOTE(review): the guard against rp_etag == NULL is not visible here -- confirm. */
423 if (rp_etag[0] == 'W' && rp_etag[1] == '/')
428 else if (rp_lastmod > 0)
433 // Is there cache status in reply?
436 else if (!strncmp(rp_cache, "HIT", 3))
438 else if (!strncmp(rp_cache, "MISS", 4))
/*
 * Print the one-line summary of a transaction on stdout.
 *   f      -- flow the transaction belongs to (source of the two endpoint addresses)
 *   s      -- HTTP state holding the request/response lines and headers
 *   when   -- time at which the transaction ended; used for the total duration
 *   reason -- result tag chosen by the caller; when a response line exists, the text
 *             taken from that line is used instead
 * The line lists the transaction id, time stamp, endpoints, X-Forwarded-For, result,
 * cache flags, timings, content type, method and URL.
 */
446 static void http_report(struct flow *f, struct http_state *s, u64 when, byte *reason)
448 byte *method, *url, *x, *y, *stat;
/* no request line was recorded */
450 if (!(method = s->req_line))
456 /* Analyse request line */
458 while (*url && *url != ' ')
467 /* Analyse response line */
468 if (stat = s->resp_line)
470 while (*stat && *stat != ' ')
475 while (*x && *x != ' ')
482 reason = stat[0] ? stat : (byte*)"???";
484 /* Reconstruct full URL */
/* applies to URLs without a scheme, except for CONNECT requests */
485 if (!strstr(url, "://") && strcasecmp(method, "CONNECT"))
487 if (!(x = http_lookup_hdr(&s->req_headers, "Host:")))
/* 7 == strlen("http://"), +1 for the terminating NUL */
490 url = alloca(7 + strlen(x) + strlen(y) + 1);
491 sprintf(url, "http://%s%s", x, y);
493 char *ffor = http_lookup_hdr(&s->req_headers, "X-Forwarded-For:");
495 /* Find out cacheability */
496 char cache_flags[16];
497 http_cache_report(s, cache_flags);
499 /* Format log message */
/* 22 bytes hold "255.255.255.255:65535" plus the terminating NUL */
500 byte stamp[TIMESTAMP_LEN], src[22], dst[22];
501 sprintf(src, "%d.%d.%d.%d:%d", IPQUAD(f->saddr), ntohs(f->sport));
502 sprintf(dst, "%d.%d.%d.%d:%d", IPQUAD(f->daddr), ntohs(f->dport));
503 format_timestamp(stamp, s->req_start_time);
504 u64 ttotal = when - s->req_start_time;
/* time until the response started; 0 when no response line was seen */
505 u64 tresp = (s->resp_line ? (s->resp_start_time - s->req_start_time) : 0);
/* content type of the response, else of the request, else "-" */
506 byte *ctype = (http_lookup_hdr(&s->resp_headers, "Content-type:") ? : http_lookup_hdr(&s->req_headers, "Content-type:")) ? : (byte*)"-";
/* a ';' starts the parameters of the content type */
508 if (sep = strchr(ctype, ';'))
511 printf("# id timestamp source destination forwarded-for res cach que length total time wait time ctype method URL\n");
512 /* 000000 2003-06-06 22:53:38.642 81.27.194.19:1175 205.217.153.53:80 123.123.123.123 200 .... 0 14030 0.957 0.444 text/plain GET http://... */
513 printf("%06u %s %-21s %-21s %-15s %-3s %s %3d %8d %6d.%03d %6d.%03d %-12s %s %s\n",
514 s->xact_id, stamp, src, dst, (ffor ? : "-"), reason,
/* durations are split into whole seconds and milliseconds */
518 (uns)(ttotal/1000000), (uns)(ttotal%1000000)/1000,
519 (uns)(tresp/1000000), (uns)(tresp%1000000)/1000,
/*
 * Handle the end of flow `f'.  A transaction that is still in progress is reported
 * with a tag describing how it ended: "T" followed by the short cause name when the
 * flow did not end with CAUSE_CLOSE, otherwise one of "ERR", "CUT" or "FIN".
 * Afterwards both packet queues are flushed.
 */
526 static void http_close(struct flow *f, int cause, u64 when)
528 struct http_state *s = f->appl_data;
529 DBG("HTTP: %d CLOSE in state %d (cause %d)\n", s->conn_id, s->state, cause);
530 if (cause != CAUSE_CLOSE)
/* nothing to report for a connection that is idle between transactions */
532 if (s->state != HTTP_IDLE)
/* NOTE(review): the declaration of `buf' is not visible here; check that it can hold "T" plus the longest short cause name */
535 sprintf(buf, "T%s", flow_cause_names_short[cause]);
536 http_report(f, s, when, buf);
543 http_report(f, s, when, "ERR");
546 http_report(f, s, when, "CUT");
549 http_report(f, s, when, "FIN");
554 pkt_flush_queue(&s->rx_queue);
555 pkt_flush_queue(&s->tx_queue);
/*
 * Assemble one text line from the packets queued in `l'.
 * Bytes are consumed from the packet at the head of the list and collected in
 * s->line.  A finished line is copied into a struct http_header allocated from
 * s->pool (NUL-terminated, name and value still unset).  A line that reaches
 * MAXLINE-1 bytes switches the connection to HTTP_ERROR.
 * Callers treat a NULL result as "no complete line available yet".
 */
561 static struct http_header *http_get_line(struct http_state *s, list *l)
565 struct pkt *p = list_head(l);
/* unread bytes remain in this packet */
568 while (p->data < p->stop)
/* the line is copied to h->buf and terminated with a NUL byte */
575 struct http_header *h = mp_alloc(s->pool, sizeof(*h) + s->line_len);
576 memcpy(h->buf, s->line, s->line_len);
577 h->buf[s->line_len] = 0;
578 h->name = h->value = NULL;
582 else if (s->line_len >= MAXLINE-1)
584 DBG("HTTP: Line too long!\n");
585 s->state = HTTP_ERROR;
589 s->line[s->line_len++] = c;
/*
 * Consume body bytes from the packets queued in s->body_queue.
 * At most s->body_len bytes are taken from the head packet; they are passed to
 * http_log_body() and added to s->body_total_size.
 * Callers treat a zero result as "cannot proceed yet".
 */
596 static int http_skip_body_bytes(struct http_state *s)
600 struct pkt *p = list_head(s->body_queue);
603 uns avail = pkt_len(p);
604 uns want = s->body_len;
/* never take more than the packet holds or the body still needs */
605 uns go = MIN(avail, want);
606 http_log_body(s, p->data, go);
609 s->body_total_size += go;
/*
 * Inspect packet list `l', starting from its head packet; http_input() calls it
 * on the tx queue before starting a new request.
 * NOTE(review): the test itself is not visible in this excerpt; presumably non-zero
 * means that unread data is queued -- confirm.
 */
620 static int http_have_input(list *l)
624 struct pkt *p = list_head(l);
/*
 * Prepare the state for a new transaction: empty both header lists, create a fresh
 * memory pool, clear the request and response lines, reset the body size counter
 * and assign the next transaction id.
 */
634 static void http_init_xact(struct http_state *s)
636 list_init(&s->req_headers);
637 list_init(&s->resp_headers);
/* http_get_line() allocates the header records of this transaction from this pool */
641 s->pool = mp_new(4096);
642 s->req_line = s->resp_line = NULL;
644 s->body_total_size = 0;
645 s->xact_id = http_xact_counter++;
/*
 * Split the header line held in `h' into a name and a value and append `h' to list `l'.
 * The name ends at the first space or tab; the blanks that follow it are skipped
 * before the value.
 */
650 static void http_parse_hdr(list *l, struct http_header *h)
/* loop condition holds while `x' is inside the header name */
654 while (*x && *x != ' ' && *x != '\t')
/* loop condition holds while `x' is on the blanks between name and value */
656 while (*x == ' ' || *x == '\t')
659 list_add_tail(l, &h->n);
/*
 * Decide from header list `hdr' how the message body is delimited and set s->state
 * accordingly:
 *   Transfer-Encoding: chunked      -> HTTP_BODY_CHUNKED
 *   any other Transfer-Encoding     -> HTTP_ERROR
 *   Content-Length: n               -> HTTP_BODY_LENGTH with s->body_len = n
 * Callers use the result as a flag saying whether a decision was made.
 */
662 static int http_ask_body(struct http_state *s, list *hdr)
665 if (x = http_lookup_hdr(hdr, "Transfer-Encoding:"))
667 DBG("\tBody encoding: %s\n", x);
668 if (!strcasecmp(x, "chunked"))
670 s->state = HTTP_BODY_CHUNKED;
675 s->state = HTTP_ERROR;
677 else if (x = http_lookup_hdr(hdr, "Content-Length:"))
/* atol() performs no error checking on the header value */
679 s->body_len = atol(x);
680 DBG("\tBody length: %d\n", s->body_len);
681 s->state = HTTP_BODY_LENGTH;
/*
 * Called once the request line and request headers are available: choose the next
 * state of the connection.
 *   - request line without " HTTP/1"              -> HTTP_ERROR
 *   - body announced by the headers               -> state set by http_ask_body()
 *   - POST without any body announcement          -> HTTP_BODY_INF (body runs till EOF)
 *   - otherwise                                   -> HTTP_RESPONSE
 * A request body is read from the tx queue and is followed by HTTP_RESPONSE.
 */
688 static void http_parse_req(struct http_state *s)
690 if (!strstr(s->req_line, " HTTP/1"))
692 DBG("\tNot a HTTP/1.x request!\n");
693 s->state = HTTP_ERROR;
695 else if (http_ask_body(s, &s->req_headers))
/* compare all 5 characters of "POST " so that only the exact method name matches */
697 else if (!strncasecmp(s->req_line, "POST ", 5))
699 DBG("\tPOST with no request body, that smells!\n");
700 s->state = HTTP_BODY_INF;
704 DBG("\tNo request body, awaiting reply\n");
705 s->state = HTTP_RESPONSE;
707 s->body_queue = &s->tx_queue;
708 s->body_end_state = HTTP_RESPONSE;
/*
 * Called once the response line and response headers are available: choose the next
 * state of the connection.
 *   - reply to a HEAD request                      -> HTTP_DONE (no body)
 *   - body announced by the headers                -> state set by http_ask_body()
 *   - " 200 " reply to GET without announcement    -> HTTP_BODY_INF (body runs till EOF)
 *   - otherwise                                    -> HTTP_DONE
 * A response body is read from the rx queue and is followed by HTTP_DONE.
 */
711 static void http_parse_resp(struct http_state *s)
713 if (!strncasecmp(s->req_line, "HEAD ", 5))
715 DBG("\tHEAD has no body :)\n");
716 s->state = HTTP_DONE;
718 else if (http_ask_body(s, &s->resp_headers))
720 else if (!strncasecmp(s->req_line, "GET ", 4) && strstr(s->resp_line, " 200 "))
722 DBG("\tGET with no response body, that smells!\n");
723 s->state = HTTP_BODY_INF;
727 DBG("\tNo response body\n");
728 s->state = HTTP_DONE;
730 s->body_queue = &s->rx_queue;
731 s->body_end_state = HTTP_DONE;
/*
 * Accept packet `p' of flow `f' travelling in direction `dir' and advance the HTTP
 * state machine as far as the queued data allows.
 * Input is dropped once the connection is in HTTP_ERROR or HTTP_CUT.  Otherwise the
 * packet is appended to the tx queue (dir != 0) or the rx queue (dir == 0) and the
 * states are processed: request lines and headers are read from the tx queue,
 * response lines and headers from the rx queue, bodies from s->body_queue.
 * A finished transaction is reported via http_report(), unless it was a CONNECT,
 * which moves the connection to HTTP_CONNECT instead.
 */
734 static void http_input(struct flow *f, int dir, struct pkt *p)
736 struct http_state *s = f->appl_data;
737 struct http_header *h;
/* has either direction of the flow already finished? */
738 int fin_tx = (f->pipe[0].state == FLOW_FINISHED);
739 int fin_rx = (f->pipe[1].state == FLOW_FINISHED);
741 // DBG("dir=%d txf=%d rxf=%d len=%d\n", dir, fin_tx, fin_rx, pkt_len(p));
742 if (s->state == HTTP_ERROR || s->state == HTTP_CUT)
744 DBG("HTTP: %d DROPPING INPUT\n", s->conn_id);
749 list_add_tail((dir ? &s->tx_queue : &s->rx_queue), &p->n);
752 DBG("HTTP: %d STATE %d\n", s->conn_id, s->state);
/* idle: a new request starts only when the tx side is still open and has data queued */
756 if (fin_tx || !http_have_input(&s->tx_queue))
758 s->state = HTTP_REQUEST;
/* keep a start time that was set earlier (http_open() records one for the first request) */
759 if (!s->req_start_time)
760 s->req_start_time = p->timestamp;
764 if (fin_tx || fin_rx)
766 if (!(h = http_get_line(s, &s->tx_queue)))
768 DBG("\t>> %s\n", h->buf);
769 http_log_req_line(s, h->buf);
774 s->req_line = h->buf;
777 http_parse_hdr(&s->req_headers, h);
781 case HTTP_BODY_LENGTH:
784 if (!http_skip_body_bytes(s))
786 DBG("\tEnd of body\n");
787 s->state = s->body_end_state;
789 case HTTP_BODY_CHUNKED:
794 if (!http_skip_body_bytes(s))
797 else if (s->body_trailer)
799 if (!(h = http_get_line(s, s->body_queue)))
803 DBG("\tEnd of chunk-encoded body\n");
804 s->state = s->body_end_state;
809 if (!(h = http_get_line(s, s->body_queue)))
/* the chunk size line starts with a hexadecimal length */
811 if (sscanf(h->buf, "%x", &s->body_len) != 1)
814 s->body_len += 2; /* extra CRLF */
815 else /* last chunk */
821 http_skip_body_bytes(s);
824 DBG("\tEnd of FIN-delimited body\n");
825 s->state = s->body_end_state;
833 if (!(h = http_get_line(s, &s->rx_queue)))
835 DBG("\t<< %s\n", h->buf);
836 http_log_resp_line(s, h->buf);
/* the packet that completes the response line marks the start of the response */
841 s->resp_line = h->buf;
842 s->resp_start_time = p->timestamp;
845 http_parse_hdr(&s->resp_headers, h);
850 DBG("\tTransaction finished.\n");
851 if (!strncasecmp(s->req_line, "CONNECT ", 8))
853 s->state = HTTP_CONNECT;
/* ordinary transaction: report it and return to idle with no start time recorded */
856 http_report(f, s, p->timestamp, NULL);
857 s->state = HTTP_IDLE;
858 s->req_start_time = 0;
/* body bytes of both directions are consumed in turn */
862 s->body_queue = &s->rx_queue;
863 http_skip_body_bytes(s);
865 s->body_queue = &s->tx_queue;
866 http_skip_body_bytes(s);
877 DBG("HTTP: %d ERROR: PROTOCOL VIOLATION\n", s->conn_id);
878 s->state = HTTP_ERROR;
882 DBG("HTTP: %d ERROR: UNEXPECTED EOF\n", s->conn_id);
886 struct appl_hooks appl_http = {