2 * Netgrind -- HTTP Analyser
4 * (c) 2003--2013 Martin Mares <mj@ucw.cz>
6 * This software may be freely distributed and used according to the terms
7 * of the GNU General Public License.
13 #include "lib/pools.h"
14 #include "netgrind/pkt.h"
15 #include "netgrind/netgrind.h"
22 #include <netinet/in.h>
/*
 * Values taken by the per-connection `state` field; http_input() and the
 * http_parse_* / http_ask_body helpers move a connection between them.
 * (The enclosing declaration header is not visible in this view.)
 */
35 HTTP_IDLE, /* initialized, waiting for request */
36 HTTP_ERROR, /* protocol error, ignoring everything else */
37 HTTP_CUT, /* unexpected EOF in one direction, ignoring everything else */
38 HTTP_REQUEST, /* parsing request */
39 HTTP_BODY_CHUNKED, /* receiving body: chunked encoding */
40 HTTP_BODY_LENGTH, /* receiving body: length given */
41 HTTP_BODY_INF, /* receiving body: till EOF */
42 HTTP_RESPONSE, /* parsing response */
43 HTTP_DONE, /* transaction finished, logging it */
44 HTTP_CONNECT, /* inside CONNECT transaction */
/*
 * Members of the per-connection HTTP state (declaration header and the
 * remaining members are not visible in this view).
 */
/* Start times of the current request and response; set from `when` in
 * http_open() and from packet timestamps in http_input(). Apparently in
 * microseconds (http_report() divides differences by 1000000) -- confirm. */
47 u64 req_start_time, resp_start_time;
/* Packets queued per direction by http_input(), waiting to be parsed. */
50 list tx_queue, rx_queue;
/* First line of the request / response; point into line buffers returned
 * by http_get_line(), NULL until the line has been seen. */
52 byte *req_line, *resp_line;
/* Header lines of the current transaction, appended by http_parse_hdr(). */
53 list req_headers, resp_headers;
/* Next connection id; consumed and incremented in http_open(). */
67 static uns http_conn_counter;
/* Next transaction id; consumed and incremented in http_init_xact(). */
68 static uns http_xact_counter;
/*
 * Create the per-connection HTTP state for flow `f`: a zero-filled
 * http_state receives the next connection id, two empty packet queues and
 * `when` as the start time of the first request.
 *
 * NOTE(review): several lines of this function are not visible in this view;
 * presumably the state is also stored in f->appl_data (http_close() and
 * http_input() read it from there) -- confirm.
 */
70 static void http_open(struct flow *f, u64 when)
72 struct http_state *s = xmalloc_zero(sizeof(*s));
/* Connection ids are handed out sequentially from a file-wide counter. */
75 s->conn_id = http_conn_counter++;
76 DBG("HTTP: %d NEW %d.%d.%d.%d:%d -> %d.%d.%d.%d:%d\n", s->conn_id,
77 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
78 list_init(&s->tx_queue);
79 list_init(&s->rx_queue);
80 s->req_start_time = when;
/*
 * Look up a header in list `l` by comparing each entry's name with `name`
 * case-insensitively. Callers in this file pass names that include the
 * trailing colon (e.g. "Host:").
 *
 * The return statements are not visible in this view; callers test the
 * result against NULL, so presumably it is the header's value or NULL when
 * no entry matches -- confirm.
 */
83 static byte *http_lookup_hdr(list *l, byte *name)
85 struct http_header *h;
87 if (!strcasecmp(h->name, name))
/*
 * Test whether the header value `hay` contains the token `needle`.
 * Space, tab and comma act as token separators and the comparison is
 * case-insensitive.
 *
 * NOTE(review): callers pass the result of http_lookup_hdr() directly, which
 * may be NULL for an absent header; the NULL handling (if any) is on lines
 * not visible in this view -- confirm.
 */
92 static uns find_token(byte *hay, byte *needle)
/* Skip separators between tokens. */
98 if (*hay == ' ' || *hay == '\t' || *hay == ',')
/* Advance to the end of the current token. */
103 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
107 uns found = !strcasecmp(h, needle);
/*
 * Like find_token(), but for `name=value` tokens: the token name also ends
 * at '=', and a second scan runs over the value up to the next separator.
 *
 * The return statements are not visible in this view; the caller tests the
 * result and feeds it to atoi(), so presumably it points at the value text
 * or is NULL when the token is missing -- confirm.
 */
116 static byte *find_token_val(byte *hay, byte *needle)
/* Skip separators between tokens. */
122 if (*hay == ' ' || *hay == '\t' || *hay == ',')
/* The token name ends at a separator or at '='. */
127 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t' && *hay != '=')
133 uns found = !strcasecmp(h, needle);
/* Scan over the value part up to the next separator. */
138 while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
/*
 * Open the per-transaction log file and write its first line.
 * The file is named "<http_log_dir>/<xact id>-<src ip:port>-<dst ip:port>"
 * and is created with fopen(..., "w"); failure to create it is fatal (die()).
 * The first line carries the request start timestamp and both endpoints.
 *
 * Some lines (possibly a guard on whether logging is enabled) are not
 * visible in this view.
 */
147 static void http_log_start(struct http_state *s)
152 char name[256], stamp[TIMESTAMP_LEN];
153 struct flow *f = s->flow;
/* NOTE(review): sprintf() is unbounded here; a long http_log_dir can overflow
 * the 256-byte `name` buffer. A size-limited formatter would be safer. */
155 sprintf(name, "%s/%06u-%d.%d.%d.%d:%d-%d.%d.%d.%d:%d", http_log_dir, s->xact_id,
156 IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport));
157 if (!(s->log_file = fopen(name, "w")))
158 die("Unable to create %s: %m", name);
160 format_timestamp(stamp, s->req_start_time);
161 fprintf(s->log_file, "; [%s] From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d (req %u)\n",
162 stamp, IPQUAD(f->saddr), ntohs(f->sport), IPQUAD(f->daddr), ntohs(f->dport),
/*
 * Counterpart of http_log_start() for state `s`. Only the signature is
 * visible in this view; presumably it closes s->log_file -- confirm.
 */
166 static void http_log_end(struct http_state *s)
/*
 * Write one request-side line to the transaction log, prefixed with "> ".
 * Whether the write is guarded by a check of s->log_file is not visible in
 * this view.
 */
174 static void http_log_req_line(struct http_state *s, byte *line)
177 fprintf(s->log_file, "> %s\n", line);
/*
 * Write one response-side line to the transaction log, prefixed with "< ".
 * Whether the write is guarded by a check of s->log_file is not visible in
 * this view.
 */
180 static void http_log_resp_line(struct http_state *s, byte *line)
183 fprintf(s->log_file, "< %s\n", line);
/*
 * Append `len` raw body bytes from `data` to the transaction log.
 * The bytes are written as a single item and the fwrite() result is ignored,
 * so a short or failed write goes unnoticed.
 */
186 static void http_log_body(struct http_state *s, byte *data, uns len)
189 fwrite(data, len, 1, s->log_file);
/*
 * Match string `a` against pattern `p`, one character at a time.
 * Callers use the pattern characters '#', 'A', 'a', '_', '$' and '*'.
 * The dispatch that ties each pattern character to a check below is not
 * visible in this view, so the pairing of checks to pattern characters is
 * an assumption to confirm.
 */
195 validate_time(byte *a, byte *p)
/* Decimal digit required. */
208 if (*a < '0' || *a > '9')
/* Lowercase ASCII letter required. */
212 if (*a < 'a' || *a > 'z')
/* Uppercase ASCII letter required. */
216 if (*a < 'A' || *a > 'Z')
/* NOTE(review): this condition can never be true -- no character is both
 * below 'a' and above 'z' (likewise for 'A'/'Z') -- so this check never
 * rejects anything. A letter-of-either-case test would join each range pair
 * with || instead of &&. */
220 if ((*a < 'a' && *a > 'z') && (*a < 'A' && *a > 'Z'))
/* Space or dash required. */
226 if (*a != ' ' && *a != '-')
/* Final comparison: the current characters are equal, or the pattern is at '*'. */
236 return (*p == *a || *p == '*');
/*
 * Fragments of the small date helpers used by http_parse_date(); their
 * function headers are not visible in this view.
 */
/* Two ASCII decimal digits at p[0..1] to their numeric value (no validation here). */
245 return (p[0] - '0')*10 + p[1] - '0';
/* Four ASCII decimal digits built from two two-digit halves. */
251 return twodig(p)*100 + twodig(p+2);
/* Three-letter English month abbreviations packed back to back, January first. */
254 static byte short_months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
259 byte *z = short_months;
/* Case-sensitive comparison of a three-letter month name against the table. */
265 if (p[0] == z[0] && p[1] == z[1] && p[2] == z[2])
/*
 * Parse an HTTP date header value into a timestamp.
 * Three layouts are recognised through validate_time() patterns: a
 * "weekday, day month 4-digit-year time" form, the RFC 850 form (plus a
 * single-digit-day variant of it) and the ANSI C asctime() form.
 * The literal values "0" and "-1" yield 0. Anything else unparsable is
 * reported on stderr.
 *
 * On success the converted time_t is scaled by 1000000, i.e. seconds to
 * microseconds.
 *
 * NOTE(review): callers pass the result of http_lookup_hdr() directly, which
 * may be NULL; the NULL handling and the calendar conversion call are on
 * lines not visible in this view -- confirm.
 */
272 static s64 http_parse_date(char *date)
282 bzero(&tm, sizeof(tm));
/* Fixed-offset field extraction below relies on the pattern having matched. */
283 if (validate_time(p, "Aaa, ##_Aaa_#### ##:##:##*"))
285 tm.tm_mday = twodig(p+5);
/* struct tm counts years from 1900. */
293 tm.tm_year = y - 1900;
294 tm.tm_hour = twodig(p+17);
295 tm.tm_min = twodig(p+20);
296 tm.tm_sec = twodig(p+23);
/* Weekday name of unknown length: continue matching from the comma. */
299 if (q = strchr(p, ','))
302 if (validate_time(q, ", ##-Aaa-## ##:##:## GMT")) /* RFC 850 */
304 else if (validate_time(q, ", #-Aaa-## ##:##:## GMT")) /* Incorrectly implemented RFC 850 */
308 tm.tm_mday = twodig(q+2);
/* Two-digit year; any century adjustment is on lines not visible here. */
315 tm.tm_year = twodig(q+9);
318 tm.tm_hour = twodig(q+12);
319 tm.tm_min = twodig(q+15);
320 tm.tm_sec = twodig(q+18);
324 if (validate_time(p, "Aaa Aaa $# ##:##:## ####"))
325 { /* ANSI C asctime() */
330 tm.tm_mday = twodig(p+8);
331 tm.tm_hour = twodig(p+11);
332 tm.tm_min = twodig(p+14);
333 tm.tm_sec = twodig(p+17);
337 tm.tm_year = y - 1900;
340 if (!strcmp(p, "0") || !strcmp(p, "-1"))
341 return 0; /* Porcine hacks */
344 fprintf(stderr, "Unable to parse date `%s'\n", date);
/* (time_t) -1 is the failure value of the calendar conversion. */
349 if (m == (time_t) -1)
/* Seconds to microseconds. */
351 return (u64) m * 1000000;
/*
 * Work out the cacheability of the current transaction from its request and
 * response headers and encode the result as flag characters in `buf`.
 * The sections below look at: cache control in the request, HTTP/1.0-style
 * controls in the reply (Expires, Pragma), reply expiration (max-age or
 * Expires minus the reply time), Cache-control in the reply, validators
 * (ETag, Last-Modified) and the X-Cache status.
 *
 * Most assignments into `buf` are on lines not visible in this view; the
 * caller provides a 16-byte buffer.
 *
 * NOTE(review): header values returned by http_lookup_hdr() may be NULL and
 * are passed straight to find_token() / find_token_val() /
 * http_parse_date(); this relies on those helpers tolerating NULL, which
 * cannot be confirmed from here.
 */
354 static void http_cache_report(struct http_state *s, char *buf)
356 byte *rq_pragma = http_lookup_hdr(&s->req_headers, "Pragma:");
357 byte *rp_pragma = http_lookup_hdr(&s->resp_headers, "Pragma:");
358 byte *rq_cc = http_lookup_hdr(&s->req_headers, "Cache-control:");
359 byte *rp_cc = http_lookup_hdr(&s->resp_headers, "Cache-control:");
360 byte *rp_vary = http_lookup_hdr(&s->resp_headers, "Vary:");
361 byte *rp_cache = http_lookup_hdr(&s->resp_headers, "X-Cache:");
362 s64 rp_expires = http_parse_date(http_lookup_hdr(&s->resp_headers, "Expires:"));
363 s64 rp_date = http_parse_date(http_lookup_hdr(&s->resp_headers, "Date:"));
/* Reply time: the Date header if usable, else when the response line was
 * seen, else when the request started. */
364 s64 rp_time = rp_date > 0 ? rp_date : s->resp_line ? (s64) s->resp_start_time : (s64) s->req_start_time;
366 // Cache control in request
367 if (find_token(rq_pragma, "no-cache") || find_token(rq_cc, "no-cache"))
369 else if (find_token(rq_cc, "max-age=0") || find_token(rq_cc, "must-revalidate"))
371 else if (http_lookup_hdr(&s->req_headers, "If-Match:") || http_lookup_hdr(&s->req_headers, "If-Modified-Since:"))
376 // HTTP/1.0 cache control in reply
377 uns expired10 = (rp_expires > 0 && rp_expires <= rp_time);
378 uns nocache10 = find_token(rp_pragma, "no-cache");
380 // Expiration of reply
381 byte *rp_maxage_arg = find_token_val(rp_cc, "max-age");
/* NOTE(review): atoi() gives no error indication; a non-numeric max-age
 * reads as 0. The result is scaled from seconds to microseconds. */
384 rp_maxage = (s64) atoi(rp_maxage_arg) * 1000000;
387 else if (rp_expires >= rp_time)
388 rp_maxage = rp_expires - rp_time;
/* NOTE(review): "%Ld" is not a standard conversion for integers (the L
 * modifier is defined only for floating-point conversions); "%lld" is the
 * portable spelling for a long long argument. */
393 fprintf(s->log_file, "; rp_expires=%Ld rp_time=%Ld rp_maxage=%Ld expired10=%d nocache10=%d\n", rp_expires, rp_time, rp_maxage, expired10, nocache10);
396 // Cache control in reply
397 if (nocache10 || find_token(rp_cc, "no-cache"))
399 else if (find_token(rp_cc, "private"))
401 else if (find_token(rp_cc, "no-store"))
403 else if (expired10 || !rp_maxage)
407 else if (find_token(rp_cc, "must-revalidate"))
/* Lifetimes under 300 seconds are classified separately from longer ones. */
409 else if (rp_maxage > 0 && rp_maxage < (s64) 300 * 1000000)
411 else if (rp_maxage > 0)
416 // Do HTTP/1.1 and HTTP/1.0 behaviour match?
417 if (buf[1] != '.' && buf[1] != 'E' && buf[1] != 'L' && !expired10 && !nocache10)
418 buf[1] |= 0x20; // Lowercase
420 // Validators in reply
421 byte *rp_etag = http_lookup_hdr(&s->resp_headers, "ETag:");
422 s64 rp_lastmod = http_parse_date(http_lookup_hdr(&s->resp_headers, "Last-Modified:"));
/* A "W/" prefix marks a weak entity tag.
 * NOTE(review): rp_etag is dereferenced here; the NULL guard, if any, is on
 * a line not visible in this view -- confirm. */
425 if (rp_etag[0] == 'W' && rp_etag[1] == '/')
430 else if (rp_lastmod > 0)
435 // Is there cache status in reply?
438 else if (!strncmp(rp_cache, "HIT", 3))
440 else if (!strncmp(rp_cache, "MISS", 4))
/*
 * Print the one-line summary of a finished (or aborted) transaction to
 * stdout: transaction id, timestamp, endpoints, X-Forwarded-For, result,
 * cache flags, total and wait time, content type, method and URL.
 *
 * `when` is the end time used for the total duration. `reason` is the result
 * column; it is replaced by the status text from the response line when one
 * is available (the exact conditions are on lines not visible in this view).
 */
448 static void http_report(struct flow *f, struct http_state *s, u64 when, byte *reason)
450 byte *method, *url, *x, *y, *stat;
/* No request line was ever seen. */
452 if (!(method = s->req_line))
458 /* Analyse request line */
460 while (*url && *url != ' ')
469 /* Analyse response line */
470 if (stat = s->resp_line)
472 while (*stat && *stat != ' ')
477 while (*x && *x != ' ')
484 reason = stat[0] ? stat : (byte*)"???";
486 /* Reconstruct full URL */
/* Only when the request target has no scheme and the method is not CONNECT. */
487 if (!strstr(url, "://") && strcasecmp(method, "CONNECT"))
489 if (!(x = http_lookup_hdr(&s->req_headers, "Host:")))
/* 7 = strlen("http://"), plus one byte for the terminating NUL.
 * NOTE(review): the stack allocation is sized from header data; the
 * assignment of `y` is on a line not visible in this view. */
492 url = alloca(7 + strlen(x) + strlen(y) + 1);
493 sprintf(url, "http://%s%s", x, y);
495 char *ffor = http_lookup_hdr(&s->req_headers, "X-Forwarded-For:");
497 /* Find out cacheability */
498 char cache_flags[16];
499 http_cache_report(s, cache_flags);
501 /* Format log message */
/* 22 bytes hold "255.255.255.255:65535" plus the terminating NUL. */
502 byte stamp[TIMESTAMP_LEN], src[22], dst[22];
503 sprintf(src, "%d.%d.%d.%d:%d", IPQUAD(f->saddr), ntohs(f->sport));
504 sprintf(dst, "%d.%d.%d.%d:%d", IPQUAD(f->daddr), ntohs(f->dport));
505 format_timestamp(stamp, s->req_start_time);
506 u64 ttotal = when - s->req_start_time;
/* Wait time is 0 when no response line was received. */
507 u64 tresp = (s->resp_line ? (s->resp_start_time - s->req_start_time) : 0);
/* Response Content-type, else request Content-type, else "-". Uses the
 * GNU "a ?: b" extension. */
508 byte *ctype = (http_lookup_hdr(&s->resp_headers, "Content-type:") ? : http_lookup_hdr(&s->req_headers, "Content-type:")) ? : (byte*)"-";
/* Content-type parameters start at ';'. */
510 if (sep = strchr(ctype, ';'))
/* Column header line; the condition under which it is printed is not visible here. */
513 printf("# id timestamp source destination forwarded-for res cach que length total time wait time ctype method URL\n");
514 /* 000000 2003-06-06 22:53:38.642 81.27.194.19:1175 205.217.153.53:80 123.123.123.123 200 .... 0 14030 0.957 0.444 text/plain GET http://... */
515 printf("%06u %s %-21s %-21s %-15s %-3s %s %3d %8d %6d.%03d %6d.%03d %-12s %s %s\n",
516 s->xact_id, stamp, src, dst, (ffor ? : "-"), reason,
/* Durations are printed as seconds and milliseconds. */
520 (uns)(ttotal/1000000), (uns)(ttotal%1000000)/1000,
521 (uns)(tresp/1000000), (uns)(tresp%1000000)/1000,
/*
 * Handle the end of flow `f`: report any transaction still in progress and
 * flush both packet queues.
 * A close with a cause other than CAUSE_CLOSE is reported, when the
 * connection is not idle, with a result of "T" followed by the short cause
 * name. The results "ERR", "CUT" and "FIN" are used on other paths whose
 * selecting conditions are on lines not visible in this view.
 */
528 static void http_close(struct flow *f, int cause, u64 when)
530 struct http_state *s = f->appl_data;
531 DBG("HTTP: %d CLOSE in state %d (cause %d)\n", s->conn_id, s->state, cause);
532 if (cause != CAUSE_CLOSE)
534 if (s->state != HTTP_IDLE)
/* NOTE(review): the declaration (and size) of `buf` is not visible in this
 * view; it must hold 'T' plus the longest short cause name. */
537 sprintf(buf, "T%s", flow_cause_names_short[cause]);
538 http_report(f, s, when, buf);
545 http_report(f, s, when, "ERR");
548 http_report(f, s, when, "CUT");
551 http_report(f, s, when, "FIN");
/* Drop any packets still queued in either direction. */
556 pkt_flush_queue(&s->rx_queue);
557 pkt_flush_queue(&s->tx_queue);
/*
 * Assemble the next text line from the packets queued in `l`.
 * Bytes are accumulated in s->line across packets. A completed line is
 * copied into a new http_header allocated from the transaction pool, with
 * `name` and `value` left NULL. A line reaching MAXLINE-1 bytes puts the
 * connection into HTTP_ERROR.
 *
 * The line-terminator test and the return statements are not visible in
 * this view; callers treat a NULL result as "no complete line yet".
 */
563 static struct http_header *http_get_line(struct http_state *s, list *l)
567 struct pkt *p = list_head(l);
570 while (p->data < p->stop)
/* NOTE(review): line_len + 1 bytes are written to h->buf (copy plus NUL), so
 * sizeof(*h) must already include at least one byte of buf -- confirm
 * against the struct definition. */
577 struct http_header *h = mp_alloc(s->pool, sizeof(*h) + s->line_len);
578 memcpy(h->buf, s->line, s->line_len);
579 h->buf[s->line_len] = 0;
580 h->name = h->value = NULL;
584 else if (s->line_len >= MAXLINE-1)
586 DBG("HTTP: Line too long!\n");
587 s->state = HTTP_ERROR;
591 s->line[s->line_len++] = c;
/*
 * Consume body bytes from the head packet of s->body_queue: take the smaller
 * of what the packet holds and what s->body_len still asks for, copy it to
 * the transaction log and add it to s->body_total_size.
 *
 * The loop structure, the updates of the packet and of s->body_len, and the
 * return value are on lines not visible in this view.
 */
598 static int http_skip_body_bytes(struct http_state *s)
602 struct pkt *p = list_head(s->body_queue);
605 uns avail = pkt_len(p);
606 uns want = s->body_len;
607 uns go = MIN(avail, want);
608 http_log_body(s, p->data, go);
611 s->body_total_size += go;
/*
 * Inspect queue `l`, starting from its head packet; http_input() uses the
 * result to decide whether request data is waiting.
 * The rest of the body is not visible in this view.
 */
622 static int http_have_input(list *l)
626 struct pkt *p = list_head(l);
/*
 * Reset the per-transaction part of `s` for a new request/response pair:
 * empty header lists, a fresh memory pool for line buffers, no request or
 * response line, a zero body size and the next transaction id.
 * Some lines of this function are not visible in this view.
 */
636 static void http_init_xact(struct http_state *s)
638 list_init(&s->req_headers);
639 list_init(&s->resp_headers);
/* Headers returned by http_get_line() are allocated from this pool. */
643 s->pool = mp_new(4096);
644 s->req_line = s->resp_line = NULL;
646 s->body_total_size = 0;
647 s->xact_id = http_xact_counter++;
/*
 * Split header line `h` at its first run of spaces/tabs and append it to
 * list `l`.
 * The assignments of h->name and h->value are on lines not visible in this
 * view; presumably the name is the text before the whitespace (colon
 * included, as http_lookup_hdr() callers expect) and the value the text
 * after it -- confirm.
 */
652 static void http_parse_hdr(list *l, struct http_header *h)
/* Find the end of the header name. */
656 while (*x && *x != ' ' && *x != '\t')
/* Skip the whitespace in front of the value. */
658 while (*x == ' ' || *x == '\t')
661 list_add_tail(l, &h->n);
/*
 * Decide from header list `hdr` how the message body is delimited and set
 * s->state accordingly: Transfer-Encoding "chunked" selects
 * HTTP_BODY_CHUNKED, and a Content-Length header selects HTTP_BODY_LENGTH
 * with s->body_len taken from its value.
 *
 * Return statements are not visible in this view; callers treat a non-zero
 * result as "handled here".
 */
664 static int http_ask_body(struct http_state *s, list *hdr)
667 if (x = http_lookup_hdr(hdr, "Transfer-Encoding:"))
669 DBG("\tBody encoding: %s\n", x);
/* Exact match only: a value listing several codings does not match. */
670 if (!strcasecmp(x, "chunked"))
672 s->state = HTTP_BODY_CHUNKED;
/* Presumably the branch for unsupported transfer encodings -- confirm. */
677 s->state = HTTP_ERROR;
679 else if (x = http_lookup_hdr(hdr, "Content-Length:"))
/* NOTE(review): atol() gives no error indication, so a malformed or
 * negative length is taken as-is. */
681 s->body_len = atol(x);
682 DBG("\tBody length: %d\n", s->body_len);
683 s->state = HTTP_BODY_LENGTH;
/*
 * Called once the request headers are complete: choose the next state.
 * A request line without " HTTP/1" is a protocol error. Otherwise the body
 * framing comes from http_ask_body(); a POST without framing headers is read
 * until EOF, and any other request goes straight to waiting for the
 * response. The request body is read from the tx queue and is followed by
 * the response.
 */
690 static void http_parse_req(struct http_state *s)
692 if (!strstr(s->req_line, " HTTP/1"))
694 DBG("\tNot a HTTP/1.x request!\n");
695 s->state = HTTP_ERROR;
697 else if (http_ask_body(s, &s->req_headers))
/* NOTE(review): only 4 characters are compared although the literal "POST "
 * has 5, so the trailing space is not checked and any method starting with
 * "POST" matches. The other method checks in this file compare the full
 * literal including the space. */
699 else if (!strncasecmp(s->req_line, "POST ", 4))
701 DBG("\tPOST with no request body, that smells!\n");
702 s->state = HTTP_BODY_INF;
706 DBG("\tNo request body, awaiting reply\n");
707 s->state = HTTP_RESPONSE;
709 s->body_queue = &s->tx_queue;
710 s->body_end_state = HTTP_RESPONSE;
/*
 * Called once the response headers are complete: choose the next state.
 * A response to HEAD has no body. Otherwise the body framing comes from
 * http_ask_body(); a " 200 " response to GET without framing headers is read
 * until EOF, and anything else finishes the transaction. The response body
 * is read from the rx queue and is followed by HTTP_DONE.
 */
713 static void http_parse_resp(struct http_state *s)
715 if (!strncasecmp(s->req_line, "HEAD ", 5))
717 DBG("\tHEAD has no body :)\n");
718 s->state = HTTP_DONE;
720 else if (http_ask_body(s, &s->resp_headers))
722 else if (!strncasecmp(s->req_line, "GET ", 4) && strstr(s->resp_line, " 200 "))
724 DBG("\tGET with no response body, that smells!\n");
725 s->state = HTTP_BODY_INF;
729 DBG("\tNo response body\n");
730 s->state = HTTP_DONE;
732 s->body_queue = &s->rx_queue;
733 s->body_end_state = HTTP_DONE;
/*
 * Feed packet `p`, seen in direction `dir` of flow `f`, into the HTTP state
 * machine. The packet is appended to the tx queue (non-zero dir) or the rx
 * queue (zero dir), and the machine is then advanced according to s->state.
 * Input is dropped once the connection is in HTTP_ERROR or HTTP_CUT.
 *
 * The switch statement, its case labels and the loop/exit structure are
 * mostly on lines not visible in this view, so the per-state notes below are
 * inferred from the visible statements and should be confirmed.
 */
736 static void http_input(struct flow *f, int dir, struct pkt *p)
738 struct http_state *s = f->appl_data;
739 struct http_header *h;
/* Whether each direction of the flow has already finished. */
740 int fin_tx = (f->pipe[0].state == FLOW_FINISHED);
741 int fin_rx = (f->pipe[1].state == FLOW_FINISHED);
743 // DBG("dir=%d txf=%d rxf=%d len=%d\n", dir, fin_tx, fin_rx, pkt_len(p));
744 if (s->state == HTTP_ERROR || s->state == HTTP_CUT)
746 DBG("HTTP: %d DROPPING INPUT\n", s->conn_id);
751 list_add_tail((dir ? &s->tx_queue : &s->rx_queue), &p->n);
754 DBG("HTTP: %d STATE %d\n", s->conn_id, s->state);
/* Waiting for a request: nothing to do without pending tx data. */
758 if (fin_tx || !http_have_input(&s->tx_queue))
760 s->state = HTTP_REQUEST;
/* Keep the start time set by http_open() for the first request. */
761 if (!s->req_start_time)
762 s->req_start_time = p->timestamp;
/* Request lines: the first one becomes req_line, later ones are headers. */
766 if (fin_tx || fin_rx)
768 if (!(h = http_get_line(s, &s->tx_queue)))
770 DBG("\t>> %s\n", h->buf);
771 http_log_req_line(s, h->buf);
776 s->req_line = h->buf;
779 http_parse_hdr(&s->req_headers, h);
783 case HTTP_BODY_LENGTH:
786 if (!http_skip_body_bytes(s))
788 DBG("\tEnd of body\n");
789 s->state = s->body_end_state;
791 case HTTP_BODY_CHUNKED:
796 if (!http_skip_body_bytes(s))
/* After the last chunk, trailer lines are read until the body ends. */
799 else if (s->body_trailer)
801 if (!(h = http_get_line(s, s->body_queue)))
805 DBG("\tEnd of chunk-encoded body\n");
806 s->state = s->body_end_state;
/* Chunk header: the chunk size is a hexadecimal number. */
811 if (!(h = http_get_line(s, s->body_queue)))
813 if (sscanf(h->buf, "%x", &s->body_len) != 1)
816 s->body_len += 2; /* extra CRLF */
817 else /* last chunk */
823 http_skip_body_bytes(s);
826 DBG("\tEnd of FIN-delimited body\n");
827 s->state = s->body_end_state;
/* Response lines: the first one becomes resp_line and stamps the response
 * start time, later ones are headers. */
835 if (!(h = http_get_line(s, &s->rx_queue)))
837 DBG("\t<< %s\n", h->buf);
838 http_log_resp_line(s, h->buf);
843 s->resp_line = h->buf;
844 s->resp_start_time = p->timestamp;
847 http_parse_hdr(&s->resp_headers, h);
852 DBG("\tTransaction finished.\n");
/* A finished CONNECT switches the connection to HTTP_CONNECT. */
853 if (!strncasecmp(s->req_line, "CONNECT ", 8))
855 s->state = HTTP_CONNECT;
/* Report the transaction and go back to waiting for the next request. */
858 http_report(f, s, p->timestamp, NULL);
859 s->state = HTTP_IDLE;
860 s->req_start_time = 0;
/* Consume data from both directions through the body-skipping helper. */
864 s->body_queue = &s->rx_queue;
865 http_skip_body_bytes(s);
867 s->body_queue = &s->tx_queue;
868 http_skip_body_bytes(s);
879 DBG("HTTP: %d ERROR: PROTOCOL VIOLATION\n", s->conn_id);
880 s->state = HTTP_ERROR;
884 DBG("HTTP: %d ERROR: UNEXPECTED EOF\n", s->conn_id);
888 struct appl_hooks appl_http = {