#include <stdlib.h>
#include <string.h>
#include <alloca.h>
+#include <time.h>
#include <netinet/in.h>
#define MAXLINE 4096
return 0;
}
+static byte *find_token_val(byte *hay, byte *needle)
+{
+ if (!hay)
+ return NULL;
+ while (*hay)
+ {
+ if (*hay == ' ' || *hay == '\t' || *hay == ',')
+ hay++;
+ else
+ {
+ byte *h = hay;
+ while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t' && *hay != '=')
+ hay++;
+ if (*hay == '=')
+ {
+ uns old = *hay;
+ *hay = 0;
+ uns found = !strcasecmp(h, needle);
+ *hay = old;
+ if (found)
+ return hay + 1;
+ }
+ while (*hay && *hay != ',' && *hay != ' ' && *hay != '\t')
+ hay++;
+ }
+ }
+ return NULL;
+}
+
+/* Logging */
+
static void http_log_start(struct http_state *s)
{
if (!http_log_dir)
fwrite(data, len, 1, s->log_file);
}
+/* Date parsing */
+
+static int
+validate_time(byte *a, byte *p)
+{
+ while (*p && *a)
+ {
+ switch (*p)
+ {
+ case '.':
+ break;
+ case '$':
+ if (*a == ' ')
+ break;
+ /* Fall-thru */
+ case '#':
+ if (*a < '0' || *a > '9')
+ return 0;
+ break;
+ case 'a':
+ if (*a < 'a' || *a > 'z')
+ return 0;
+ break;
+ case 'A':
+ if (*a < 'A' || *a > 'Z')
+ return 0;
+ break;
+ case '@':
+ if ((*a < 'a' && *a > 'z') && (*a < 'A' && *a > 'Z'))
+ return 0;
+ break;
+ case '*':
+ return 1;
+ case '_':
+ if (*a != ' ' && *a != '-')
+ return 0;
+ break;
+ default:
+ if (*p != *a)
+ return 0;
+ }
+ p++;
+ a++;
+ }
+ return (*p == *a || *p == '*');
+}
+
+static int
+twodig(byte *p)
+{
+ if (p[0] == ' ')
+ return p[1] - '0';
+ else
+ return (p[0] - '0')*10 + p[1] - '0';
+}
+
+static int
+fourdig(byte *p)
+{
+ return twodig(p)*100 + twodig(p+2);
+}
+
+static byte short_months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
+
+static int
+find_month(byte *p)
+{
+ byte *z = short_months;
+ int m = 0;
+
+ while (*z)
+ {
+ m++;
+ if (p[0] == z[0] && p[1] == z[1] && p[2] == z[2])
+ return m;
+ z += 3;
+ }
+ return 0;
+}
+
+static s64 http_parse_date(char *date)
+{
+ char *p = date;
+ char *q;
+ struct tm tm;
+ int m, y;
+
+ if (!date)
+ return -1;
+
+ bzero(&tm, sizeof(tm));
+ if (validate_time(p, "Aaa, ##_Aaa_#### ##:##:##*"))
+ { /* RFC 822/1123 */
+ tm.tm_mday = twodig(p+5);
+ m = find_month(p+8);
+ if (!m)
+ goto unk;
+ tm.tm_mon = m - 1;
+ y = fourdig(p+12);
+ if (y < 1970)
+ goto unk;
+ tm.tm_year = y - 1900;
+ tm.tm_hour = twodig(p+17);
+ tm.tm_min = twodig(p+20);
+ tm.tm_sec = twodig(p+23);
+ goto ook;
+ }
+ if (q = strchr(p, ','))
+ {
+ int flag = 0;
+ if (validate_time(q, ", ##-Aaa-## ##:##:## GMT")) /* RFC 850 */
+ flag = 1;
+ else if (validate_time(q, ", #-Aaa-## ##:##:## GMT")) /* Incorrectly implemented RFC 850 */
+ flag = 2;
+ if (flag)
+ {
+ tm.tm_mday = twodig(q+2);
+ if (flag == 2)
+ q--;
+ m = find_month(q+5);
+ if (!m)
+ goto unk;
+ tm.tm_mon = m - 1;
+ tm.tm_year = twodig(q+9);
+ if (tm.tm_year < 76)
+ tm.tm_year += 100;
+ tm.tm_hour = twodig(q+12);
+ tm.tm_min = twodig(q+15);
+ tm.tm_sec = twodig(q+18);
+ goto ook;
+ }
+ }
+ if (validate_time(p, "Aaa Aaa $# ##:##:## ####"))
+ { /* ANSI C asctime() */
+ m = find_month(p+4);
+ if (!m)
+ goto unk;
+ tm.tm_mon = m - 1;
+ tm.tm_mday = twodig(p+8);
+ tm.tm_hour = twodig(p+11);
+ tm.tm_min = twodig(p+14);
+ tm.tm_sec = twodig(p+17);
+ y = fourdig(p+20);
+ if (y < 1980)
+ goto unk;
+ tm.tm_year = y - 1900;
+ goto ook;
+ }
+ if (!strcmp(p, "0") || !strcmp(p, "-1"))
+ return 0; /* Porcine hacks */
+
+unk:
+ fprintf(stderr, "Unable to parse date `%s'\n", date);
+ return 0;
+
+ook:
+ m = timegm(&tm);
+ if (m == (time_t) -1)
+ goto unk;
+ return (u64) m * 1000000;
+}
+
+static void http_cache_report(struct http_state *s, char *buf)
+{
+ byte *rq_pragma = http_lookup_hdr(&s->req_headers, "Pragma:");
+ byte *rp_pragma = http_lookup_hdr(&s->resp_headers, "Pragma:");
+ byte *rq_cc = http_lookup_hdr(&s->req_headers, "Cache-control:");
+ byte *rp_cc = http_lookup_hdr(&s->resp_headers, "Cache-control:");
+ byte *rp_vary = http_lookup_hdr(&s->resp_headers, "Vary:");
+ byte *rp_cache = http_lookup_hdr(&s->resp_headers, "X-Cache:");
+ s64 rp_expires = http_parse_date(http_lookup_hdr(&s->resp_headers, "Expires:"));
+ s64 rp_date = http_parse_date(http_lookup_hdr(&s->resp_headers, "Date:"));
+ s64 rp_time = rp_date > 0 ? rp_date : s->resp_line ? (s64) s->resp_start_time : (s64) s->req_start_time;
+
+ // Cache control in request
+ if (find_token(rq_pragma, "no-cache") || find_token(rq_cc, "no-cache"))
+ buf[0] = 'N';
+ else if (find_token(rq_cc, "max-age=0") || find_token(rq_cc, "must-revalidate"))
+ buf[0] = 'R';
+ else
+ buf[0] = '.';
+
+ // HTTP/1.0 cache control in reply
+ uns expired10 = (rp_expires > 0 && rp_expires <= rp_time);
+ uns nocache10 = find_token(rp_pragma, "no-cache");
+
+ // Expiration of reply
+ byte *rp_maxage_arg = find_token_val(rp_cc, "max-age");
+ s64 rp_maxage;
+ if (rp_maxage_arg)
+ rp_maxage = (s64) atoi(rp_maxage_arg) * 1000000;
+ else if (expired10)
+ rp_maxage = 0;
+ else if (rp_expires >= rp_time)
+ rp_maxage = rp_expires - rp_time;
+ else
+ rp_maxage = -1;
+#if 0
+ if (s->log_file)
+ fprintf(s->log_file, "; rp_expires=%Ld rp_time=%Ld rp_maxage=%Ld expired10=%d nocache10=%d\n", rp_expires, rp_time, rp_maxage, expired10, nocache10);
+#endif
+
+ // Cache control in reply
+ if (nocache10 || find_token(rp_cc, "no-cache"))
+ buf[1] = 'N';
+ else if (find_token(rp_cc, "private"))
+ buf[1] = 'P';
+ else if (find_token(rp_cc, "no-store"))
+ buf[1] = 'S';
+ else if (expired10 || !rp_maxage)
+ buf[1] = 'X';
+ else if (rp_vary)
+ buf[1] = 'V';
+ else if (find_token(rp_cc, "must-revalidate"))
+ buf[1] = 'R';
+ else if (rp_maxage > 0 && rp_maxage < (s64) 300 * 1000000)
+ buf[1] = 'E';
+ else if (rp_maxage > 0)
+ buf[1] = 'L';
+ else
+ buf[1] = '.';
+
+ // Do HTTP/1.1 and HTTP/1.0 behaviour match?
+ if (buf[1] != '.' && buf[1] != 'E' && buf[1] != 'L' && !expired10 && !nocache10)
+ buf[1] |= 0x20; // Lowercase
+
+ // Validators in reply
+ byte *rp_etag = http_lookup_hdr(&s->resp_headers, "ETag:");
+ s64 rp_lastmod = http_parse_date(http_lookup_hdr(&s->resp_headers, "Last-Modified:"));
+ if (rp_etag)
+ {
+ if (rp_etag[0] == 'W' && rp_etag[1] == '/')
+ buf[2] = 'W';
+ else
+ buf[2] = 'E';
+ }
+ else if (rp_lastmod > 0)
+ buf[2] = 'L';
+ else
+ buf[2] = '.';
+
+ // Is there cache status in reply?
+ if (!rp_cache)
+ buf[3] = '.';
+ else if (!strncmp(rp_cache, "HIT ", 4))
+ buf[3] = '+';
+ else if (!strncmp(rp_cache, "MISS ", 5))
+ buf[3] = '-';
+ else
+ buf[3] = '?';
+
+ buf[4] = 0;
+}
+
static void http_report(struct flow *f, struct http_state *s, u64 when, byte *reason)
{
byte *method, *url, *x, *y, *stat;
char *ffor = http_lookup_hdr(&s->req_headers, "X-Forwarded-For:");
/* Find out cacheability */
- byte *rq_pragma = http_lookup_hdr(&s->req_headers, "Pragma:");
- byte *rp_pragma = http_lookup_hdr(&s->resp_headers, "Pragma:");
- byte *rq_cc = http_lookup_hdr(&s->req_headers, "Cache-control:");
- byte *rp_cc = http_lookup_hdr(&s->resp_headers, "Cache-control:");
- byte *rp_cache = http_lookup_hdr(&s->resp_headers, "X-Cache:");
- uns rq_cflag, rp_cflag, rp_hit;
- if (find_token(rq_pragma, "no-cache") || find_token(rq_cc, "no-cache"))
- rq_cflag = 'N';
- else if (find_token(rq_cc, "max-age=0") || find_token(rq_cc, "must-revalidate"))
- rq_cflag = 'R';
- else
- rq_cflag = '.';
- if (find_token(rp_pragma, "no-cache") || find_token(rp_cc, "no-cache"))
- rp_cflag = 'N';
- else if (find_token(rp_cc, "private"))
- rp_cflag = 'P';
- else if (find_token(rp_cc, "no-store"))
- rp_cflag = 'S';
- else if (find_token(rp_cc, "must-revalidate"))
- rp_cflag = 'R';
- else
- rp_cflag = '.';
- if (!rp_cache)
- rp_hit = '.';
- else if (!strncmp(rp_cache, "HIT ", 4))
- rp_hit = '+';
- else if (!strncmp(rp_cache, "MISS ", 5))
- rp_hit = '-';
- else
- rp_hit = '?';
+ char cache_flags[16];
+ http_cache_report(s, cache_flags);
+ /* Format log message */
byte stamp[TIMESTAMP_LEN], src[22], dst[22];
sprintf(src, "%d.%d.%d.%d:%d", IPQUAD(f->saddr), ntohs(f->sport));
sprintf(dst, "%d.%d.%d.%d:%d", IPQUAD(f->daddr), ntohs(f->dport));
if (sep = strchr(ctype, ';'))
*sep = 0;
if (!s->xact_id)
- printf("# id timestamp source destination forwarded-for res cac que length total time wait time ctype method URL\n");
- /* 000000 2003-06-06 22:53:38.642 81.27.194.19:1175 205.217.153.53:80 123.123.123.123 200 ... 0 14030 0.957 0.444 text/plain GET http://... */
- printf("%06u %s %-21s %-21s %-15s %-3s %c%c%c %3d %8d %6d.%03d %6d.%03d %-12s %s %s\n",
+ printf("# id timestamp source destination forwarded-for res cach que length total time wait time ctype method URL\n");
+ /* 000000 2003-06-06 22:53:38.642 81.27.194.19:1175 205.217.153.53:80 123.123.123.123 200 .... 0 14030 0.957 0.444 text/plain GET http://... */
+ printf("%06u %s %-21s %-21s %-15s %-3s %s %3d %8d %6d.%03d %6d.%03d %-12s %s %s\n",
s->xact_id, stamp, src, dst, (ffor ? : "-"), reason,
- rq_cflag, rp_cflag, rp_hit,
+ cache_flags,
s->req_counter,
s->body_total_size,
(uns)(ttotal/1000000), (uns)(ttotal%1000000)/1000,