X-Git-Url: https://pd.if.org/git/?p=zpackage;a=blobdiff_plain;f=src%2Ffetchurl.c;h=5ff67ddb2ddf257b69122159251cdcb02690c1eb;hp=1b337244421df9f0f74b7aa5a2d99186670ecd60;hb=HEAD;hpb=5dd3c3e64a9574112dda77a5afc167f5daa53fd8 diff --git a/src/fetchurl.c b/src/fetchurl.c index 1b33724..5ff67dd 100644 --- a/src/fetchurl.c +++ b/src/fetchurl.c @@ -15,7 +15,8 @@ #include #include "tlse.h" -#define MARK fprintf(stderr, "%s %s:%d\n", __FILE__, __func__, __LINE__) + +#define DEBUG(lvl, ...) if (debuglevel >= lvl ) { fprintf(stderr, __VA_ARGS__); } struct tls_uri { char *scheme; @@ -23,9 +24,12 @@ struct tls_uri { char *host; char *port; char *path; + char *encoded_path; char *query; + char *encoded_query; char *fragment; }; + int tls_parse_uri(char *, struct tls_uri *); void tls_free_uri(struct tls_uri *); @@ -53,6 +57,8 @@ static void hex(char *dst, uint8_t *src, size_t len) { } } +static int debuglevel = 0; + #if 0 static void hexbin(uint8_t *dst, unsigned char *src, size_t len) { size_t i; @@ -207,6 +213,11 @@ int verify_first(struct TLSContext *context, struct TLSCertificate **chain, int } int match = (memcmp(certhash, fp, 64) == 0); + if (!match) { + fprintf(stderr, "host %s certificate changed\n", host); + fprintf(stderr, "was %.64s\n", fp); + fprintf(stderr, "now %.64s\n", certhash); + } close(trustdb); tls_buffer_free(&tbuf); @@ -269,14 +280,23 @@ int verify_roots(struct TLSContext *context, struct TLSCertificate **chain, int struct io { struct tls_buffer response; + struct tls_buffer chunkbuf; struct TLSContext *tls; int socket; + int chunked; + int chunknum; + size_t chunksize; + size_t chunkleft; + size_t chunktotal; + size_t chunkbytesread; int status_code; time_t last_modified; time_t date; size_t content_length; + size_t received; char *redirect; }; +ssize_t unchunk(struct io *io); int month(char *m) { char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", @@ -319,18 +339,26 @@ char *find_header(struct io *io, char *header, size_t *len) { *len = 0; hlen = strlen(header); + + /* TODO can't do this, buffer may not be zero terminated */ eoh = strstr(io->response.buffer, "\r\n\r\n"); if (!eoh) { return 0; } + soh = io->response.buffer; do { + /* skip the first line for some reason */ soh = strstr(soh, "\r\n"); if (soh == eoh) { break; } soh += 2; - if (!memcmp(soh, header, hlen)) { + /* done if not enough room */ + if (hlen > (size_t)(eoh - soh)) { + break; + } + if (!strncasecmp(soh, header, hlen)) { break; } } while (soh < eoh); @@ -382,11 +410,22 @@ void parse_header(struct io *io) { hval[hlen] = '\r'; } + hval = find_header(io, "Transfer-Encoding:", &hlen); + if (hval) { + hval[hlen] = 0; + io->content_length = strtoul(hval, 0, 10); + if (!strcmp(hval, "chunked")) { + io->chunked = 1; + } + hval[hlen] = '\r'; + } + switch (code) { case 301: case 302: case 303: case 307: + DEBUG(1, "looking for Location header\n"); hval = find_header(io, "Location:", &hlen); if (hval) { io->redirect = strndup(hval, hlen); @@ -398,23 +437,136 @@ void parse_header(struct io *io) { } +/* fill buffer needs to put bytes into the response buffer + * if the transfer encoding is chunked, it will need to + * put the bytes into the chunkbuf first, then call + * unchunk. if unchunk return 0, then it needs more data, + * otherwise unchunk returns the number of bytes transferred + */ + ssize_t fill_buffer(struct io *io) { unsigned char buffer[4096]; - ssize_t ret; + ssize_t ret = 0; - if (io->tls) { - ret = tls_read(io->tls, buffer, sizeof buffer); - } else { - ret = read(io->socket, buffer, sizeof buffer); - } + ret = unchunk(io); + + while (ret == 0) { + if (io->tls) { + ret = tls_read(io->tls, buffer, sizeof buffer); + } else { + ret = read(io->socket, buffer, sizeof buffer); + } + + if (ret <= 0) { + break; + } - if (ret > 0) { - tls_buffer_append(&io->response, buffer, ret); + if (io->chunked) { + tls_buffer_append(&io->chunkbuf, buffer, ret); + //fwrite(buffer, ret, 1, stderr); + ret = unchunk(io); + if (ret != 0 || io->chunksize == 0) { + break; + } + } else { + tls_buffer_append(&io->response, buffer, ret); + break; + } } return ret; } +/* essentially memmem */ +void *lookfor(const void *buf, size_t buflen, const void *pattern, size_t len) { + const char *bf = buf; + const char *pt = pattern; + const char *p = bf; + + while (len <= (buflen - (p - bf))) { + if ((p = memchr(p, *pt, buflen - (p - bf))) != 0) { + if (memcmp(p, pattern, len) == 0) { + return (void *)p; + } else { + p++; + } + } else { + break; + } + } + return NULL; +} + +/* returns read chunksize, unshifts the line */ +ssize_t read_chunksize(struct io *io) { + char *cr; + ssize_t cs; + + //fwrite(io->chunkbuf.buffer, io->chunkbuf.len, 1, stderr); + + /* there could be up to two leading bytes */ + if (io->chunkbuf.len >= 2 && io->chunkbuf.buffer[0] == '\r' && io->chunkbuf.buffer[1] == '\n') { + tls_buffer_shift(&io->chunkbuf, 2); + } + + cr = lookfor(io->chunkbuf.buffer, io->chunkbuf.len, "\r\n", 2); + + if (cr == 0) { + return -1; + } + + cs = strtol(io->chunkbuf.buffer, 0, 16); + tls_buffer_shift(&io->chunkbuf, cr - io->chunkbuf.buffer + 2); + + return cs; +} + +/* unchunk's job is to move bytes from the chunk buf to the response buf */ +/* return bytes from chunk, 0 if unable. once last chunk, changed chunked + * to 0? + */ +ssize_t unchunk(struct io *io) { + ssize_t bytes_to_move = 0; + ssize_t chunksize; + + if (!io || !io->chunked) { + return 0; + } + + if (io->chunkleft == 0) { + chunksize = read_chunksize(io); + if (chunksize == -1) { + return 0; + } + io->chunksize = chunksize; + if (io->chunksize == 0) { + /* end of chunked data */ + io->chunked = 0; + return 0; + } + io->chunknum++; + io->chunkleft = io->chunksize; + io->chunktotal += io->chunksize; + } + + if (io->chunkbuf.len == 0) { + /* need more bytes */ + return 0; + } + + bytes_to_move = io->chunkbuf.len < io->chunkleft ? io->chunkbuf.len : io->chunkleft; + + tls_buffer_append(&io->response, io->chunkbuf.buffer, bytes_to_move); + io->chunkleft -= bytes_to_move; + io->chunkbytesread += bytes_to_move; + + /* chunk is terminated with a crlf */ + //tls_buffer_shift(&io->chunkbuf, bytes_to_move + io->chunkleft ? 0 : 2); + tls_buffer_shift(&io->chunkbuf, bytes_to_move); + + return bytes_to_move; +} + #if 0 char *nextline(struct io *io) { char *eol = 0;; @@ -508,6 +660,53 @@ static void fake_header(struct io *io, int fd) { tls_buffer_append(hdr, "\r\n", 2); } +char *pathlast(char *path) { + char *last = 0; + size_t len = 0; + + if (path == 0) { + return 0; + } + + while (*path == '/') { + path++; + } + + do { + last = path; + len = 0; + while (*path && *path != '/') { + path++; + len++; + } + while (*path == '/') { + path++; + } + } while (*path); + + if (len == 0) { + return 0; + } + + return strndup(last, len); +} + +static time_t file_mtime(char *path) { + struct stat st; + int rv; + + rv = stat(path, &st); + if (rv == -1) { + if (errno == ENOENT) { + return 0; + } + perror("stat failed:"); + return -1; + } + + return st.st_mtime; +} + int main(int ac, char *av[]) { int sockfd, port = -1, rv; ssize_t ret; @@ -519,12 +718,12 @@ int main(int ac, char *av[]) { #endif char *req_file = 0; char *host = 0; - struct tls_uri uri; + struct tls_uri uri = { 0 }; char *outfile = 0; int raw = 0, head = 0; - int out = 1; + int out = 1; /* output file descriptor */ int use_tls = 0; - struct io io = { {0}, 0, -1, 0, 0, 0, 0, 0 }; + struct io io = { {0}, {0}, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; struct TLSContext *clientssl = 0; int failsilent = 0; char *lmfile = 0; @@ -532,25 +731,32 @@ int main(int ac, char *av[]) { struct tls_buffer request; char lmtime[80]; char *eoh = 0; + char *user_agent = 0; size_t total = 0; size_t header_len; char *url = 0; - int redirs = 0, redirlimit = 50, printstatus = 0; - int verifypolicy = 1; + int redirs = 0, redirlimit = 50, printstatus = 0, showreq = 0; + int verifypolicy = 1, calcoutfile = 0, ifnewer = 0; ltc_mp = tfm_desc; - while ((option = getopt(ac, av, "o:rIfz:#R:SkK")) != -1) { + while ((option = getopt(ac, av, "do:OrIfz:np#RL:SkKU:")) != -1) { switch (option) { + case 'd': debuglevel++; break; case 'o': outfile = optarg; break; + case 'O': calcoutfile = 1; break; case 'S': printstatus = 1; head = 1; break; case 'k': verifypolicy = 0; break; case 'K': verifypolicy = 2; break; + case 'U': user_agent = optarg; break; case 'I': head = 1; case 'r': raw = 1; break; + case 'R': showreq = 1; break; case 'f': failsilent = 1; break; case 'z': lmfile = optarg; break; - case 'R': redirlimit = strtol(optarg, 0, 10); break; + case 'n': ifnewer = 1; break; + case 'L': redirlimit = strtol(optarg, 0, 10); break; + case 'p': case '#': progressbar = 1; break; default: exit(EXIT_FAILURE); @@ -563,34 +769,44 @@ int main(int ac, char *av[]) { exit(EXIT_FAILURE); } + url = strdup(av[optind]); + if (!url) { + exit(EXIT_FAILURE); + } + io.last_modified = 0; - if (lmfile) { - struct stat st; - int rv; - struct tm *mtime; - time_t ts; - rv = stat(lmfile, &st); - if (rv == -1) { - perror("stat failed:"); + if (calcoutfile && !outfile) { + tls_parse_uri(url, &uri); + outfile = pathlast(uri.path); + /* outfile leaks memory here, so if this + * were turned into a library function, + * we'd need to track it + */ + if (!outfile) { + fprintf(stderr, "unable to determine outfile\n"); exit(EXIT_FAILURE); } - ts = st.st_mtime; - io.last_modified = ts; - mtime = gmtime(&ts); - strftime(lmtime, sizeof lmtime, "%a, %d %b %Y %H:%M:%S GMT", mtime); } - url = strdup(av[optind]); - if (!url) { - exit(EXIT_FAILURE); + if (ifnewer && outfile && !lmfile) { + lmfile = outfile; } - if (outfile) { - out = open(outfile, O_WRONLY|O_CREAT, 0600); - if (out == -1) { - perror("can't open output file:"); + if (lmfile) { + struct tm *mtime; + time_t ts; + + ts = file_mtime(lmfile); + + if (ts == -1) { exit(EXIT_FAILURE); + } else if (ts != 0) { + io.last_modified = ts; + mtime = gmtime(&ts); + strftime(lmtime, sizeof lmtime, "%a, %d %b %Y %H:%M:%S GMT", mtime); + } else { + lmfile = 0; } } @@ -602,7 +818,9 @@ int main(int ac, char *av[]) { while (redirs++ <= redirlimit) { tls_free_uri(&uri); io.response.len = 0; + io.chunked = 0; request.len = 0; + eoh = 0; tls_parse_uri(url, &uri); host = uri.host; @@ -615,10 +833,19 @@ int main(int ac, char *av[]) { } else { tls_buffer_append(&request, "GET ", 4); } - tls_buffer_append(&request, uri.path, strlen(uri.path)); + tls_buffer_append(&request, uri.encoded_path, strlen(uri.encoded_path)); + if (uri.encoded_query) { + tls_buffer_append(&request, "?", 1); + tls_buffer_append(&request, uri.encoded_query, strlen(uri.encoded_query)); + } tls_buffer_append(&request, " HTTP/1.1\r\n", 11); append_header(&request, "Host", host); + if (user_agent) { + append_header(&request, "User-Agent", user_agent); + } + append_header(&request, "Accept", "*/*"); + //append_header(&request, "Accept-Encoding", "chunked, identity;q=0.5"); append_header(&request, "Connection", "close"); if (lmfile) { append_header(&request, "If-Modified-Since", lmtime); @@ -628,6 +855,7 @@ int main(int ac, char *av[]) { if (!strcmp(uri.scheme, "https")) { use_tls = 1; + DEBUG(1, "creating tls context\n"); clientssl = tls_create_context(TLS_CLIENT, TLS_V12); /* optionally, we can set a certificate validation @@ -646,10 +874,14 @@ int main(int ac, char *av[]) { fprintf(stderr, "Error loading root certs\n"); return 1; } + DEBUG(1, "verifying ssl cert via roots\n"); tls_set_verify(clientssl, verify_roots); } else if (verifypolicy == 1) { + DEBUG(1, "verifying ssl cert via first use\n"); tls_set_verify(clientssl, verify_first); + DEBUG(1, "verified ssl cert via first use\n"); } else { + DEBUG(1, "verifying ssl cert via trust\n"); tls_set_verify(clientssl, verify_trust); } @@ -658,9 +890,11 @@ int main(int ac, char *av[]) { return -1; } tls_sni_set(clientssl, uri.host); + DEBUG(1, "set sni to %s\n", uri.host); clientssl->sync = 1; io.tls = clientssl; sockfd = open_tcp_connection(host, port); + DEBUG(1, "opened tcp socket fd %d\n", sockfd); if (sockfd < 0) { perror("can't open connection"); exit(EXIT_FAILURE); @@ -687,6 +921,7 @@ int main(int ac, char *av[]) { exit(EXIT_FAILURE); } + DEBUG(1, "wrote http request\n"); if (ret == -1) { fprintf(stderr, "unable to write http request: %s\n", strerror(errno)); exit(EXIT_FAILURE); @@ -694,34 +929,47 @@ int main(int ac, char *av[]) { io.socket = sockfd; + eoh = 0; do { if (io.response.len >= 4) { eoh = strstr(io.response.buffer, "\r\n\r\n"); } if (!eoh) { + DEBUG(1, "filling buffer\n"); ret = fill_buffer(&io); if (ret <= 0) { break; } } } while (!eoh); + DEBUG(1, "got response\n"); if (!eoh) { - /* never got (complet) header */ - fprintf(stderr, "incomplete response to %s\n", av[optind]); + /* never got (complete) header */ + fprintf(stderr, "incomplete response (ret = %zd) to %s\n", ret, url); + fprintf(stderr, "have:\n"); + fwrite(io.response.buffer, io.response.len, 1, stderr); exit(EXIT_FAILURE); } header_len = (size_t)(eoh - io.response.buffer) + 4; + parse_header(&io); + DEBUG(1, "parsed response header, code %d\n", io.status_code); switch (io.status_code) { + case 304: + progressbar = 0; + break; case 301: case 302: case 303: case 307: + DEBUG(1, "redirecting to %s\n", io.redirect); free(url); url = strdup(io.redirect); + DEBUG(1, "redirecting to %s\n", url); + close(io.socket); continue; break; } @@ -730,12 +978,26 @@ int main(int ac, char *av[]) { printf("%d\n", io.status_code); break; } - - if (!raw) { - tls_buffer_shift(&io.response, header_len); + if (showreq) { + fwrite(request.buffer, request.len, 1, stderr); } + if (head) { io.response.len -= 2; + write(out, io.response.buffer, io.response.len); + break; + } + + if (io.status_code == 304) { + break; + } + + if (outfile) { + out = open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0600); + if (out == -1) { + perror("can't open output file:"); + exit(EXIT_FAILURE); + } } if (progressbar) { @@ -744,34 +1006,68 @@ int main(int ac, char *av[]) { } } + if (raw) { + write(out, io.response.buffer, header_len); + } + tls_buffer_shift(&io.response, header_len); + + if (io.chunked) { + /* we've written out the head if needed, so + * what's in the response buffer is the + * chunked encoding, so just reassign that + * to the chunkbuf and reinit */ + io.chunkbuf = io.response; + tls_buffer_init(&io.response, 0); + /* and put whatever we've got into the response + * buffer, may not be needed, fill buffer + * can handle it. + */ + //unchunk(&io); + } + do { - write(out, io.response.buffer, io.response.len); - ret = io.response.len; - io.response.len = 0; + size_t before = io.received; + if (io.response.len) { + if (io.content_length && io.response.len + io.received > io.content_length) { + io.response.len = io.content_length - io.received; + /* we just ignore trailing garbage */ + } + write(out, io.response.buffer, io.response.len); + io.received += io.response.len; + ret = io.response.len; + io.response.len = 0; + } if (progressbar) { if (io.content_length) { - pdots(50, '.', total, total+ret, + pdots(50, '.', before, io.received, io.content_length); } else { - int old = total / 1000000; - int new = (total+ret)/1000000; - while (old < new) { - putc('.',stderr); - } + putc('\r', stderr); + fprintf(stderr, "%zu", io.received); } total += ret; } if (head) { break; } + if (io.content_length && io.received >= io.content_length) { + break; + } ret = fill_buffer(&io); } while (ret > 0); + //fprintf(stderr, "total received: %zu/%zu\n", io.received, io.content_length); if (ret < 0) { fprintf(stderr, "%s read error %zd\n", uri.scheme, ret); } - /* futimens(out, ...) */ + if (io.last_modified != 0) { + struct timespec ts[2]; + ts[0].tv_sec = 0; ts[0].tv_nsec = UTIME_OMIT; + ts[1].tv_sec = io.last_modified; + ts[1].tv_nsec = 0; + futimens(out, ts); + } close(out); tls_buffer_free(&io.response); break; @@ -784,9 +1080,13 @@ int main(int ac, char *av[]) { close(sockfd); if (progressbar && io.status_code == 200) { - fprintf(stderr, "(%lu)", total); - putc('\n',stderr); + if (io.received == io.content_length || io.content_length == 0) { + fprintf(stderr, " done\n"); + } else if (io.content_length != io.received) { + fprintf(stderr, "failed (%zu bytes read)\n", total); + io.status_code = 531; /* non official code */ + } } - return io.status_code == 200 ? 0 : EXIT_FAILURE; + return io.status_code < 400 ? 0 : EXIT_FAILURE; }