From: Nathan Wagner Date: Mon, 4 Mar 2019 23:31:03 +0000 (+0000) Subject: make fetchurl understand chunked encoding X-Git-Url: https://pd.if.org/git/?p=zpackage;a=commitdiff_plain;h=cae5c951410e659f8c78562903056889f8e989a7 make fetchurl understand chunked encoding --- diff --git a/doc/zpm-fetchurl.8 b/doc/zpm-fetchurl.8 index 6df17c8..7a005c4 100644 --- a/doc/zpm-fetchurl.8 +++ b/doc/zpm-fetchurl.8 @@ -1,12 +1,20 @@ -.TH zpm-fetchurl 8 2019-02-27 "ZPM 0.4" +.TH zpm-fetchurl 8 2019-03-04 "ZPM 0.7.0" .SH NAME zpm-fetchurl \- download files .SH SYNOPSIS .B zpm fetchurl [ -.B -ISkKrOnp +.B -OIrRfnp ] [ +.BI -L " redirlimit" +] +[ +.BI -z " lastmodfile" +] +[ +.BI \-U " useragent"" +[ .BI -o file ] .I [ url ] @@ -29,6 +37,10 @@ output the response header only, implies -r .B \-r output the entire response, including the header .TP +.BI -L " limit" +Limit the maximum number of redirects to \fIlimit\fR. The default is +50. +.TP .B \-S output the response status code only .TP @@ -41,8 +53,8 @@ TLS, but zpm-fetchurl uses a trust on first use policy instead. Root certificates are found by default in /etc/zpm/roots.pem, but the ZPM_ROOTFILE environment variable can be used to override this. .TP -.BI \-R " limit" -set the limit on following redirects, defaults to 50 +.B \-R +Output the request header. .TP .BI \-z " path" Only download if remote is newer than the file given by path diff --git a/src/fetchurl.c b/src/fetchurl.c index 6d59588..15bf1ec 100644 --- a/src/fetchurl.c +++ b/src/fetchurl.c @@ -276,14 +276,23 @@ int verify_roots(struct TLSContext *context, struct TLSCertificate **chain, int struct io { struct tls_buffer response; + struct tls_buffer chunkbuf; struct TLSContext *tls; int socket; + int chunked; + int chunknum; + size_t chunksize; + size_t chunkleft; + size_t chunktotal; + size_t chunkbytesread; int status_code; time_t last_modified; time_t date; size_t content_length; + size_t received; char *redirect; }; +ssize_t unchunk(struct io *io); int month(char *m) { char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", @@ -389,6 +398,16 @@ void parse_header(struct io *io) { hval[hlen] = '\r'; } + hval = find_header(io, "Transfer-Encoding:", &hlen); + if (hval) { + hval[hlen] = 0; + io->content_length = strtoul(hval, 0, 10); + if (!strcmp(hval, "chunked")) { + io->chunked = 1; + } + hval[hlen] = '\r'; + } + switch (code) { case 301: case 302: @@ -405,23 +424,137 @@ void parse_header(struct io *io) { } +/* fill buffer needs to put bytes into the response buffer + * if the transfer encoding is chunked, it will need to + * put the bytes into the chunkbuf first, then call + * unchunk. if unchunk return 0, then it needs more data, + * otherwise unchunk returns the number of bytes transferred + */ + ssize_t fill_buffer(struct io *io) { unsigned char buffer[4096]; - ssize_t ret; + ssize_t ret = 0; - if (io->tls) { - ret = tls_read(io->tls, buffer, sizeof buffer); - } else { - ret = read(io->socket, buffer, sizeof buffer); - } + ret = unchunk(io); + + while (ret == 0) { + if (io->tls) { + ret = tls_read(io->tls, buffer, sizeof buffer); + } else { + ret = read(io->socket, buffer, sizeof buffer); + } + + if (ret <= 0) { + break; + } - if (ret > 0) { - tls_buffer_append(&io->response, buffer, ret); + if (io->chunked) { + tls_buffer_append(&io->chunkbuf, buffer, ret); + //fwrite(buffer, ret, 1, stderr); + ret = unchunk(io); + if (ret != 0 || io->chunksize == 0) { + break; + } + } else { + tls_buffer_append(&io->response, buffer, ret); + break; + } } + fprintf(stderr, "filled %zd bytes\n", ret); return ret; } +/* essentially memmem */ +void *lookfor(const void *buf, size_t buflen, const void *pattern, size_t len) { + const char *bf = buf; + const char *pt = pattern; + const char *p = bf; + + while (len <= (buflen - (p - bf))) { + if ((p = memchr(p, *pt, buflen - (p - bf))) != 0) { + if (memcmp(p, pattern, len) == 0) { + return (void *)p; + } else { + p++; + } + } else { + break; + } + } + return NULL; +} + +/* returns read chunksize, unshifts the line */ +ssize_t read_chunksize(struct io *io) { + char *cr; + ssize_t cs; + + //fwrite(io->chunkbuf.buffer, io->chunkbuf.len, 1, stderr); + + /* there could be up to two leading bytes */ + if (io->chunkbuf.len >= 2 && io->chunkbuf.buffer[0] == '\r' && io->chunkbuf.buffer[1] == '\n') { + tls_buffer_shift(&io->chunkbuf, 2); + } + + cr = lookfor(io->chunkbuf.buffer, io->chunkbuf.len, "\r\n", 2); + + if (cr == 0) { + return -1; + } + + cs = strtol(io->chunkbuf.buffer, 0, 16); + tls_buffer_shift(&io->chunkbuf, cr - io->chunkbuf.buffer + 2); + + return cs; +} + +/* unchunk's job is to move bytes from the chunk buf to the response buf */ +/* return bytes from chunk, 0 if unable. once last chunk, changed chunked + * to 0? + */ +ssize_t unchunk(struct io *io) { + ssize_t bytes_to_move = 0; + ssize_t chunksize; + + if (!io || !io->chunked) { + return 0; + } + + if (io->chunkleft == 0) { + chunksize = read_chunksize(io); + if (chunksize == -1) { + return 0; + } + io->chunksize = chunksize; + if (io->chunksize == 0) { + /* end of chunked data */ + io->chunked = 0; + return 0; + } + io->chunknum++; + io->chunkleft = io->chunksize; + io->chunktotal += io->chunksize; + } + + if (io->chunkbuf.len == 0) { + /* need more bytes */ + return 0; + } + + bytes_to_move = io->chunkbuf.len < io->chunkleft ? io->chunkbuf.len : io->chunkleft; + + tls_buffer_append(&io->response, io->chunkbuf.buffer, bytes_to_move); + io->chunkleft -= bytes_to_move; + io->chunkbytesread += bytes_to_move; + + /* chunk is terminated with a crlf */ + //tls_buffer_shift(&io->chunkbuf, bytes_to_move + io->chunkleft ? 0 : 2); + tls_buffer_shift(&io->chunkbuf, bytes_to_move); + + return bytes_to_move; +} + #if 0 char *nextline(struct io *io) { char *eol = 0;; @@ -578,7 +711,7 @@ int main(int ac, char *av[]) { int raw = 0, head = 0; int out = 1; /* output file descriptor */ int use_tls = 0; - struct io io = { {0}, 0, -1, 0, 0, 0, 0, 0 }; + struct io io = { {0}, {0}, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; struct TLSContext *clientssl = 0; int failsilent = 0; char *lmfile = 0; @@ -590,12 +723,12 @@ int main(int ac, char *av[]) { size_t total = 0; size_t header_len; char *url = 0; - int redirs = 0, redirlimit = 50, printstatus = 0; + int redirs = 0, redirlimit = 50, printstatus = 0, showreq = 0; int verifypolicy = 1, calcoutfile = 0, ifnewer = 0; ltc_mp = tfm_desc; - while ((option = getopt(ac, av, "o:OrIfz:np#R:SkKU:")) != -1) { + while ((option = getopt(ac, av, "o:OrIfz:np#RL:SkKU:")) != -1) { switch (option) { case 'o': outfile = optarg; break; case 'O': calcoutfile = 1; break; @@ -605,10 +738,11 @@ int main(int ac, char *av[]) { case 'U': user_agent = optarg; break; case 'I': head = 1; case 'r': raw = 1; break; + case 'R': showreq = 1; break; case 'f': failsilent = 1; break; case 'z': lmfile = optarg; break; case 'n': ifnewer = 1; break; - case 'R': redirlimit = strtol(optarg, 0, 10); break; + case 'L': redirlimit = strtol(optarg, 0, 10); break; case 'p': case '#': progressbar = 1; break; default: @@ -671,6 +805,7 @@ int main(int ac, char *av[]) { while (redirs++ <= redirlimit) { tls_free_uri(&uri); io.response.len = 0; + io.chunked = 0; request.len = 0; eoh = 0; @@ -697,6 +832,7 @@ int main(int ac, char *av[]) { append_header(&request, "User-Agent", user_agent); } append_header(&request, "Accept", "*/*"); + //append_header(&request, "Accept-Encoding", "chunked, identity;q=0.5"); append_header(&request, "Connection", "close"); if (lmfile) { append_header(&request, "If-Modified-Since", lmtime); @@ -794,6 +930,7 @@ int main(int ac, char *av[]) { } header_len = (size_t)(eoh - io.response.buffer) + 4; + parse_header(&io); switch (io.status_code) { @@ -815,32 +952,63 @@ int main(int ac, char *av[]) { printf("%d\n", io.status_code); break; } - - if (!raw) { - tls_buffer_shift(&io.response, header_len); + if (showreq) { + fwrite(request.buffer, request.len, 1, stderr); } + if (head) { io.response.len -= 2; } + if (outfile) { + out = open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0600); + if (out == -1) { + perror("can't open output file:"); + exit(EXIT_FAILURE); + } + } + if (progressbar) { if (io.content_length) { fprintf(stderr, "(%lu) ", io.content_length); } } - if (outfile) { - out = open(outfile, O_WRONLY|O_CREAT, 0600); - if (out == -1) { - perror("can't open output file:"); - exit(EXIT_FAILURE); - } + if (head) { + write(out, io.response.buffer, io.response.len); + break; + } + + if (raw) { + write(out, io.response.buffer, header_len); + } + tls_buffer_shift(&io.response, header_len); + + if (io.chunked) { + /* we've written out the head if needed, so + * what's in the response buffer is the + * chunked encoding, so just reassign that + * to the chunkbuf and reinit */ + io.chunkbuf = io.response; + tls_buffer_init(&io.response, 0); + /* and put whatever we've got into the response + * buffer, may not be needed, fill buffer + * can handle it. + */ + //unchunk(&io); } do { - write(out, io.response.buffer, io.response.len); - ret = io.response.len; - io.response.len = 0; + if (io.response.len) { + if (io.content_length && io.response.len + io.received > io.content_length) { + io.response.len = io.content_length - io.received; + /* we just ignore trailing garbage */ + } + write(out, io.response.buffer, io.response.len); + io.received += io.response.len; + ret = io.response.len; + io.response.len = 0; + } if (progressbar) { if (io.content_length) { @@ -848,25 +1016,30 @@ int main(int ac, char *av[]) { io.content_length); } else { putc('\r', stderr); - fprintf(stderr, "%zu", total+ret); + fprintf(stderr, "%zu", io.received); } total += ret; } if (head) { break; } + if (io.content_length && io.received >= io.content_length) { + break; + } ret = fill_buffer(&io); } while (ret > 0); + //fprintf(stderr, "total received: %zu/%zu\n", io.received, io.content_length); if (ret < 0) { fprintf(stderr, "%s read error %zd\n", uri.scheme, ret); } - struct timespec ts[2]; - ts[0].tv_sec = 0; ts[0].tv_nsec = UTIME_OMIT; - ts[1].tv_sec = io.last_modified; - ts[1].tv_nsec = 0; - - futimens(out, ts); + if (io.last_modified != 0) { + struct timespec ts[2]; + ts[0].tv_sec = 0; ts[0].tv_nsec = UTIME_OMIT; + ts[1].tv_sec = io.last_modified; + ts[1].tv_nsec = 0; + futimens(out, ts); + } close(out); tls_buffer_free(&io.response); break; @@ -879,8 +1052,12 @@ int main(int ac, char *av[]) { close(sockfd); if (progressbar && io.status_code == 200) { - fprintf(stderr, "(%lu)", total); - putc('\n',stderr); + if (total == io.content_length || io.content_length == 0) { + fprintf(stderr, " done\n"); + } else if (io.content_length != total) { + fprintf(stderr, "failed (%zu bytes read)\n", total); + io.status_code = 531; /* non official code */ + } } return io.status_code < 400 ? 0 : EXIT_FAILURE;