]> pd.if.org Git - zpackage/commitdiff
make fetchurl understand chunked encoding
authorNathan Wagner <nw@hydaspes.if.org>
Mon, 4 Mar 2019 23:31:03 +0000 (23:31 +0000)
committerNathan Wagner <nw@hydaspes.if.org>
Mon, 4 Mar 2019 23:31:03 +0000 (23:31 +0000)
doc/zpm-fetchurl.8
src/fetchurl.c

index 6df17c8f2a506ccd4cd75dcbb6f13d10834f378e..7a005c486c576d50bc04574a36bc7ab335b9ff2f 100644 (file)
@@ -1,12 +1,20 @@
-.TH zpm-fetchurl 8 2019-02-27 "ZPM 0.4"
+.TH zpm-fetchurl 8 2019-03-04 "ZPM 0.7.0"
 .SH NAME
 zpm-fetchurl \- download files
 .SH SYNOPSIS
 .B zpm fetchurl
 [
 .SH NAME
 zpm-fetchurl \- download files
 .SH SYNOPSIS
 .B zpm fetchurl
 [
-.B -ISkKrOnp
+.B -OIrRfnp
 ]
 [
 ]
 [
+.BI -L " redirlimit"
+]
+[
+.BI -z " lastmodfile"
+]
+[
+.BI \-U " useragent"
+]
+[
 .BI -o file
 ]
 .I [ url ]
 .BI -o file
 ]
 .I [ url ]
@@ -29,6 +37,10 @@ output the response header only, implies -r
 .B \-r
 output the entire response, including the header
 .TP
 .B \-r
 output the entire response, including the header
 .TP
+.BI -L " limit"
+Limit the maximum number of redirects to \fIlimit\fR.  The default is
+50.
+.TP
 .B \-S
 output the response status code only
 .TP
 .B \-S
 output the response status code only
 .TP
@@ -41,8 +53,8 @@ TLS, but zpm-fetchurl uses a trust on first use policy instead.  Root
 certificates are found by default in /etc/zpm/roots.pem, but the ZPM_ROOTFILE
 environment variable can be used to override this.
 .TP
 certificates are found by default in /etc/zpm/roots.pem, but the ZPM_ROOTFILE
 environment variable can be used to override this.
 .TP
-.BI \-R " limit"
-set the limit on following redirects, defaults to 50
+.B \-R
+Output the request header.
 .TP
 .BI \-z " path"
 Only download if remote is newer than the file given by path
 .TP
 .BI \-z " path"
 Only download if remote is newer than the file given by path
index 6d5958848ecdb9c2e9091824841cefa92b4229f3..15bf1ecb3c2efdbbcd5475c2cfb97865740a63e4 100644 (file)
@@ -276,14 +276,23 @@ int verify_roots(struct TLSContext *context, struct TLSCertificate **chain, int
 
 struct io {
        struct tls_buffer response;
 
 struct io {
        struct tls_buffer response;
+       struct tls_buffer chunkbuf;
        struct TLSContext *tls;
        int socket;
        struct TLSContext *tls;
        int socket;
+       int chunked;
+       int chunknum;
+       size_t chunksize;
+       size_t chunkleft;
+       size_t chunktotal;
+       size_t chunkbytesread;
        int status_code;
        time_t last_modified;
        time_t date;
        size_t content_length;
        int status_code;
        time_t last_modified;
        time_t date;
        size_t content_length;
+       size_t received;
        char *redirect;
 };
        char *redirect;
 };
+ssize_t unchunk(struct io *io);
 
 int month(char *m) {
        char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
 
 int month(char *m) {
        char *months[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul",
@@ -389,6 +398,16 @@ void parse_header(struct io *io) {
                hval[hlen] = '\r';
        }
 
                hval[hlen] = '\r';
        }
 
+       hval = find_header(io, "Transfer-Encoding:", &hlen);
+       if (hval) {
+               hval[hlen] = 0;
+               io->content_length = strtoul(hval, 0, 10);
+               if (!strcmp(hval, "chunked")) {
+                       io->chunked = 1;
+               }
+               hval[hlen] = '\r';
+       }
+
        switch (code) {
                case 301:
                case 302:
        switch (code) {
                case 301:
                case 302:
@@ -405,23 +424,137 @@ void parse_header(struct io *io) {
 
 }
 
 
 }
 
+/* fill_buffer needs to put bytes into the response buffer.
+ * If the transfer encoding is chunked, it first puts the
+ * bytes into the chunkbuf and then calls unchunk.  If unchunk
+ * returns 0, it needs more data; otherwise unchunk returns
+ * the number of bytes transferred.
+ */
+
 ssize_t fill_buffer(struct io *io) {
        unsigned char buffer[4096];
 ssize_t fill_buffer(struct io *io) {
        unsigned char buffer[4096];
-       ssize_t ret;
+       ssize_t ret = 0;
 
 
-       if (io->tls) {
-               ret = tls_read(io->tls, buffer, sizeof buffer);
-       } else {
-               ret = read(io->socket, buffer, sizeof buffer);
-       }
+       ret = unchunk(io);
+
+       while (ret == 0) {
+               if (io->tls) {
+                       ret = tls_read(io->tls, buffer, sizeof buffer);
+               } else {
+                       ret = read(io->socket, buffer, sizeof buffer);
+               }
+
+               if (ret <= 0) {
+                       break;
+               }
 
 
-       if (ret > 0) {
-               tls_buffer_append(&io->response, buffer, ret);
+               if (io->chunked) {
+                       tls_buffer_append(&io->chunkbuf, buffer, ret);
+                       //fwrite(buffer, ret, 1, stderr);
+                       ret = unchunk(io);
+                       if (ret != 0 || io->chunksize == 0) {
+                               break;
+                       }
+               } else {
+                       tls_buffer_append(&io->response, buffer, ret);
+                       break;
+               }
        }
 
        }
 
+       fprintf(stderr, "filled %zd bytes\n", ret);
        return ret;
 }
 
        return ret;
 }
 
+/*
+ * Find the first occurrence of pattern (len bytes) that lies entirely
+ * within the first buflen bytes of buf; essentially memmem.
+ *
+ * Returns a pointer to the start of the match, or NULL when there is
+ * none.  An empty pattern matches at the start of buf.
+ */
+void *lookfor(const void *buf, size_t buflen, const void *pattern, size_t len) {
+       const char *bf = buf;
+       const char *pt = pattern;
+       const char *p = bf;
+       const char *last;
+
+       if (len == 0) {
+               return (void *)bf;
+       }
+       if (buf == NULL || pattern == NULL || buflen < len) {
+               return NULL;
+       }
+
+       /* last position at which a full-length match still fits */
+       last = bf + (buflen - len);
+
+       while (p <= last) {
+               /* only scan start positions that leave room for the
+                * whole pattern, so memcmp never reads past buflen */
+               p = memchr(p, *pt, (size_t)(last - p) + 1);
+               if (p == NULL) {
+                       break;
+               }
+               if (memcmp(p, pattern, len) == 0) {
+                       return (void *)p;
+               }
+               p++;
+       }
+       return NULL;
+}
+
+/*
+ * Parse the hexadecimal chunk-size line at the front of io->chunkbuf
+ * and remove that line (including its CRLF) from the buffer.
+ *
+ * Returns the parsed size, or -1 if no CRLF-terminated line is
+ * buffered yet.
+ */
+ssize_t read_chunksize(struct io *io) {
+       struct tls_buffer *cb = &io->chunkbuf;
+       char *eol;
+       ssize_t size;
+
+       /* the CRLF that ends the previous chunk's data may still be
+        * sitting at the front of the buffer; drop it first */
+       if (cb->len >= 2 && cb->buffer[0] == '\r' && cb->buffer[1] == '\n') {
+               tls_buffer_shift(cb, 2);
+       }
+
+       eol = lookfor(cb->buffer, cb->len, "\r\n", 2);
+       if (!eol) {
+               return -1;
+       }
+
+       size = strtol(cb->buffer, 0, 16);
+       tls_buffer_shift(cb, eol - cb->buffer + 2);
+
+       return size;
+}
+
+/*
+ * Move decoded chunk payload from io->chunkbuf to io->response.
+ *
+ * Returns the number of bytes moved.  Returns 0 when io is null or
+ * chunked decoding is off, when no complete chunk-size line is
+ * buffered yet, when the zero-size chunk is read (which also clears
+ * io->chunked), or when chunkbuf holds no data.
+ */
+ssize_t unchunk(struct io *io) {
+       ssize_t moved;
+
+       if (!io || !io->chunked) {
+               return 0;
+       }
+
+       /* between chunks: the next thing in chunkbuf is a size line */
+       if (io->chunkleft == 0) {
+               ssize_t announced = read_chunksize(io);
+
+               if (announced == -1) {
+                       return 0;
+               }
+               io->chunksize = announced;
+               if (io->chunksize == 0) {
+                       /* end of chunked data */
+                       io->chunked = 0;
+                       return 0;
+               }
+               io->chunknum += 1;
+               io->chunkleft = io->chunksize;
+               io->chunktotal += io->chunksize;
+       }
+
+       if (io->chunkbuf.len == 0) {
+               /* need more bytes */
+               return 0;
+       }
+
+       /* take whatever is buffered, but never more than the chunk has left */
+       if (io->chunkbuf.len < io->chunkleft) {
+               moved = io->chunkbuf.len;
+       } else {
+               moved = io->chunkleft;
+       }
+
+       /* the CRLF that terminates the chunk is left in chunkbuf;
+        * read_chunksize drops it before parsing the next size line */
+       tls_buffer_append(&io->response, io->chunkbuf.buffer, moved);
+       tls_buffer_shift(&io->chunkbuf, moved);
+
+       io->chunkleft -= moved;
+       io->chunkbytesread += moved;
+
+       return moved;
+}
+
 #if 0
 char *nextline(struct io *io) {
        char *eol = 0;;
 #if 0
 char *nextline(struct io *io) {
        char *eol = 0;;
@@ -578,7 +711,7 @@ int main(int ac, char *av[]) {
        int raw = 0, head = 0;
        int out = 1; /* output file descriptor */
        int use_tls = 0;
        int raw = 0, head = 0;
        int out = 1; /* output file descriptor */
        int use_tls = 0;
-       struct io io = { {0}, 0, -1, 0, 0, 0, 0, 0 };
+       struct io io = { {0}, {0}, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
        struct TLSContext *clientssl = 0;
        int failsilent = 0;
        char *lmfile = 0;
        struct TLSContext *clientssl = 0;
        int failsilent = 0;
        char *lmfile = 0;
@@ -590,12 +723,12 @@ int main(int ac, char *av[]) {
        size_t total = 0;
        size_t header_len;
        char *url = 0;
        size_t total = 0;
        size_t header_len;
        char *url = 0;
-       int redirs = 0, redirlimit = 50, printstatus = 0;
+       int redirs = 0, redirlimit = 50, printstatus = 0, showreq = 0;
        int verifypolicy = 1, calcoutfile = 0, ifnewer = 0;
 
        ltc_mp = tfm_desc;
 
        int verifypolicy = 1, calcoutfile = 0, ifnewer = 0;
 
        ltc_mp = tfm_desc;
 
-       while ((option = getopt(ac, av, "o:OrIfz:np#R:SkKU:")) != -1) {
+       while ((option = getopt(ac, av, "o:OrIfz:np#RL:SkKU:")) != -1) {
                switch (option) {
                        case 'o': outfile = optarg; break;
                        case 'O': calcoutfile = 1; break;
                switch (option) {
                        case 'o': outfile = optarg; break;
                        case 'O': calcoutfile = 1; break;
@@ -605,10 +738,11 @@ int main(int ac, char *av[]) {
                        case 'U': user_agent = optarg; break;
                        case 'I': head = 1;
                        case 'r': raw = 1; break;
                        case 'U': user_agent = optarg; break;
                        case 'I': head = 1;
                        case 'r': raw = 1; break;
+                       case 'R': showreq = 1; break;
                        case 'f': failsilent = 1; break;
                        case 'z': lmfile = optarg; break;
                        case 'n': ifnewer = 1; break;
                        case 'f': failsilent = 1; break;
                        case 'z': lmfile = optarg; break;
                        case 'n': ifnewer = 1; break;
-                       case 'R': redirlimit = strtol(optarg, 0, 10); break;
+                       case 'L': redirlimit = strtol(optarg, 0, 10); break;
                        case 'p':
                        case '#': progressbar = 1; break;
                        default:
                        case 'p':
                        case '#': progressbar = 1; break;
                        default:
@@ -671,6 +805,7 @@ int main(int ac, char *av[]) {
        while (redirs++ <= redirlimit) {
                tls_free_uri(&uri);
                io.response.len = 0;
        while (redirs++ <= redirlimit) {
                tls_free_uri(&uri);
                io.response.len = 0;
+               io.chunked = 0;
                request.len = 0;
                eoh = 0;
 
                request.len = 0;
                eoh = 0;
 
@@ -697,6 +832,7 @@ int main(int ac, char *av[]) {
                        append_header(&request, "User-Agent", user_agent);
                }
                append_header(&request, "Accept", "*/*");
                        append_header(&request, "User-Agent", user_agent);
                }
                append_header(&request, "Accept", "*/*");
+               //append_header(&request, "Accept-Encoding", "chunked, identity;q=0.5");
                append_header(&request, "Connection", "close");
                if (lmfile) {
                        append_header(&request, "If-Modified-Since", lmtime);
                append_header(&request, "Connection", "close");
                if (lmfile) {
                        append_header(&request, "If-Modified-Since", lmtime);
@@ -794,6 +930,7 @@ int main(int ac, char *av[]) {
                }
 
                header_len = (size_t)(eoh - io.response.buffer) + 4;
                }
 
                header_len = (size_t)(eoh - io.response.buffer) + 4;
+
                parse_header(&io);
 
                switch (io.status_code) {
                parse_header(&io);
 
                switch (io.status_code) {
@@ -815,32 +952,63 @@ int main(int ac, char *av[]) {
                        printf("%d\n", io.status_code);
                        break;
                }
                        printf("%d\n", io.status_code);
                        break;
                }
-
-               if (!raw) {
-                       tls_buffer_shift(&io.response, header_len);
+               if (showreq) {
+                       fwrite(request.buffer, request.len, 1, stderr);
                }
                }
+
                if (head) {
                        io.response.len -= 2;
                }
 
                if (head) {
                        io.response.len -= 2;
                }
 
+               if (outfile) {
+                       out = open(outfile, O_WRONLY|O_CREAT|O_TRUNC, 0600);
+                       if (out == -1) {
+                               perror("can't open output file:");
+                               exit(EXIT_FAILURE);
+                       }
+               }
+
                if (progressbar) {
                        if (io.content_length) {
                                fprintf(stderr, "(%lu) ", io.content_length);
                        }
                }
 
                if (progressbar) {
                        if (io.content_length) {
                                fprintf(stderr, "(%lu) ", io.content_length);
                        }
                }
 
-               if (outfile) {
-                       out = open(outfile, O_WRONLY|O_CREAT, 0600);
-                       if (out == -1) {
-                               perror("can't open output file:");
-                               exit(EXIT_FAILURE);
-                       }
+               if (head) {
+                       write(out, io.response.buffer, io.response.len);
+                       break;
+               }
+
+               if (raw) {
+                       write(out, io.response.buffer, header_len);
+               }
+               tls_buffer_shift(&io.response, header_len);
+
+               if (io.chunked) {
+                       /* we've written out the head if needed, so
+                        * what's in the response buffer is the
+                        * chunked encoding, so just reassign that
+                        * to the chunkbuf and reinit */
+                       io.chunkbuf = io.response;
+                       tls_buffer_init(&io.response, 0);
+                       /* and put whatever we've got into the response
+                        * buffer, may not be needed, fill buffer
+                        * can handle it.
+                        */ 
+                       //unchunk(&io);
                }
 
                do {
                }
 
                do {
-                       write(out, io.response.buffer, io.response.len);
-                       ret = io.response.len;
-                       io.response.len = 0;
+                       if (io.response.len) {
+                               if (io.content_length && io.response.len + io.received > io.content_length) {
+                                       io.response.len = io.content_length - io.received;
+                                       /* we just ignore trailing garbage */
+                               }
+                               write(out, io.response.buffer, io.response.len);
+                               io.received += io.response.len;
+                               ret = io.response.len;
+                               io.response.len = 0;
+                       }
 
                        if (progressbar) {
                                if (io.content_length) {
 
                        if (progressbar) {
                                if (io.content_length) {
@@ -848,25 +1016,30 @@ int main(int ac, char *av[]) {
                                                        io.content_length);
                                } else {
                                        putc('\r', stderr);
                                                        io.content_length);
                                } else {
                                        putc('\r', stderr);
-                                       fprintf(stderr, "%zu", total+ret);
+                                       fprintf(stderr, "%zu", io.received);
                                }
                                total += ret;
                        }
                        if (head) {
                                break;
                        }
                                }
                                total += ret;
                        }
                        if (head) {
                                break;
                        }
+                       if (io.content_length && io.received >= io.content_length) {
+                               break;
+                       }
                        ret = fill_buffer(&io);
                } while (ret > 0);
 
                        ret = fill_buffer(&io);
                } while (ret > 0);
 
+               //fprintf(stderr, "total received: %zu/%zu\n", io.received, io.content_length);
                if (ret < 0) {
                        fprintf(stderr, "%s read error %zd\n", uri.scheme, ret);
                }
                if (ret < 0) {
                        fprintf(stderr, "%s read error %zd\n", uri.scheme, ret);
                }
-               struct timespec ts[2];
-               ts[0].tv_sec = 0; ts[0].tv_nsec = UTIME_OMIT;
-               ts[1].tv_sec = io.last_modified;
-               ts[1].tv_nsec = 0;
-
-               futimens(out, ts);
+               if (io.last_modified != 0) {
+                       struct timespec ts[2];
+                       ts[0].tv_sec = 0; ts[0].tv_nsec = UTIME_OMIT;
+                       ts[1].tv_sec = io.last_modified;
+                       ts[1].tv_nsec = 0;
+                       futimens(out, ts);
+               }
                close(out);
                tls_buffer_free(&io.response);
                break;
                close(out);
                tls_buffer_free(&io.response);
                break;
@@ -879,8 +1052,12 @@ int main(int ac, char *av[]) {
 
        close(sockfd);
        if (progressbar && io.status_code == 200) {
 
        close(sockfd);
        if (progressbar && io.status_code == 200) {
-               fprintf(stderr, "(%lu)", total);
-               putc('\n',stderr);
+               if (total == io.content_length || io.content_length == 0) {
+                       fprintf(stderr, " done\n");
+               } else if (io.content_length != total) {
+                       fprintf(stderr, "failed (%zu bytes read)\n", total);
+                       io.status_code = 531; /* non official code */
+               }
        }
 
        return io.status_code < 400 ? 0 : EXIT_FAILURE;
        }
 
        return io.status_code < 400 ? 0 : EXIT_FAILURE;