From 2ac486ab18adbbb84563eafc0d67fa8da6ca7822 Mon Sep 17 00:00:00 2001 From: Nathan Wagner Date: Fri, 1 Mar 2019 16:59:00 +0000 Subject: [PATCH] switch to blake2 Replaced the sha256 hash used internally with the blake2b variant, which is much faster in software. We use the 256-bit version, which is the same size as sha256. I was unable to find published test vectors for blake2b-256, so the b2sum utility was compiled from the blake2 sources, and the output of that program in 256-bit mode was used to generate testing data. --- lib/integ.c | 20 ++++----- lib/uncompress.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++ lib/zpm.c | 12 +----- lib/zpm_hash.c | 27 +++++++----- schema/main.sql | 10 ++++- src/fetchurl.c | 7 +++- src/hash.c | 99 +++++++++++++++++++++++++++++++------------- t/add.t | 1 + t/addfile.t | 3 +- t/extract.t | 2 +- t/hash.t | 16 ++++---- zpm-gc | 15 ++++++- zpm.h | 6 +++ 13 files changed, 247 insertions(+), 75 deletions(-) diff --git a/lib/integ.c b/lib/integ.c index 13a2505..b51d27e 100644 --- a/lib/integ.c +++ b/lib/integ.c @@ -3,17 +3,17 @@ #include "zpm.h" #include "sqlite3.h" -#include "sha256.h" +#include "lib/blake2/ref/blake2.h" -static void hash_byte(struct sha256_state *h, int ch) { +static void hash_byte(struct blake2b_state__ *h, int ch) { unsigned char buf[1]; buf[0] = ch & 0xff; - sha256_process(h, buf, 1); + blake2b_update(h, buf, 1); } /* i will be positive, we are hashing column sizes */ -static void hash_int(struct sha256_state *h, int i) { +static void hash_int(struct blake2b_state__ *h, int i) { int n; uint64_t z; @@ -56,7 +56,7 @@ static void hash_int(struct sha256_state *h, int i) { * with no delimiters of any kind. 
*/ -static void hash_query(struct zpm *zpm, const char *zSql, struct sha256_state *h) { +static void hash_query(struct zpm *zpm, const char *zSql, struct blake2b_state__ *h) { sqlite3 *db; sqlite3_stmt *pStmt = 0; int nCol; /* Number of columns in the result set */ @@ -86,7 +86,7 @@ static void hash_query(struct zpm *zpm, const char *zSql, struct sha256_state *h nCol = sqlite3_column_count(pStmt); while (sqlite3_step(pStmt) == SQLITE_ROW) { - sha256_process(h, (const unsigned char *)"R", 1); + blake2b_update(h, "R", 1); for (i = 0; i < nCol; i++) { switch (sqlite3_column_type(pStmt, i)) { case SQLITE_NULL: @@ -132,14 +132,14 @@ static void hash_query(struct zpm *zpm, const char *zSql, struct sha256_state *h continue; break; } - sha256_process(h, data, bytes); + blake2b_update(h, data, bytes); } } sqlite3_finalize(pStmt); } int zpm_package_hash(struct zpm *zpm, char *pkgid, char *hash) { - struct sha256_state d; + struct blake2b_state__ d; char *sql; int i; unsigned char tmp[32]; @@ -150,7 +150,7 @@ int zpm_package_hash(struct zpm *zpm, char *pkgid, char *hash) { /* find package */ - sha256_init(&d); + blake2b_init(&d, 32); sql = sqlite3_mprintf("select package,version,release,description,architecture,url,licenses,packager,build_time from packages_pkgid where pkgid = %Q", pkgid); @@ -174,7 +174,7 @@ int zpm_package_hash(struct zpm *zpm, char *pkgid, char *hash) { hash_query(zpm, sql, &d); sqlite3_free(sql); - sha256_done(&d, tmp); + blake2b_final(&d, tmp, sizeof tmp); for (i=0; i<32; i++) { sprintf(hash+i*2, "%02x", (unsigned)tmp[i]); } diff --git a/lib/uncompress.c b/lib/uncompress.c index bbc1adb..b8e0b67 100644 --- a/lib/uncompress.c +++ b/lib/uncompress.c @@ -12,6 +12,110 @@ #include "lzma.h" +ssize_t zpm_uncompress_cb(void *buf, size_t bufsize, void *cbdata, + int (*cb)(void *ud, void *buf, size_t bufsize)) { + lzma_stream s = LZMA_STREAM_INIT; + lzma_stream *strm; + ssize_t bytes = 0; + + uint8_t outbuf[BUFSIZ]; + + int ret; + + strm = &s; + + ret = 
lzma_stream_decoder(strm, UINT64_MAX, 0); + /* The only reasonable error here is LZMA_MEM_ERROR. */ + if (ret != LZMA_OK) { + fprintf(stderr, "%s", ret == LZMA_MEM_ERROR ? strerror(ENOMEM) + : "Internal error (bug)"); + return -1; + } + + strm->avail_in = bufsize; + strm->next_in = buf; + strm->avail_out = BUFSIZ; + strm->next_out = outbuf; + + lzma_action action = LZMA_RUN; + + while (1) { + ret = lzma_code(strm, action); + + // Write and check write error before checking decoder error. + // This way as much data as possible gets written to output + // even if decoder detected an error. + if (strm->avail_out == 0 || ret != LZMA_OK) { + size_t avail = BUFSIZ - strm->avail_out; + ssize_t written = 0; + uint8_t *start; + + start = outbuf; + + while (avail > 0) { + written = cb(cbdata, outbuf, avail); + if (written == -1) { + /* Wouldn't be a surprise if writing to + * stderr would fail too but at least + * try to show an error message. + */ + return -1; + } + avail -= written; + start += written; + bytes += written; + } + + strm->next_out = outbuf; + strm->avail_out = BUFSIZ; + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) { + // lzma_stream_decoder() already guarantees + // that there's no trailing garbage. + assert(strm->avail_in == 0); + //assert(action == LZMA_FINISH); + lzma_end(strm); + return bytes; + } + + lzma_end(strm); + const char *msg; + switch (ret) { + case LZMA_MEM_ERROR: + msg = strerror(ENOMEM); + break; + + case LZMA_FORMAT_ERROR: + msg = "File format not recognized"; + break; + + case LZMA_OPTIONS_ERROR: + // FIXME: Better message? 
+ msg = "Unsupported compression options"; + break; + + case LZMA_DATA_ERROR: + msg = "File is corrupt"; + break; + + case LZMA_BUF_ERROR: + msg = "Unexpected end of input"; + break; + + default: + msg = "Internal error (bug)"; + break; + } + + fprintf(stderr, "zpmuncompress: %s\n", msg); + return -1; + } + } + +} + ssize_t uncompresslzma(void *buf, size_t bufsize, int out) { lzma_stream s = LZMA_STREAM_INIT; lzma_stream *strm; diff --git a/lib/zpm.c b/lib/zpm.c index 3cc7632..63ecc17 100644 --- a/lib/zpm.c +++ b/lib/zpm.c @@ -581,11 +581,9 @@ int zpm_import(struct zpm *zpm, char *path, uint32_t flags, char *hash) { int fd; void *content = 0; struct stat sbuf; - unsigned char tmp[32]; - struct sha256_state md; sqlite3_stmt *ifile = 0; int haverow = 0,havedata = 0; - int j,rc,type; + int rc,type; char hashbuf[65]; /* xz compress it */ @@ -642,13 +640,7 @@ int zpm_import(struct zpm *zpm, char *path, uint32_t flags, char *hash) { return 0; } - /* get hash */ - sha256_init(&md); - sha256_process(&md, content, sbuf.st_size); - sha256_done(&md, tmp); - for (j=0;j<32;j++) { - sprintf(hash+j*2, "%02x", (unsigned)tmp[j]); - } + zpm_hash_mem(content, sbuf.st_size, hash); hash[64] = 0; /* TODO check null */ diff --git a/lib/zpm_hash.c b/lib/zpm_hash.c index a14f7d6..f4c7b67 100644 --- a/lib/zpm_hash.c +++ b/lib/zpm_hash.c @@ -13,16 +13,27 @@ #include "zpm.h" #include "elf.h" -#include "sha256.h" +#include "lib/blake2/ref/blake2.h" + +int zpm_hash_mem(void *mem, size_t size, char *hash) { + struct blake2b_state__ blake; + int j; + unsigned char tmp[32]; + + blake2b_init(&blake, sizeof tmp); + blake2b_update(&blake, mem, size); + blake2b_final(&blake, tmp, sizeof tmp); + for (j=0;j<32;j++) { + sprintf(hash+j*2, "%02x", (unsigned)tmp[j]); + } + return 1; +} /* flags 0, close mmap, flags 1, return mmap fd */ int zpm_hash(char *path, char *hash, uint32_t flags) { int fd; void *content; struct stat sbuf; - struct sha256_state md; - int j; - unsigned char tmp[32]; /* mmap the file 
*/ fd = open(path, O_RDONLY); @@ -48,13 +59,7 @@ int zpm_hash(char *path, char *hash, uint32_t flags) { return 0; } - /* get hash */ - sha256_init(&md); - sha256_process(&md, content, sbuf.st_size); - sha256_done(&md, tmp); - for (j=0;j<32;j++) { - sprintf(hash+j*2, "%02x", (unsigned)tmp[j]); - } + zpm_hash_mem(content, sbuf.st_size, hash); hash[64] = 0; munmap(content, sbuf.st_size); return flags ? fd : 1; diff --git a/schema/main.sql b/schema/main.sql index 85546d9..1980178 100644 --- a/schema/main.sql +++ b/schema/main.sql @@ -6,7 +6,7 @@ PRAGMA user_version = 1; -- TODO copyright and license information should probably -- go here CREATE TABLE files ( - hash text primary key, -- sha256 of content + hash text primary key, -- blake2b-256 of (uncompressed) content size integer, -- bigint? certainly need > 2GB compression text, -- always xz? content blob @@ -15,11 +15,17 @@ CREATE TABLE files ( create view filerefs as select F.hash, -count(PF.hash) + count(S.hash) + count(EL.file) + count(N.file) as refcount +count(PF.hash) + count(S.hash) + count(EL.file) + count(N.file) + count(EN.file) as refcount, +count(PF.hash) as pfrefs, +count(S.hash) as scriptrefs, +count(EL.file) as librefs, +count(EN.file) as needrefs, +count(N.file) as noterefs from files F left join packagefiles PF on PF.hash = F.hash left join scripts S on S.hash = F.hash left join elflibraries EL on EL.file = F.hash +left join elfneeded EN on EN.file = F.hash left join notes N on N.file = F.hash group by F.hash ; diff --git a/src/fetchurl.c b/src/fetchurl.c index 21e2e66..6d59588 100644 --- a/src/fetchurl.c +++ b/src/fetchurl.c @@ -772,6 +772,7 @@ int main(int ac, char *av[]) { io.socket = sockfd; + eoh = 0; do { if (io.response.len >= 4) { eoh = strstr(io.response.buffer, "\r\n\r\n"); @@ -785,8 +786,10 @@ int main(int ac, char *av[]) { } while (!eoh); if (!eoh) { - /* never got (complet) header */ - fprintf(stderr, "incomplete response to %s\n", av[optind]); + /* never got (complete) header */ + 
fprintf(stderr, "incomplete response (ret = %zd) to %s\n", ret, url); + fprintf(stderr, "have:\n"); + fwrite(io.response.buffer, io.response.len, 1, stderr); exit(EXIT_FAILURE); } diff --git a/src/hash.c b/src/hash.c index 46c99d2..498a7a1 100644 --- a/src/hash.c +++ b/src/hash.c @@ -1,49 +1,92 @@ #include #include #include +#include +#include +#include #include "zpm.h" #include "sha256.h" +#include "lib/blake2/ref/blake2.h" + +static int hash_file(int fd, char *hash) { + unsigned char buf[4096]; + ssize_t bytes; + unsigned char tmp[32]; + int j; + +#if 0 + struct sha256_state md; + sha256_init(&md); + do { + bytes = read(fd, buf, sizeof buf); + if (bytes == -1) { + return 0; + } + sha256_process(&md, buf, bytes); + } while (bytes); + sha256_done(&md, tmp); + for (j=0;j<32;j++) { + sprintf(hash+j*2, "%02x", (unsigned)tmp[j]); + } + hash[64] = 0; + +#else + struct blake2b_state__ blake; + blake2b_init(&blake, sizeof tmp); + do { + bytes = read(fd, buf, sizeof buf); + if (bytes == -1) { + return 0; + } + blake2b_update(&blake, buf, bytes); + } while (bytes); + blake2b_final(&blake, tmp, sizeof tmp); + for (j=0;j<32;j++) { + sprintf(hash+j*2, "%02x", (unsigned)tmp[j]); + } + hash[64] = 0; +#endif + return 1; +} int main(int ac, char **av){ int rv; char hash[65]; + int input; + char *filename; + int i; - /* - * hash stdin - */ - if (ac == 1 || (ac == 2 && !strcmp(av[1], "-"))) { - struct sha256_state md; - unsigned char buf[4096]; - size_t bytes; - unsigned char tmp[32]; - int j; - - sha256_init(&md); - do { - bytes = fread(buf, 1, sizeof buf, stdin); - sha256_process(&md, buf, bytes); - } while (bytes && !feof(stdin)); - if (ferror(stdin)) { - exit(1); + for (i=1; i < ac; i++) { + filename = av[i]; + if (strcmp(filename, "-") == 0) { + input = 0; + } else { + input = open(filename, O_RDONLY); + if (input == -1) { + fprintf(stderr, "%s ", filename); + perror("zpm-hash1"); + exit(EXIT_FAILURE); + } } - sha256_done(&md, tmp); - for (j=0;j<32;j++) { - sprintf(hash+j*2, 
"%02x", (unsigned)tmp[j]); + rv = hash_file(input, hash); + close(input); + if (rv == 0) { + perror("zpm-hash2"); + exit(EXIT_FAILURE); } - hash[64] = 0; printf("%s\n", hash); - return 0; } if (ac < 2) { - fprintf(stderr, "usage: path\n"); - return 1; - } - rv = zpm_hash(av[1], hash, 0); - if (rv) { + filename = "-"; + rv = hash_file(0, hash); + if (rv == 0) { + perror("zpm-hash3"); + exit(EXIT_FAILURE); + } printf("%s\n", hash); } - return !rv; + return 0; } diff --git a/t/add.t b/t/add.t index 0305c8d..53a15d5 100755 --- a/t/add.t +++ b/t/add.t @@ -22,6 +22,7 @@ PF=zpmtest-1.0-1.zpm require -v zpm newpackage -C $pkgid require -v zpm add -vvv -f $PF -p $pkgid foo h=$(zpm hash foo) +diag "hash $h" require -v zpm extract -f zpmtest-1.0-1.zpm $h foo2 h2=$(zpm hash foo2) okstreq "$h" "$h2" "foo and foo2 hash match" diff --git a/t/addfile.t b/t/addfile.t index 4b8e093..65188bc 100755 --- a/t/addfile.t +++ b/t/addfile.t @@ -16,8 +16,7 @@ okexit create addfile test db hash=$(zpm addfile $PF test.empty 2>> test.out) okexit add empty file content -okstreq "$hash" e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 hash contents - +okstreq "$hash" 0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8 hash contents appid=$(sqlite3 test.db 'pragma application_id') okstreq $appid 1515209794 application id set diff --git a/t/extract.t b/t/extract.t index 0b8a610..203cc38 100755 --- a/t/extract.t +++ b/t/extract.t @@ -16,7 +16,7 @@ hash=$(zpm-addfile $PF test.foo 2>> test.out) save=$? okexit add foo file content -okstreq "$hash" b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c foo hash contents +okstreq "$hash" 20590a52c4f00588c500328b16d466c982a26fabaa5fa4dcc83052dd0a84f233 foo hash contents rm -f foo diff --git a/t/hash.t b/t/hash.t index 342ddef..d47650b 100755 --- a/t/hash.t +++ b/t/hash.t @@ -5,32 +5,32 @@ . 
tap.sh vtest() { - res=$(zpm-hash "$1" "$2") + res=$(zpm-hash "$1") okstreq "$res" "$2" "$3" } plan 7 printf '' > hash.test -vtest hash.test e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 "empty file" +vtest hash.test 0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8 "empty file" printf 'foo\n' > hash.test -vtest hash.test b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c "one line file" +vtest hash.test 20590a52c4f00588c500328b16d466c982a26fabaa5fa4dcc83052dd0a84f233 "one line file" rm hash.test res=$(printf 'foo\n' | zpm-hash) -okstreq $res b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c "foo" +okstreq $res 20590a52c4f00588c500328b16d466c982a26fabaa5fa4dcc83052dd0a84f233 "foo stdin" res=$(printf 'foo\n' | zpm-hash -) -okstreq $res b5bb9d8014a0f9b1d61e21e796d78dccdf1352f23cd32812f4850b878ae4944c "foo stdin" +okstreq $res 20590a52c4f00588c500328b16d466c982a26fabaa5fa4dcc83052dd0a84f233 "foo named stdin" stest() { res=$(printf '%s' $1 | zpm-hash -) okstreq "$res" "$2" "${1:-empty string}" } -stest abc ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad -stest '' e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 -stest 'abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq' '248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1' +stest abc bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319 +stest '' 0e5751c026e543b2e8ab2eb06099daa1d1e5df47778f7787faab45cdf12fe3a8 +stest 'abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq' '5f7a93da9c5621583f22e49e8e91a40cbba37536622235a380f434b9f68e49c4' finish diff --git a/zpm-gc b/zpm-gc index 2559f0d..fdf8088 100755 --- a/zpm-gc +++ b/zpm-gc @@ -77,7 +77,20 @@ export ZPMDB # check for incorrect hash file content -# remove orphaned elf info +# removes old packages +zpm shell local.db.rehash "select package||'-'||version||'-'||release from package_age where age > 2 and status = 'updated'"|xargs -n1 zpm 
rmpackage -S installed -f local.db.rehash + +select +EL.file, PF.path, +PF.package || '-' || PF.version || '-' || PF.release as pkgid +from elfneeded EL +left join files F on F.hash = EL.file +left join packagefiles PF on PF.hash = EL.file +where F.hash is null +and EL.file is not null +and PF.path is not null +; + # remove failed packages for pkg in $(zpm list -s failed); do diff --git a/zpm.h b/zpm.h index 98971c1..1b61986 100644 --- a/zpm.h +++ b/zpm.h @@ -192,10 +192,16 @@ int zpm_checkinstall(struct zpm *local); int zpm_merge(struct zpm *z, struct zpm *src, uint32_t flags); ssize_t uncompresslzma(void *buf, size_t bufsize, int outfd); +ssize_t zpm_uncompress_cb(void *buf, size_t bufsize, void *cbdata, + int (*cb)(void *ud, void *buf, size_t bufsize)); void *compresslzma(void *buf, size_t bufsize, size_t *len); #define SQLERROR(x) fprintf(stderr, "%s %d: %s\n", __func__, __LINE__, (x)) int zpm_hash(char *path, char *hash, uint32_t flags); + +/* hex encoded hash, not null terminated */ +int zpm_hash_mem(void *mem, size_t size, char *hash); + int zpm_readopts(struct zpm *pkg, int ac, char **av); struct zpm_version_info { -- 2.40.0