From dcad62ece826198d941fee395371ef36afd22483 Mon Sep 17 00:00:00 2001 From: claudio Date: Tue, 27 Feb 2024 11:28:30 +0000 Subject: [PATCH] Split hash_file into three steps, setup, add buf and final. Setup inits the context and adds the seed. The buf function simply adds a block from the file to the hash. The final function calls MD4_Final() to close the context and generate the hash. This will help to remove the mmap in the sender and should result in a more atomic view of the file since hash_file() is now called together with the other hash_functions. OK deraadt@ tb@ --- usr.bin/rsync/blocks.c | 9 ++++++++- usr.bin/rsync/extern.h | 11 ++++++++--- usr.bin/rsync/hash.c | 23 +++++++++++++++-------- usr.bin/rsync/sender.c | 5 +++-- 4 files changed, 34 insertions(+), 14 deletions(-) diff --git a/usr.bin/rsync/blocks.c b/usr.bin/rsync/blocks.c index 56790729f27..11f9e2740f0 100644 --- a/usr.bin/rsync/blocks.c +++ b/usr.bin/rsync/blocks.c @@ -1,4 +1,4 @@ -/* $OpenBSD: blocks.c,v 1.21 2021/11/03 14:42:12 deraadt Exp $ */ +/* $OpenBSD: blocks.c,v 1.22 2024/02/27 11:28:30 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons * @@ -280,6 +280,8 @@ blk_match(struct sess *sess, const struct blkset *blks, blk->len, blk->idx); tok = -(blk->idx + 1); + hash_file_buf(&st->ctx, st->map + last, sz + blk->len); + /* * Write the data we have, then follow it with * the tag of the block that matches. @@ -293,6 +295,7 @@ blk_match(struct sess *sess, const struct blkset *blks, st->total += blk->len; st->offs += blk->len; st->hint = blk->idx + 1; + return; } @@ -308,6 +311,8 @@ blk_match(struct sess *sess, const struct blkset *blks, st->curlen = st->curpos + sz; st->curtok = 0; st->curst = sz ? BLKSTAT_DATA : BLKSTAT_TOK; + + hash_file_buf(&st->ctx, st->map + st->curpos, sz); } else { st->curpos = 0; st->curlen = st->mapsz; @@ -315,6 +320,8 @@ blk_match(struct sess *sess, const struct blkset *blks, st->curst = st->mapsz ? 
BLKSTAT_DATA : BLKSTAT_TOK; st->dirty = st->total = st->mapsz; + hash_file_buf(&st->ctx, st->map, st->mapsz); + LOG4("%s: flushing whole file %zu B", path, st->mapsz); } diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h index a36d9371542..940db81d5db 100644 --- a/usr.bin/rsync/extern.h +++ b/usr.bin/rsync/extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: extern.h,v 1.47 2023/11/27 11:30:49 claudio Exp $ */ +/* $OpenBSD: extern.h,v 1.48 2024/02/27 11:28:30 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons * @@ -17,6 +17,8 @@ #ifndef EXTERN_H #define EXTERN_H +#include <openssl/md4.h> + /* * This is the rsync protocol version that we support. */ @@ -214,6 +216,7 @@ struct blkstat { struct blktab *blktab; /* hashtable of blocks */ uint32_t s1; /* partial sum for computing fast hash */ uint32_t s2; /* partial sum for computing fast hash */ + MD4_CTX ctx; /* context for hash_file */ }; /* @@ -388,8 +391,10 @@ int blk_send_ack(struct sess *, int, struct blkset *); uint32_t hash_fast(const void *, size_t); void hash_slow(const void *, size_t, unsigned char *, const struct sess *); -void hash_file(const void *, size_t, unsigned char *, - const struct sess *); + +void hash_file_start(MD4_CTX *, const struct sess *); +void hash_file_buf(MD4_CTX *, const void *, size_t); +void hash_file_final(MD4_CTX *, unsigned char *); void copy_file(int, const char *, const struct flist *); diff --git a/usr.bin/rsync/hash.c b/usr.bin/rsync/hash.c index b87c56f527c..37529e951b4 100644 --- a/usr.bin/rsync/hash.c +++ b/usr.bin/rsync/hash.c @@ -1,4 +1,4 @@ -/* $OpenBSD: hash.c,v 1.4 2021/06/30 13:10:04 claudio Exp $ */ +/* $OpenBSD: hash.c,v 1.5 2024/02/27 11:28:30 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons * @@ -82,14 +82,21 @@ hash_slow(const void *buf, size_t len, * of the sequence, not the beginning. 
*/ void -hash_file(const void *buf, size_t len, - unsigned char *md, const struct sess *sess) +hash_file_start(MD4_CTX *ctx, const struct sess *sess) { - MD4_CTX ctx; int32_t seed = htole32(sess->seed); - MD4_Init(&ctx); - MD4_Update(&ctx, (unsigned char *)&seed, sizeof(int32_t)); - MD4_Update(&ctx, buf, len); - MD4_Final(md, &ctx); + MD4_Init(ctx); + MD4_Update(ctx, (unsigned char *)&seed, sizeof(int32_t)); +} + +void +hash_file_buf(MD4_CTX *ctx, const void *buf, size_t len) +{ + MD4_Update(ctx, buf, len); +} +void +hash_file_final(MD4_CTX *ctx, unsigned char *md) +{ + MD4_Final(md, ctx); } diff --git a/usr.bin/rsync/sender.c b/usr.bin/rsync/sender.c index 8e49f4d0e72..43ccfbd6467 100644 --- a/usr.bin/rsync/sender.c +++ b/usr.bin/rsync/sender.c @@ -1,4 +1,4 @@ -/* $OpenBSD: sender.c,v 1.31 2024/02/19 16:39:18 claudio Exp $ */ +/* $OpenBSD: sender.c,v 1.32 2024/02/27 11:28:30 claudio Exp $ */ /* * Copyright (c) 2019 Kristaps Dzonsons * @@ -159,7 +159,7 @@ send_up_fsm(struct sess *sess, size_t *phase, * finished with the file. */ - hash_file(up->stat.map, up->stat.mapsz, fmd, sess); + hash_file_final(&up->stat.ctx, fmd); if (!io_lowbuffer_alloc(sess, wb, wbsz, wbmax, dsz)) { ERRX1("io_lowbuffer_alloc"); return 0; @@ -619,6 +619,7 @@ rsync_sender(struct sess *sess, int fdin, /* Hash our blocks. */ + hash_file_start(&up.stat.ctx, sess); blkhash_set(up.stat.blktab, up.cur->blks); /* -- 2.20.1