Skip to content

Commit 1c38080

Browse files
committed
dd: improve allocation strategy for huge bs
1 parent af81585 commit 1c38080

2 files changed

Lines changed: 32 additions & 25 deletions

File tree

src/uu/dd/src/dd.rs

Lines changed: 24 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ use uucore::error::{USimpleError, set_exit_code};
6767
use uucore::show_if_err;
6868
use uucore::{format_usage, show_error};
6969

70-
const BUF_INIT_BYTE: u8 = 0xDD;
71-
7270
/// Final settings after parsing
7371
#[derive(Default)]
7472
struct Settings {
@@ -520,54 +518,57 @@ impl Input<'_> {
520518
/// Fills a given buffer.
521519
/// Reads in increments of 'self.ibs'.
522520
/// The start of each ibs-sized read follows the previous one.
523-
fn fill_consecutive(&mut self, buf: &mut Vec<u8>) -> io::Result<ReadStat> {
521+
fn fill_consecutive(&mut self, buf: &mut Vec<u8>, bsize: usize) -> io::Result<ReadStat> {
522+
buf.clear();
524523
let mut reads_complete = 0;
525524
let mut reads_partial = 0;
526-
let mut bytes_total = 0;
527-
528-
for chunk in buf.chunks_mut(self.settings.ibs) {
529-
match self.read(chunk)? {
530-
rlen if rlen == self.settings.ibs => {
531-
bytes_total += rlen;
525+
let ibs = self.settings.ibs;
526+
let mut bounded_reader = self.take(bsize as u64);
527+
while buf.len() < bsize {
528+
match bounded_reader.by_ref().take(ibs as u64).read_to_end(buf)? {
529+
n if n == ibs => {
532530
reads_complete += 1;
533531
}
534-
rlen if rlen > 0 => {
535-
bytes_total += rlen;
532+
n if n > 0 => {
536533
reads_partial += 1;
537534
}
538535
_ => break,
539536
}
540537
}
541-
buf.truncate(bytes_total);
538+
542539
Ok(ReadStat {
543540
reads_complete,
544541
reads_partial,
545542
// Records are not truncated when filling.
546543
records_truncated: 0,
547-
bytes_total: bytes_total.try_into().unwrap(),
544+
bytes_total: buf.len() as u64,
548545
})
549546
}
550547

551548
/// Fills a given buffer.
552549
/// Reads in increments of 'self.ibs'.
553550
/// The start of each ibs-sized read is aligned to multiples of ibs; remaining space is filled with the 'pad' byte.
554-
fn fill_blocks(&mut self, buf: &mut Vec<u8>, pad: u8) -> io::Result<ReadStat> {
551+
fn fill_blocks(&mut self, buf: &mut Vec<u8>, bsize: usize, pad: u8) -> io::Result<ReadStat> {
555552
let mut reads_complete = 0;
556553
let mut reads_partial = 0;
557554
let mut base_idx = 0;
558555
let mut bytes_total = 0;
559556

560-
while base_idx < buf.len() {
561-
let next_blk = cmp::min(base_idx + self.settings.ibs, buf.len());
557+
buf.clear();
558+
while base_idx < bsize {
559+
let next_blk = cmp::min(base_idx + self.settings.ibs, bsize);
562560
let target_len = next_blk - base_idx;
561+
buf.resize(next_blk, 0);
563562

564563
match self.read(&mut buf[base_idx..next_blk])? {
565-
0 => break,
564+
0 => {
565+
buf.truncate(base_idx);
566+
break;
567+
}
566568
rlen if rlen < target_len => {
567569
bytes_total += rlen;
568570
reads_partial += 1;
569-
let padding = vec![pad; target_len - rlen];
570-
buf.splice(base_idx + rlen..next_blk, padding.into_iter());
571+
buf[base_idx + rlen..next_blk].fill(pad);
571572
}
572573
rlen => {
573574
bytes_total += rlen;
@@ -578,7 +579,6 @@ impl Input<'_> {
578579
base_idx += self.settings.ibs;
579580
}
580581

581-
buf.truncate(base_idx);
582582
Ok(ReadStat {
583583
reads_complete,
584584
reads_partial,
@@ -1168,7 +1168,8 @@ fn dd_copy(mut i: Input, o: Output) -> io::Result<()> {
11681168

11691169
// Create a common buffer with a capacity of the block size.
11701170
// This is the max size needed.
1171-
let mut buf = vec![BUF_INIT_BYTE; bsize];
1171+
let mut buf = vec![0; 0];
1172+
buf.try_reserve(bsize)?;
11721173

11731174
// Spawn a timer thread to provide a scheduled signal indicating when we
11741175
// should send an update of our progress to the reporting thread.
@@ -1365,12 +1366,10 @@ fn read_helper(i: &mut Input, buf: &mut Vec<u8>, bsize: usize) -> io::Result<Rea
13651366
}
13661367
// ------------------------------------------------------------------
13671368
// Read
1368-
// Resize the buffer to the bsize. Any garbage data in the buffer is overwritten or truncated, so there is no need to fill with BUF_INIT_BYTE first.
1369-
buf.resize(bsize, BUF_INIT_BYTE);
13701369

13711370
let mut rstat = match i.settings.iconv.sync {
1372-
Some(ch) => i.fill_blocks(buf, ch)?,
1373-
_ => i.fill_consecutive(buf)?,
1371+
Some(ch) => i.fill_blocks(buf, bsize, ch)?,
1372+
_ => i.fill_consecutive(buf, bsize)?,
13741373
};
13751374
// Return early if no data
13761375
if rstat.reads_complete == 0 && rstat.reads_partial == 0 {

tests/by-util/test_dd.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,14 @@ fn help() {
115115
new_ucmd!().args(&["--help"]).succeeds();
116116
}
117117

118+
#[test]
119+
fn test_out_of_memory() {
120+
new_ucmd!()
121+
.arg("bs=1PB")
122+
.fails_with_code(1)
123+
.stderr_contains("memory"); // TODO: improve the error message on all platforms
124+
}
125+
118126
#[test]
119127
fn test_stdin_stdout() {
120128
let input = build_ascii_block(521);

0 commit comments

Comments
 (0)