Improve chunked transfer upload performance

This implementation improves over chunked_transfer's Encoder + io::copy
with the following performance optimizations:

1) It avoids copying memory
2) chunked_transfer's Encoder issues 4 separate write() per chunk.
   This is costly overhead. Instead, we do a single write() per chunk

The measured benefit on a Linux machine is a 50% reduction in CPU usage
on an HTTPS connection.
This commit is contained in:
Nicolas Viennot
2020-04-03 16:34:10 +00:00
committed by Martin Algesten
parent b62db974f0
commit 652500f5a8

View File

@@ -1,6 +1,5 @@
use crate::stream::Stream; use crate::stream::Stream;
use chunked_transfer; use std::io::{copy, empty, Cursor, Read, Write, Result as IoResult};
use std::io::{copy, empty, Cursor, Read, Result as IoResult};
#[cfg(feature = "charset")] #[cfg(feature = "charset")]
use crate::response::DEFAULT_CHARACTER_SET; use crate::response::DEFAULT_CHARACTER_SET;
@@ -100,6 +99,69 @@ impl Payload {
} }
} }
const CHUNK_MAX_SIZE: usize = 0x4000; // Maximum size of a TLS fragment
const CHUNK_HEADER_MAX_SIZE: usize = 6; // four hex digits plus "\r\n"
const CHUNK_FOOTER_SIZE: usize = 2; // "\r\n"
const CHUNK_MAX_PAYLOAD_SIZE: usize = CHUNK_MAX_SIZE - CHUNK_HEADER_MAX_SIZE - CHUNK_FOOTER_SIZE;
// copy_chunked() improves over chunked_transfer's Encoder + io::copy with the
// following performance optimizations:
// 1) It avoids copying memory: header, payload and footer share one buffer.
// 2) chunked_transfer's Encoder issues 4 separate write() per chunk. This is costly
//    overhead. Instead, we do a single write() per chunk.
// The measured benefit on a Linux machine is a 50% reduction in CPU usage on an HTTPS connection.
//
// Returns the total number of payload bytes transferred (framing excluded),
// or the first I/O error raised by `reader` or `writer`.
fn copy_chunked<R: Read, W: Write>(reader: &mut R, writer: &mut W) -> IoResult<u64> {
    // The chunk layout is:
    // header:header_max_size | payload:max_payload_size | footer:footer_size
    let mut chunk = Vec::with_capacity(CHUNK_MAX_SIZE);
    let mut written = 0;
    loop {
        // We first read the payload, leaving room at the front for the header.
        chunk.resize(CHUNK_HEADER_MAX_SIZE, 0);
        let payload_size = reader
            .take(CHUNK_MAX_PAYLOAD_SIZE as u64)
            .read_to_end(&mut chunk)?;

        // Then render the header, right-aligned against the payload so that
        // chunk[start_index..] is one contiguous, correctly framed chunk.
        let header_str = format!("{:x}\r\n", payload_size);
        let header = header_str.as_bytes();
        // payload_size <= 0x3FF8, so the header is at most 4 hex digits + CRLF.
        assert!(header.len() <= CHUNK_HEADER_MAX_SIZE);
        let start_index = CHUNK_HEADER_MAX_SIZE - header.len();
        // Exact-length copy_from_slice instead of Write::write on the slice:
        // write() may legally perform a short write (and its result was being
        // discarded), whereas copy_from_slice guarantees the full header lands.
        chunk[start_index..CHUNK_HEADER_MAX_SIZE].copy_from_slice(header);

        // And add the footer
        chunk.extend_from_slice(b"\r\n");

        // Finally write the whole chunk with a single write_all() call.
        writer.write_all(&chunk[start_index..])?;
        written += payload_size as u64;

        // On EOF, we wrote a 0 sized chunk, which terminates the chunked body
        // as the chunked encoding protocol requires.
        if payload_size == 0 {
            return Ok(written);
        }
    }
}
#[test]
fn test_copy_chunked() {
    // Source: one maximum-size payload worth of b'!' bytes, plus a short tail
    // that forces a second, smaller chunk.
    let mut source = vec![33u8; CHUNK_MAX_PAYLOAD_SIZE];
    source.extend_from_slice(b"hello world");

    let mut dest = Vec::<u8>::new();
    copy_chunked(&mut &source[..], &mut dest).unwrap();

    // Expected stream: "<size-hex>\r\n" + payload + "\r\n" per chunk,
    // terminated by the zero-length chunk.
    let mut dest_expected = format!("{:x}\r\n", CHUNK_MAX_PAYLOAD_SIZE).into_bytes();
    dest_expected.extend(std::iter::repeat(33u8).take(CHUNK_MAX_PAYLOAD_SIZE));
    dest_expected.extend_from_slice(b"\r\n");
    dest_expected.extend_from_slice(b"b\r\nhello world\r\n");
    dest_expected.extend_from_slice(b"0\r\n\r\n");

    assert_eq!(dest, dest_expected);
}
/// Helper to send a body, either as chunked or not. /// Helper to send a body, either as chunked or not.
pub(crate) fn send_body( pub(crate) fn send_body(
mut body: SizedReader, mut body: SizedReader,
@@ -107,8 +169,7 @@ pub(crate) fn send_body(
stream: &mut Stream, stream: &mut Stream,
) -> IoResult<u64> { ) -> IoResult<u64> {
let n = if do_chunk { let n = if do_chunk {
let mut chunker = chunked_transfer::Encoder::new(stream); copy_chunked(&mut body.reader, stream)?
copy(&mut body.reader, &mut chunker)?
} else { } else {
copy(&mut body.reader, stream)? copy(&mut body.reader, stream)?
}; };