Return stream to pool on exact read (#509)

If the user reads exactly the number of bytes in the response and then
drops the Read object, the stream never gets returned to the pool, since
the user never triggered a read past the end of the LimitedRead.

This fixes that by making PoolReturnRead aware of the level below it, so
it can ask whether a stream is "done" without actually doing a read.
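
In sketch form, the mechanism is roughly the following (simplified,
self-contained stand-ins for illustration only; the real Done trait,
LimitedRead::remaining(), and the `amount == 0 || done()` check are in
the diff below):

    use std::io::{self, Read};

    // A reader can report that its next read would return Ok(0),
    // without the caller having to perform that read.
    trait Done {
        fn done(&self) -> bool;
    }

    // Simplified stand-in for the crate's LimitedRead.
    struct LimitedRead<R> {
        reader: R,
        limit: usize,
        position: usize,
    }

    impl<R: Read> LimitedRead<R> {
        fn remaining(&self) -> usize {
            self.limit - self.position
        }
    }

    impl<R: Read> Read for LimitedRead<R> {
        fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
            // Never read past the declared content length.
            let left = self.remaining().min(buf.len());
            if left == 0 {
                return Ok(0);
            }
            let n = self.reader.read(&mut buf[..left])?;
            self.position += n;
            Ok(n)
        }
    }

    impl<R: Read> Done for LimitedRead<R> {
        // Fully consumed as soon as no bytes remain, even if the caller
        // never observed a 0-byte read.
        fn done(&self) -> bool {
            self.remaining() == 0
        }
    }

    // Stand-in for the check PoolReturnRead performs after each read:
    // hand the stream back as soon as the reader hits EOF *or* says it is done.
    fn should_return_stream<R: Read + Done>(reader: &R, amount_just_read: usize) -> bool {
        amount_just_read == 0 || reader.done()
    }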

Also, a refactoring:

Previously, Response contained an Option<Box<Unit>> because the testing
method `from_str()` would construct a Response with no associated Unit.
However, this increased code complexity with no corresponding test
benefit. Instead, construct a fake Unit in from_str().

Also, instead of taking an `Option<Box<Unit>>`, PoolReturnRead now takes
a URL (to figure out the host and port for the PoolKey) and an &Agent to
which it will return the stream. This cuts interconnectedness somewhat:
PoolReturnRead doesn't need to know about Unit anymore.
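
The new constructor shape, roughly (stand-in Agent and PoolKey types for
illustration, not the crate's exact definitions):

    use std::io::Read;
    use url::Url;

    #[derive(Clone)]
    struct Agent; // stand-in: the real Agent carries config and the pool

    struct PoolKey {
        host: String,
        port: u16,
    }

    struct PoolReturnRead<R: Read> {
        agent: Agent,
        key: PoolKey,
        reader: Option<R>,
    }

    impl<R: Read> PoolReturnRead<R> {
        // Only a URL (for host/port) and an agent handle are needed;
        // the whole Unit no longer has to be threaded through.
        fn new(agent: &Agent, url: &Url, reader: R) -> Self {
            PoolReturnRead {
                agent: agent.clone(),
                key: PoolKey {
                    host: url.host_str().unwrap_or_default().to_string(),
                    port: url.port_or_known_default().unwrap_or(80),
                },
                reader: Some(reader),
            }
        }
    }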
Jacob Hoffman-Andrews
2022-04-30 17:22:27 -07:00
committed by GitHub
parent 4d77d365a0
commit 101467f13f
4 changed files with 154 additions and 70 deletions

View File

@@ -3,10 +3,11 @@ use std::collections::{HashMap, VecDeque};
 use std::io::{self, Read};
 use std::sync::Mutex;
 
+use crate::response::LimitedRead;
 use crate::stream::Stream;
-use crate::unit::Unit;
-use crate::Proxy;
+use crate::{Agent, Proxy};
+use chunked_transfer::Decoder;
 use log::debug;
 use url::Url;
@@ -123,7 +124,7 @@ impl ConnectionPool {
         }
     }
 
-    fn add(&self, key: PoolKey, stream: Stream) {
+    fn add(&self, key: &PoolKey, stream: Stream) {
         if self.noop() {
             return;
         }
@@ -143,7 +144,7 @@ impl ConnectionPool {
                     streams.len(),
                     stream
                 );
-                remove_first_match(&mut inner.lru, &key)
+                remove_first_match(&mut inner.lru, key)
                     .expect("invariant failed: key in recycle but not in lru");
             }
         }
@@ -151,7 +152,7 @@ impl ConnectionPool {
                 vacant_entry.insert(vec![stream].into());
             }
         }
-        inner.lru.push_back(key);
+        inner.lru.push_back(key.clone());
         if inner.lru.len() > self.max_idle_connections {
             drop(inner);
             self.remove_oldest()
@@ -219,28 +220,32 @@ impl PoolKey {
     }
 }
 
-/// Read wrapper that returns the stream to the pool once the
+/// Read wrapper that returns a stream to the pool once the
 /// read is exhausted (reached a 0).
 ///
 /// *Internal API*
 pub(crate) struct PoolReturnRead<R: Read + Sized + Into<Stream>> {
-    // unit that contains the agent where we want to return the reader.
-    unit: Option<Box<Unit>>,
-    // wrapped reader around the same stream
+    // the agent where we want to return the stream.
+    agent: Agent,
+    // wrapped reader around the same stream. It's an Option because we `take()` it
+    // upon returning the stream to the Agent.
     reader: Option<R>,
+    // Key under which to store the stream when we're done.
+    key: PoolKey,
 }
 
 impl<R: Read + Sized + Into<Stream>> PoolReturnRead<R> {
-    pub fn new(unit: Option<Box<Unit>>, reader: R) -> Self {
+    pub fn new(agent: &Agent, url: &Url, reader: R) -> Self {
         PoolReturnRead {
-            unit,
+            agent: agent.clone(),
+            key: PoolKey::new(url, agent.config.proxy.clone()),
             reader: Some(reader),
         }
     }
 
     fn return_connection(&mut self) -> io::Result<()> {
         // guard we only do this once.
-        if let (Some(unit), Some(reader)) = (self.unit.take(), self.reader.take()) {
+        if let Some(reader) = self.reader.take() {
             // bring back stream here to either go into pool or dealloc
             let mut stream = reader.into();
             if !stream.is_poolable() {
@@ -252,8 +257,7 @@ impl<R: Read + Sized + Into<Stream>> PoolReturnRead<R> {
             stream.reset()?;
 
             // insert back into pool
-            let key = PoolKey::new(&unit.url, unit.agent.config.proxy.clone());
-            unit.agent.state.pool.add(key, stream);
+            self.agent.state.pool.add(&self.key, stream);
         }
 
         Ok(())
@@ -267,12 +271,33 @@ impl<R: Read + Sized + Into<Stream>> PoolReturnRead<R> {
     }
 }
 
-impl<R: Read + Sized + Into<Stream>> Read for PoolReturnRead<R> {
+// Done allows a reader to indicate it is done (next read will return Ok(0))
+// without actually performing a read. This is useful so LimitedRead can
+// inform PoolReturnRead to return a stream to the pool even if the user
+// never read past the end of the response (For instance because their
+// application is handling length information on its own).
+pub(crate) trait Done {
+    fn done(&self) -> bool;
+}
+
+impl<R: Read> Done for LimitedRead<R> {
+    fn done(&self) -> bool {
+        self.remaining() == 0
+    }
+}
+
+impl<R: Read> Done for Decoder<R> {
+    fn done(&self) -> bool {
+        false
+    }
+}
+
+impl<R: Read + Sized + Done + Into<Stream>> Read for PoolReturnRead<R> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
         let amount = self.do_read(buf)?;
         // only if the underlying reader is exhausted can we send a new
         // request to the same socket. hence, we only return it now.
-        if amount == 0 {
+        if amount == 0 || self.reader.as_ref().map(|r| r.done()).unwrap_or_default() {
             self.return_connection()?;
         }
         Ok(amount)
@@ -303,7 +328,7 @@ mod tests {
             proxy: None,
         });
         for key in poolkeys.clone() {
-            pool.add(key, Stream::from_vec(vec![]))
+            pool.add(&key, Stream::from_vec(vec![]))
         }
 
         assert_eq!(pool.len(), pool.max_idle_connections);
@@ -328,7 +353,7 @@ mod tests {
         };
 
         for _ in 0..pool.max_idle_connections_per_host * 2 {
-            pool.add(poolkey.clone(), Stream::from_vec(vec![]))
+            pool.add(&poolkey, Stream::from_vec(vec![]))
         }
 
         assert_eq!(pool.len(), pool.max_idle_connections_per_host);
@@ -345,23 +370,42 @@ mod tests {
         // Each insertion should result in an additional entry in the pool.
         let pool = ConnectionPool::new_with_limits(10, 1);
         let url = Url::parse("zzz:///example.com").unwrap();
+        let pool_key = PoolKey::new(&url, None);
 
-        pool.add(PoolKey::new(&url, None), Stream::from_vec(vec![]));
+        pool.add(&pool_key, Stream::from_vec(vec![]));
         assert_eq!(pool.len(), 1);
 
-        pool.add(
-            PoolKey::new(&url, Some(Proxy::new("localhost:9999").unwrap())),
-            Stream::from_vec(vec![]),
-        );
+        let pool_key = PoolKey::new(&url, Some(Proxy::new("localhost:9999").unwrap()));
+
+        pool.add(&pool_key, Stream::from_vec(vec![]));
         assert_eq!(pool.len(), 2);
 
-        pool.add(
-            PoolKey::new(
-                &url,
-                Some(Proxy::new("user:password@localhost:9999").unwrap()),
-            ),
-            Stream::from_vec(vec![]),
-        );
+        let pool_key = PoolKey::new(
+            &url,
+            Some(Proxy::new("user:password@localhost:9999").unwrap()),
+        );
+        pool.add(&pool_key, Stream::from_vec(vec![]));
         assert_eq!(pool.len(), 3);
     }
 
+    // Test that a stream gets returned to the pool if it was wrapped in a LimitedRead, and
+    // user reads the exact right number of bytes (but never gets a read of 0 bytes).
+    #[test]
+    fn read_exact() {
+        let url = Url::parse("https:///example.com").unwrap();
+        let mut out_buf = [0u8; 500];
+        let long_vec = vec![0u8; 1000];
+        let agent = Agent::new();
+        let stream = Stream::from_vec_poolable(long_vec);
+        let limited_read = LimitedRead::new(stream, 500);
+        let mut pool_return_read = PoolReturnRead::new(&agent, &url, limited_read);
+        pool_return_read.read_exact(&mut out_buf).unwrap();
+        assert_eq!(agent.state.pool.len(), 1);
+    }
 }

View File

@@ -6,12 +6,13 @@ use chunked_transfer::Decoder as ChunkDecoder;
 use sync_wrapper::SyncWrapper;
 use url::Url;
 
+use crate::body::SizedReader;
 use crate::error::{Error, ErrorKind::BadStatus};
 use crate::header::{get_all_headers, get_header, Header, HeaderLine};
 use crate::pool::PoolReturnRead;
 use crate::stream::{DeadlineStream, Stream};
 use crate::unit::Unit;
-use crate::{stream, ErrorKind};
+use crate::{stream, Agent, ErrorKind};
 
 #[cfg(feature = "json")]
 use serde::de::DeserializeOwned;
@@ -60,13 +61,13 @@ const MAX_HEADER_COUNT: usize = 100;
 /// # }
 /// ```
 pub struct Response {
-    pub(crate) url: Option<Url>,
+    pub(crate) url: Url,
     status_line: String,
     index: ResponseStatusIndex,
     status: u16,
     headers: Vec<Header>,
     // Boxed to avoid taking up too much size.
-    unit: Option<Box<Unit>>,
+    unit: Box<Unit>,
     // Boxed to avoid taking up too much size.
     stream: SyncWrapper<Box<Stream>>,
     /// The redirect history of this response, if any. The history starts with
@@ -93,14 +94,11 @@ impl fmt::Debug for Response {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         write!(
             f,
-            "Response[status: {}, status_text: {}",
+            "Response[status: {}, status_text: {}, url: {}]",
             self.status(),
             self.status_text(),
-        )?;
-        if let Some(url) = &self.url {
-            write!(f, ", url: {}", url)?;
-        }
-        write!(f, "]")
+            self.url,
+        )
     }
 }
@@ -128,7 +126,7 @@ impl Response {
     /// The URL we ended up at. This can differ from the request url when
     /// we have followed redirects.
     pub fn get_url(&self) -> &str {
-        self.url.as_ref().map(|s| &s[..]).unwrap_or("")
+        &self.url[..]
     }
 
     /// The http version: `HTTP/1.1`
@@ -270,7 +268,7 @@ impl Response {
             .map(|c| c.eq_ignore_ascii_case("close"))
             .unwrap_or(false);
 
-        let is_head = self.unit.as_ref().map(|u| u.is_head()).unwrap_or(false);
+        let is_head = self.unit.is_head();
         let has_no_body = is_head
             || match self.status {
                 204 | 304 => true,
@@ -295,20 +293,24 @@ impl Response {
         let stream = self.stream.into_inner();
         let unit = self.unit;
-        if let Some(unit) = &unit {
-            let result = stream.set_read_timeout(unit.agent.config.timeout_read);
-            if let Err(e) = result {
-                return Box::new(ErrorReader(e)) as Box<dyn Read + Send>;
-            }
-        }
-        let deadline = unit.as_ref().and_then(|u| u.deadline);
+
+        let result = stream.set_read_timeout(unit.agent.config.timeout_read);
+        if let Err(e) = result {
+            return Box::new(ErrorReader(e)) as Box<dyn Read + Send>;
+        }
+        let deadline = unit.deadline;
         let stream = DeadlineStream::new(*stream, deadline);
 
         let body_reader: Box<dyn Read + Send> = match (use_chunked, limit_bytes) {
-            (true, _) => Box::new(PoolReturnRead::new(unit, ChunkDecoder::new(stream))),
-            (false, Some(len)) => {
-                Box::new(PoolReturnRead::new(unit, LimitedRead::new(stream, len)))
-            }
+            (true, _) => Box::new(PoolReturnRead::new(
+                &unit.agent,
+                &unit.url,
+                ChunkDecoder::new(stream),
+            )),
+            (false, Some(len)) => Box::new(PoolReturnRead::new(
+                &unit.agent,
+                &unit.url,
+                LimitedRead::new(stream, len),
+            )),
             (false, None) => Box::new(stream),
         };
@@ -467,11 +469,10 @@ impl Response {
     /// let resp = ureq::Response::do_from_read(read);
     ///
     /// assert_eq!(resp.status(), 401);
-    pub(crate) fn do_from_stream(stream: Stream, unit: Option<Unit>) -> Result<Response, Error> {
+    pub(crate) fn do_from_stream(stream: Stream, unit: Unit) -> Result<Response, Error> {
         //
         // HTTP/1.1 200 OK\r\n
-        let mut stream =
-            stream::DeadlineStream::new(stream, unit.as_ref().and_then(|u| u.deadline));
+        let mut stream = stream::DeadlineStream::new(stream, unit.deadline);
 
         // The status line we can ignore non-utf8 chars and parse as_str_lossy().
         let status_line = read_next_line(&mut stream, "the status line")?.into_string_lossy();
@@ -504,7 +505,7 @@ impl Response {
             headers.retain(|h| !h.is_name("content-encoding") && !h.is_name("content-length"));
         }
 
-        let url = unit.as_ref().map(|u| u.url.clone());
+        let url = unit.url.clone();
 
         Ok(Response {
             url,
@@ -512,7 +513,7 @@ impl Response {
             index,
             status,
             headers,
-            unit: unit.map(Box::new),
+            unit: Box::new(unit),
             stream: SyncWrapper::new(Box::new(stream.into())),
             history: vec![],
             length,
@@ -528,14 +529,13 @@ impl Response {
     #[cfg(test)]
     pub fn set_url(&mut self, url: Url) {
-        self.url = Some(url);
+        self.url = url;
     }
 
     #[cfg(test)]
     pub fn history_from_previous(&mut self, previous: Response) {
-        let previous_url = previous.url.expect("previous url");
         self.history = previous.history;
-        self.history.push(previous_url);
+        self.history.push(previous.url);
     }
 }
@@ -645,7 +645,20 @@ impl FromStr for Response {
     /// ```
     fn from_str(s: &str) -> Result<Self, Self::Err> {
         let stream = Stream::from_vec(s.as_bytes().to_owned());
-        Self::do_from_stream(stream, None)
+        let request_url = "https://example.com".parse().unwrap();
+        let request_reader = SizedReader {
+            size: crate::body::BodySize::Empty,
+            reader: Box::new(std::io::empty()),
+        };
+        let unit = Unit::new(
+            &Agent::new(),
+            "GET",
+            &request_url,
+            vec![],
+            &request_reader,
+            None,
+        );
+        Self::do_from_stream(stream, unit)
     }
 }
@@ -695,30 +708,33 @@ fn read_next_line(reader: &mut impl BufRead, context: &str) -> io::Result<Header
 }
 
 /// Limits a `Read` to a content size (as set by a "Content-Length" header).
-struct LimitedRead<R> {
+pub(crate) struct LimitedRead<R> {
     reader: R,
     limit: usize,
     position: usize,
 }
 
 impl<R: Read> LimitedRead<R> {
-    fn new(reader: R, limit: usize) -> Self {
+    pub(crate) fn new(reader: R, limit: usize) -> Self {
         LimitedRead {
             reader,
             limit,
             position: 0,
         }
     }
+
+    pub(crate) fn remaining(&self) -> usize {
+        self.limit - self.position
+    }
 }
 
 impl<R: Read> Read for LimitedRead<R> {
     fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
-        let left = self.limit - self.position;
-        if left == 0 {
+        if self.remaining() == 0 {
             return Ok(0);
         }
-        let from = if left < buf.len() {
-            &mut buf[0..left]
+        let from = if self.remaining() < buf.len() {
+            &mut buf[0..self.remaining()]
         } else {
             buf
         };
@@ -990,7 +1006,20 @@ mod tests {
         );
         let v = cow.to_vec();
         let s = Stream::from_vec(v);
-        let resp = Response::do_from_stream(s.into(), None).unwrap();
+        let request_url = "https://example.com".parse().unwrap();
+        let request_reader = SizedReader {
+            size: crate::body::BodySize::Empty,
+            reader: Box::new(std::io::empty()),
+        };
+        let unit = Unit::new(
+            &Agent::new(),
+            "GET",
+            &request_url,
+            vec![],
+            &request_reader,
+            None,
+        );
+        let resp = Response::do_from_stream(s.into(), unit).unwrap();
         assert_eq!(resp.status(), 200);
         assert_eq!(resp.header("x-geo-header"), None);
     }

View File

@@ -69,11 +69,11 @@ impl Inner for TcpStream {
     }
 }
 
-struct TestStream(Box<dyn Read + Send + Sync>, Vec<u8>);
+struct TestStream(Box<dyn Read + Send + Sync>, Vec<u8>, bool);
 
 impl Inner for TestStream {
     fn is_poolable(&self) -> bool {
-        false
+        self.2
     }
 
     fn socket(&self) -> Option<&TcpStream> {
         None
@@ -201,7 +201,18 @@ impl Stream {
     pub(crate) fn from_vec(v: Vec<u8>) -> Stream {
         Stream::logged_create(Stream {
-            inner: BufReader::new(Box::new(TestStream(Box::new(Cursor::new(v)), vec![]))),
+            inner: BufReader::new(Box::new(TestStream(
+                Box::new(Cursor::new(v)),
+                vec![],
+                false,
+            ))),
+        })
+    }
+
+    #[cfg(test)]
+    pub(crate) fn from_vec_poolable(v: Vec<u8>) -> Stream {
+        Stream::logged_create(Stream {
+            inner: BufReader::new(Box::new(TestStream(Box::new(Cursor::new(v)), vec![], true))),
         })
     }

View File

@@ -284,7 +284,7 @@ fn connect_inner(
     // TODO: this unit.clone() bothers me. At this stage, we're not
     // going to use the unit (much) anymore, and it should be possible
     // to have ownership of it and pass it into the Response.
-    let result = Response::do_from_stream(stream, Some(unit.clone()));
+    let result = Response::do_from_stream(stream, unit.clone());
 
     // https://tools.ietf.org/html/rfc7230#section-6.3.1
     // When an inbound connection is closed prematurely, a client MAY