Use iteration instead of recursion for connect (#291)
This allows handling larger redirect chains. Fixes #290
This commit is contained in:
27
src/error.rs
27
src/error.rs
@@ -103,8 +103,8 @@ impl Display for Error {
|
|||||||
match self {
|
match self {
|
||||||
Error::Status(status, response) => {
|
Error::Status(status, response) => {
|
||||||
write!(f, "{}: status code {}", response.get_url(), status)?;
|
write!(f, "{}: status code {}", response.get_url(), status)?;
|
||||||
if let Some(original) = response.history().last() {
|
if let Some(original) = response.history.get(0) {
|
||||||
write!(f, " (redirected from {})", original.get_url())?;
|
write!(f, " (redirected from {})", original)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Error::Transport(err) => {
|
Error::Transport(err) => {
|
||||||
@@ -300,19 +300,22 @@ fn status_code_error() {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn status_code_error_redirect() {
|
fn status_code_error_redirect() {
|
||||||
use std::sync::Arc;
|
use crate::{get, test};
|
||||||
let mut response0 = Response::new(302, "Found", "").unwrap();
|
|
||||||
response0.set_url("http://example.org/".parse().unwrap());
|
|
||||||
let mut response1 = Response::new(302, "Found", "").unwrap();
|
|
||||||
response1.set_previous(Arc::new(response0));
|
|
||||||
let mut response2 = Response::new(500, "Internal Server Error", "server overloaded").unwrap();
|
|
||||||
response2.set_previous(Arc::new(response1));
|
|
||||||
response2.set_url("http://example.com/".parse().unwrap());
|
|
||||||
let err = Error::Status(response2.status(), response2);
|
|
||||||
|
|
||||||
|
test::set_handler("/redirect_a", |unit| {
|
||||||
|
assert_eq!(unit.method, "GET");
|
||||||
|
test::make_response(302, "Go here", vec!["Location: test://example.edu/redirect_b"], vec![])
|
||||||
|
});
|
||||||
|
test::set_handler("/redirect_b", |unit| {
|
||||||
|
assert_eq!(unit.method, "GET");
|
||||||
|
test::make_response(302, "Go here", vec!["Location: http://example.com/status/500"], vec![])
|
||||||
|
});
|
||||||
|
|
||||||
|
let err = get("test://example.org/redirect_a").call().unwrap_err();
|
||||||
|
assert_eq!(err.kind(), ErrorKind::HTTP, "{:?}", err);
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
err.to_string(),
|
err.to_string(),
|
||||||
"http://example.com/: status code 500 (redirected from http://example.org/)"
|
"http://example.com/status/500: status code 500 (redirected from test://example.org/redirect_a)"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ impl Request {
|
|||||||
}
|
}
|
||||||
let reader = payload.into_read();
|
let reader = payload.into_read();
|
||||||
let unit = Unit::new(&self.agent, &self.method, &url, &self.headers, &reader);
|
let unit = Unit::new(&self.agent, &self.method, &url, &self.headers, &reader);
|
||||||
let response = unit::connect(unit, true, reader, None).map_err(|e| e.url(url.clone()))?;
|
let response = unit::connect(unit, true, reader).map_err(|e| e.url(url.clone()))?;
|
||||||
|
|
||||||
if response.status() >= 400 {
|
if response.status() >= 400 {
|
||||||
Err(Error::Status(response.status(), response))
|
Err(Error::Status(response.status(), response))
|
||||||
|
|||||||
@@ -1,9 +1,6 @@
|
|||||||
|
use std::io::{self, Read};
|
||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
use std::{fmt, io::BufRead};
|
use std::{fmt, io::BufRead};
|
||||||
use std::{
|
|
||||||
io::{self, Read},
|
|
||||||
sync::Arc,
|
|
||||||
};
|
|
||||||
|
|
||||||
use chunked_transfer::Decoder as ChunkDecoder;
|
use chunked_transfer::Decoder as ChunkDecoder;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
@@ -52,9 +49,12 @@ pub struct Response {
|
|||||||
headers: Vec<Header>,
|
headers: Vec<Header>,
|
||||||
unit: Option<Unit>,
|
unit: Option<Unit>,
|
||||||
stream: Stream,
|
stream: Stream,
|
||||||
// If this Response resulted from a redirect, the Response containing
|
/// The redirect history of this response, if any. The history starts with
|
||||||
// that redirect.
|
/// the first response received and ends with the response immediately
|
||||||
previous: Option<Arc<Response>>,
|
/// previous to this one.
|
||||||
|
///
|
||||||
|
/// If this response was not redirected, the history is empty.
|
||||||
|
pub(crate) history: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// index into status_line where we split: HTTP/1.1 200 OK
|
/// index into status_line where we split: HTTP/1.1 200 OK
|
||||||
@@ -390,13 +390,6 @@ impl Response {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns an iterator across the redirect history of this response,
|
|
||||||
// if any. The iterator starts with the response before this one.
|
|
||||||
// If this response was not redirected, the iterator is empty.
|
|
||||||
pub(crate) fn history(&self) -> Hist {
|
|
||||||
Hist::new(self.previous.as_deref())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a response from a Read trait impl.
|
/// Create a response from a Read trait impl.
|
||||||
///
|
///
|
||||||
/// This is hopefully useful for unit tests.
|
/// This is hopefully useful for unit tests.
|
||||||
@@ -438,18 +431,13 @@ impl Response {
|
|||||||
headers,
|
headers,
|
||||||
unit,
|
unit,
|
||||||
stream: stream.into(),
|
stream: stream.into(),
|
||||||
previous: None,
|
history: vec![],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn do_from_request(
|
pub(crate) fn do_from_request(unit: Unit, stream: Stream) -> Result<Response, Error> {
|
||||||
unit: Unit,
|
|
||||||
stream: Stream,
|
|
||||||
previous: Option<Arc<Response>>,
|
|
||||||
) -> Result<Response, Error> {
|
|
||||||
let url = Some(unit.url.clone());
|
let url = Some(unit.url.clone());
|
||||||
let mut resp = Response::do_from_stream(stream, Some(unit))?;
|
let mut resp = Response::do_from_stream(stream, Some(unit))?;
|
||||||
resp.previous = previous;
|
|
||||||
resp.url = url;
|
resp.url = url;
|
||||||
Ok(resp)
|
Ok(resp)
|
||||||
}
|
}
|
||||||
@@ -465,8 +453,10 @@ impl Response {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn set_previous(&mut self, previous: Arc<Response>) {
|
pub fn history_from_previous(&mut self, previous: Response) {
|
||||||
self.previous = Some(previous);
|
let previous_url = previous.get_url().to_string();
|
||||||
|
self.history = previous.history;
|
||||||
|
self.history.push(previous_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -538,31 +528,6 @@ impl FromStr for Response {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hist is an iterator over the history of a redirected response. It
|
|
||||||
// yields the URLs that were requested in backwards order, from most recent
|
|
||||||
// to least recent.
|
|
||||||
pub(crate) struct Hist<'a> {
|
|
||||||
response: Option<&'a Response>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Hist<'a> {
|
|
||||||
fn new(response: Option<&'a Response>) -> Hist<'a> {
|
|
||||||
Hist { response }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<'a> Iterator for Hist<'a> {
|
|
||||||
type Item = &'a Response;
|
|
||||||
fn next(&mut self) -> Option<&'a Response> {
|
|
||||||
let response = match self.response {
|
|
||||||
None => return None,
|
|
||||||
Some(r) => r,
|
|
||||||
};
|
|
||||||
|
|
||||||
self.response = response.previous.as_deref();
|
|
||||||
Some(response)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn read_next_line(reader: &mut impl BufRead) -> io::Result<String> {
|
fn read_next_line(reader: &mut impl BufRead) -> io::Result<String> {
|
||||||
let mut s = String::new();
|
let mut s = String::new();
|
||||||
if reader.read_line(&mut s)? == 0 {
|
if reader.read_line(&mut s)? == 0 {
|
||||||
@@ -774,18 +739,18 @@ mod tests {
|
|||||||
fn history() {
|
fn history() {
|
||||||
let mut response0 = Response::new(302, "Found", "").unwrap();
|
let mut response0 = Response::new(302, "Found", "").unwrap();
|
||||||
response0.set_url("http://1.example.com/".parse().unwrap());
|
response0.set_url("http://1.example.com/".parse().unwrap());
|
||||||
assert_eq!(response0.history().count(), 0);
|
assert!(response0.history.is_empty());
|
||||||
|
|
||||||
let mut response1 = Response::new(302, "Found", "").unwrap();
|
let mut response1 = Response::new(302, "Found", "").unwrap();
|
||||||
response1.set_url("http://2.example.com/".parse().unwrap());
|
response1.set_url("http://2.example.com/".parse().unwrap());
|
||||||
response1.set_previous(Arc::new(response0));
|
response1.history_from_previous(response0);
|
||||||
|
|
||||||
let mut response2 = Response::new(404, "NotFound", "").unwrap();
|
let mut response2 = Response::new(404, "NotFound", "").unwrap();
|
||||||
response2.set_url("http://2.example.com/".parse().unwrap());
|
response2.set_url("http://2.example.com/".parse().unwrap());
|
||||||
response2.set_previous(Arc::new(response1));
|
response2.history_from_previous(response1);
|
||||||
|
|
||||||
let hist: Vec<&str> = response2.history().map(|r| r.get_url()).collect();
|
let hist: Vec<&str> = response2.history.iter().map(|r| &**r).collect();
|
||||||
assert_eq!(hist, ["http://2.example.com/", "http://1.example.com/"])
|
assert_eq!(hist, ["http://1.example.com/", "http://2.example.com/"])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -35,6 +35,22 @@ fn redirect_many() {
|
|||||||
.get("test://host/redirect_many1")
|
.get("test://host/redirect_many1")
|
||||||
.call();
|
.call();
|
||||||
assert!(matches!(result, Err(e) if e.kind() == ErrorKind::TooManyRedirects));
|
assert!(matches!(result, Err(e) if e.kind() == ErrorKind::TooManyRedirects));
|
||||||
|
|
||||||
|
test::set_handler("/redirect_many1", |_| {
|
||||||
|
test::make_response(302, "Go here", vec!["Location: /redirect_many2"], vec![])
|
||||||
|
});
|
||||||
|
test::set_handler("/redirect_many2", |_| {
|
||||||
|
test::make_response(302, "Go here", vec!["Location: /redirect_many3"], vec![])
|
||||||
|
});
|
||||||
|
test::set_handler("/redirect_many3", |_| {
|
||||||
|
test::make_response(302, "Go here", vec!["Location: /redirect_many4"], vec![])
|
||||||
|
});
|
||||||
|
let result = builder()
|
||||||
|
.redirects(2)
|
||||||
|
.build()
|
||||||
|
.get("test://host/redirect_many1")
|
||||||
|
.call();
|
||||||
|
assert!(matches!(result, Err(e) if e.kind() == ErrorKind::TooManyRedirects));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -141,3 +157,22 @@ fn redirect_308() {
|
|||||||
assert_eq!(resp.status(), 200);
|
assert_eq!(resp.status(), 200);
|
||||||
assert_eq!(resp.get_url(), "test://host/valid_response");
|
assert_eq!(resp.get_url(), "test://host/valid_response");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn too_many_redirects() {
|
||||||
|
for i in 0..10_000 {
|
||||||
|
test::set_handler(&format!("/malicious_redirect_{}", i), move |_| {
|
||||||
|
let location = format!("Location: /malicious_redirect_{}", i + 1);
|
||||||
|
test::make_response(302, "Go here", vec![&location], vec![])
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
test::set_handler("/malicious_redirect_10000", |unit| {
|
||||||
|
assert_eq!(unit.method, "GET");
|
||||||
|
test::make_response(200, "OK", vec![], vec![])
|
||||||
|
});
|
||||||
|
|
||||||
|
let req = crate::builder().redirects(10001).build();
|
||||||
|
let resp = req.get("test://host/malicious_redirect_0").call().unwrap();
|
||||||
|
assert_eq!(resp.get_url(), "test://host/malicious_redirect_10000");
|
||||||
|
}
|
||||||
|
|||||||
129
src/unit.rs
129
src/unit.rs
@@ -1,8 +1,5 @@
|
|||||||
|
use std::io::{self, Write};
|
||||||
use std::time;
|
use std::time;
|
||||||
use std::{
|
|
||||||
io::{self, Write},
|
|
||||||
sync::Arc,
|
|
||||||
};
|
|
||||||
|
|
||||||
use log::{debug, info};
|
use log::{debug, info};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
@@ -163,12 +160,70 @@ impl Unit {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Perform a connection. Used recursively for redirects.
|
/// Perform a connection. Follows redirects.
|
||||||
pub(crate) fn connect(
|
pub(crate) fn connect(
|
||||||
unit: Unit,
|
mut unit: Unit,
|
||||||
|
use_pooled: bool,
|
||||||
|
mut body: SizedReader,
|
||||||
|
) -> Result<Response, Error> {
|
||||||
|
let mut history = vec![];
|
||||||
|
let mut resp = loop {
|
||||||
|
let resp = connect_inner(&unit, use_pooled, body, &history)?;
|
||||||
|
|
||||||
|
// handle redirects
|
||||||
|
if !(300..399).contains(&resp.status()) || unit.agent.config.redirects == 0 {
|
||||||
|
break resp;
|
||||||
|
}
|
||||||
|
if history.len() + 1 >= unit.agent.config.redirects as usize {
|
||||||
|
return Err(ErrorKind::TooManyRedirects.new());
|
||||||
|
}
|
||||||
|
// the location header
|
||||||
|
let location = match resp.header("location") {
|
||||||
|
Some(l) => l,
|
||||||
|
None => break resp,
|
||||||
|
};
|
||||||
|
|
||||||
|
let url = &unit.url;
|
||||||
|
let method = &unit.method;
|
||||||
|
// join location header to current url in case it is relative
|
||||||
|
let new_url = url.join(location).map_err(|e| {
|
||||||
|
ErrorKind::InvalidUrl
|
||||||
|
.msg(&format!("Bad redirection: {}", location))
|
||||||
|
.src(e)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
// perform the redirect differently depending on 3xx code.
|
||||||
|
let new_method = match resp.status() {
|
||||||
|
// this is to follow how curl does it. POST, PUT etc change
|
||||||
|
// to GET on a redirect.
|
||||||
|
301 | 302 | 303 => match &method[..] {
|
||||||
|
"GET" | "HEAD" => unit.method,
|
||||||
|
_ => "GET".into(),
|
||||||
|
},
|
||||||
|
// never change the method for 307/308
|
||||||
|
// only resend the request if it cannot have a body
|
||||||
|
// NOTE: DELETE is intentionally excluded: https://stackoverflow.com/questions/299628
|
||||||
|
307 | 308 if ["GET", "HEAD", "OPTIONS", "TRACE"].contains(&method.as_str()) => {
|
||||||
|
unit.method
|
||||||
|
}
|
||||||
|
_ => break resp,
|
||||||
|
};
|
||||||
|
debug!("redirect {} {} -> {}", resp.status(), url, new_url);
|
||||||
|
history.push(unit.url.to_string());
|
||||||
|
body = Payload::Empty.into_read();
|
||||||
|
// recreate the unit to get a new hostname and cookies for the new host.
|
||||||
|
unit = Unit::new(&unit.agent, &new_method, &new_url, &unit.headers, &body);
|
||||||
|
};
|
||||||
|
resp.history = history;
|
||||||
|
Ok(resp)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Perform a connection. Does not follow redirects.
|
||||||
|
fn connect_inner(
|
||||||
|
unit: &Unit,
|
||||||
use_pooled: bool,
|
use_pooled: bool,
|
||||||
body: SizedReader,
|
body: SizedReader,
|
||||||
previous: Option<Arc<Response>>,
|
previous: &[String],
|
||||||
) -> Result<Response, Error> {
|
) -> Result<Response, Error> {
|
||||||
let host = unit
|
let host = unit
|
||||||
.url
|
.url
|
||||||
@@ -185,14 +240,15 @@ pub(crate) fn connect(
|
|||||||
info!("sending request {} {}", method, url);
|
info!("sending request {} {}", method, url);
|
||||||
}
|
}
|
||||||
|
|
||||||
let send_result = send_prelude(&unit, &mut stream, previous.is_some());
|
let send_result = send_prelude(&unit, &mut stream, !previous.is_empty());
|
||||||
|
|
||||||
if let Err(err) = send_result {
|
if let Err(err) = send_result {
|
||||||
if is_recycled {
|
if is_recycled {
|
||||||
debug!("retrying request early {} {}: {}", method, url, err);
|
debug!("retrying request early {} {}: {}", method, url, err);
|
||||||
// we try open a new connection, this time there will be
|
// we try open a new connection, this time there will be
|
||||||
// no connection in the pool. don't use it.
|
// no connection in the pool. don't use it.
|
||||||
return connect(unit, false, body, previous);
|
// NOTE: this recurses at most once because `use_pooled` is `false`.
|
||||||
|
return connect_inner(unit, false, body, previous);
|
||||||
} else {
|
} else {
|
||||||
// not a pooled connection, propagate the error.
|
// not a pooled connection, propagate the error.
|
||||||
return Err(err.into());
|
return Err(err.into());
|
||||||
@@ -204,7 +260,7 @@ pub(crate) fn connect(
|
|||||||
body::send_body(body, unit.is_chunked, &mut stream)?;
|
body::send_body(body, unit.is_chunked, &mut stream)?;
|
||||||
|
|
||||||
// start reading the response to process cookies and redirects.
|
// start reading the response to process cookies and redirects.
|
||||||
let result = Response::do_from_request(unit.clone(), stream, previous.clone());
|
let result = Response::do_from_request(unit.clone(), stream);
|
||||||
|
|
||||||
// https://tools.ietf.org/html/rfc7230#section-6.3.1
|
// https://tools.ietf.org/html/rfc7230#section-6.3.1
|
||||||
// When an inbound connection is closed prematurely, a client MAY
|
// When an inbound connection is closed prematurely, a client MAY
|
||||||
@@ -220,7 +276,8 @@ pub(crate) fn connect(
|
|||||||
Err(err) if err.connection_closed() && retryable && is_recycled => {
|
Err(err) if err.connection_closed() && retryable && is_recycled => {
|
||||||
debug!("retrying request {} {}: {}", method, url, err);
|
debug!("retrying request {} {}: {}", method, url, err);
|
||||||
let empty = Payload::Empty.into_read();
|
let empty = Payload::Empty.into_read();
|
||||||
return connect(unit, false, empty, previous);
|
// NOTE: this recurses at most once because `use_pooled` is `false`.
|
||||||
|
return connect_inner(unit, false, empty, previous);
|
||||||
}
|
}
|
||||||
Err(e) => return Err(e),
|
Err(e) => return Err(e),
|
||||||
Ok(resp) => resp,
|
Ok(resp) => resp,
|
||||||
@@ -230,56 +287,6 @@ pub(crate) fn connect(
|
|||||||
#[cfg(feature = "cookies")]
|
#[cfg(feature = "cookies")]
|
||||||
save_cookies(&unit, &resp);
|
save_cookies(&unit, &resp);
|
||||||
|
|
||||||
// handle redirects
|
|
||||||
if (300..399).contains(&resp.status()) && unit.agent.config.redirects > 0 {
|
|
||||||
if let Some(previous) = previous {
|
|
||||||
if previous.history().count() + 1 >= unit.agent.config.redirects as usize {
|
|
||||||
return Err(ErrorKind::TooManyRedirects.new());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// the location header
|
|
||||||
let location = resp.header("location");
|
|
||||||
if let Some(location) = location {
|
|
||||||
// join location header to current url in case it it relative
|
|
||||||
let new_url = url.join(location).map_err(|e| {
|
|
||||||
ErrorKind::InvalidUrl
|
|
||||||
.msg(&format!("Bad redirection: {}", location))
|
|
||||||
.src(e)
|
|
||||||
})?;
|
|
||||||
|
|
||||||
// perform the redirect differently depending on 3xx code.
|
|
||||||
match resp.status() {
|
|
||||||
301 | 302 | 303 => {
|
|
||||||
let empty = Payload::Empty.into_read();
|
|
||||||
// this is to follow how curl does it. POST, PUT etc change
|
|
||||||
// to GET on a redirect.
|
|
||||||
let new_method = match &method[..] {
|
|
||||||
"GET" | "HEAD" => method.to_string(),
|
|
||||||
_ => "GET".into(),
|
|
||||||
};
|
|
||||||
// recreate the unit to get a new hostname and cookies for the new host.
|
|
||||||
let new_unit =
|
|
||||||
Unit::new(&unit.agent, &new_method, &new_url, &unit.headers, &empty);
|
|
||||||
|
|
||||||
debug!("redirect {} {} -> {}", resp.status(), url, new_url);
|
|
||||||
return connect(new_unit, use_pooled, empty, Some(Arc::new(resp)));
|
|
||||||
}
|
|
||||||
// never change the method for 307/308
|
|
||||||
// only resend the request if it cannot have a body
|
|
||||||
// NOTE: DELETE is intentionally excluded: https://stackoverflow.com/questions/299628
|
|
||||||
307 | 308 if ["GET", "HEAD", "OPTIONS", "TRACE"].contains(&method.as_str()) => {
|
|
||||||
let empty = Payload::Empty.into_read();
|
|
||||||
debug!("redirect {} {} -> {}", resp.status(), url, new_url);
|
|
||||||
// recreate the unit to get a new hostname and cookies for the new host.
|
|
||||||
let new_unit = Unit::new(&unit.agent, &unit.method, &new_url, &unit.headers, &empty);
|
|
||||||
return connect(new_unit, use_pooled, empty, Some(Arc::new(resp)));
|
|
||||||
}
|
|
||||||
_ => (),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
debug!("response {} to {} {}", resp.status(), method, url);
|
debug!("response {} to {} {}", resp.status(), method, url);
|
||||||
|
|
||||||
// release the response
|
// release the response
|
||||||
|
|||||||
Reference in New Issue
Block a user