From 8cb4f401e3243329bfcc250ae2f3cad56aadbfa6 Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Sun, 13 Dec 2020 11:59:11 -0800 Subject: [PATCH] Add history to response objects (#275) This allows Error to report both the URL that caused an error, and the original URL that was requested. Change unit::connect to use the Response history for tracking number of redirects, instead of passing the count as a separate parameter. Incidentally, move handling of the `stream` fully inside `Response`. Instead of `do_from_read` + `set_stream`, we now have `do_from_stream`, which takes ownership of the stream and keeps it. We also have `do_from_request`, which does all of `do_from_stream`, but also sets the `previous` field. --- src/error.rs | 38 ++++++++++++---- src/request.rs | 3 +- src/response.rs | 118 +++++++++++++++++++++++++++++++++++++++--------- src/unit.rs | 37 +++++++-------- 4 files changed, 143 insertions(+), 53 deletions(-) diff --git a/src/error.rs b/src/error.rs index ad1121d..272c2c4 100644 --- a/src/error.rs +++ b/src/error.rs @@ -2,7 +2,7 @@ use url::Url; use std::error; use std::fmt::{self, Display}; -use std::io::{self}; +use std::io; use crate::Response; @@ -67,6 +67,9 @@ impl Display for Error { match self { Error::Status(status, response) => { write!(f, "{}: status code {}", response.get_url(), status)?; + if let Some(original) = response.history().last() { + write!(f, " (redirected from {})", original.get_url())?; + } } Error::Transport(err) => { write!(f, "{}", err)?; @@ -250,15 +253,32 @@ impl fmt::Display for ErrorKind { } } -// #[test] -// fn status_code_error() { -// let mut err = Error::new(ErrorKind::HTTP, None); -// err = err.response(Response::new(500, "Internal Server Error", "too much going on").unwrap()); -// assert_eq!(err.to_string(), "status code 500"); +#[test] +fn status_code_error() { + let mut response = Response::new(404, "NotFound", "").unwrap(); + response.set_url("http://example.org/".parse().unwrap()); + let err = Error::Status(response.status(), response); -// err = err.url("http://example.com/".parse().unwrap()); -// assert_eq!(err.to_string(), "http://example.com/: status code 500"); -// } + assert_eq!(err.to_string(), "http://example.org/: status code 404"); +} + +#[test] +fn status_code_error_redirect() { + use std::sync::Arc; + let mut response0 = Response::new(302, "Found", "").unwrap(); + response0.set_url("http://example.org/".parse().unwrap()); + let mut response1 = Response::new(302, "Found", "").unwrap(); + response1.set_previous(Arc::new(response0)); + let mut response2 = Response::new(500, "Internal Server Error", "server overloaded").unwrap(); + response2.set_previous(Arc::new(response1)); + response2.set_url("http://example.com/".parse().unwrap()); + let err = Error::Status(response2.status(), response2); + + assert_eq!( + err.to_string(), + "http://example.com/: status code 500 (redirected from http://example.org/)" + ); +} #[test] fn io_error() { diff --git a/src/request.rs b/src/request.rs index 0394ad9..5622ca1 100644 --- a/src/request.rs +++ b/src/request.rs @@ -118,8 +118,7 @@ impl Request { } let reader = payload.into_read(); let unit = Unit::new(&self.agent, &self.method, &url, &self.headers, &reader); - let response = - unit::connect(unit, true, 0, reader, false).map_err(|e| e.url(url.clone()))?; + let response = unit::connect(unit, true, reader, None).map_err(|e| e.url(url.clone()))?; if self.error_on_non_2xx && response.status() >= 400 { Err(Error::Status(response.status(), response)) diff --git a/src/response.rs b/src/response.rs index bb68527..73875e9 100644 --- a/src/response.rs +++ b/src/response.rs @@ -1,14 +1,21 @@ -use std::io::{self, Read}; use std::str::FromStr; use std::{fmt, io::BufRead}; +use std::{ + io::{self, Read}, + sync::Arc, +}; use chunked_transfer::Decoder as ChunkDecoder; +use url::Url; -use crate::error::{Error, ErrorKind}; use crate::header::Header; use crate::pool::PoolReturnRead; use crate::stream::{DeadlineStream, Stream}; use crate::unit::Unit; +use crate::{ + error::{Error, ErrorKind}, + stream, +}; #[cfg(feature = "json")] use serde::de::DeserializeOwned; @@ -40,13 +47,16 @@ pub const DEFAULT_CHARACTER_SET: &str = "utf-8"; /// # } /// ``` pub struct Response { - url: Option, + url: Option, status_line: String, index: ResponseStatusIndex, status: u16, headers: Vec
, unit: Option, - stream: Option, + stream: Stream, + // If this Response resulted from a redirect, the Response containing + // that redirect. + previous: Option>, } /// index into status_line where we split: HTTP/1.1 200 OK @@ -243,7 +253,7 @@ impl Response { .and_then(|l| l.parse::().ok()) }; - let stream = self.stream.expect("No reader in response?!"); + let stream = self.stream; let unit = self.unit; if let Some(unit) = &unit { let result = stream.set_read_timeout(unit.agent.config.timeout_read); @@ -382,6 +392,13 @@ impl Response { }) } + // Returns an iterator across the redirect history of this response, + // if any. The iterator starts with the response before this one. + // If this response was not redirected, the iterator is empty. + pub(crate) fn history(&self) -> Hist { + Hist::new(self.previous.as_deref()) + } + /// Create a response from a Read trait impl. /// /// This is hopefully useful for unit tests. @@ -395,16 +412,18 @@ impl Response { /// let resp = ureq::Response::do_from_read(read); /// /// assert_eq!(resp.status(), 401); - pub(crate) fn do_from_read(mut reader: impl BufRead) -> Result { + pub(crate) fn do_from_stream(stream: Stream, unit: Option) -> Result { // // HTTP/1.1 200 OK\r\n - let status_line = read_next_line(&mut reader)?; + let mut stream = + stream::DeadlineStream::new(stream, unit.as_ref().and_then(|u| u.deadline.clone())); + let status_line = read_next_line(&mut stream)?; let (index, status) = parse_status_line(status_line.as_str())?; let mut headers: Vec
= Vec::new(); loop { - let line = read_next_line(&mut reader)?; + let line = read_next_line(&mut stream)?; if line.is_empty() { break; } @@ -419,14 +438,37 @@ impl Response { index, status, headers, - unit: None, - stream: None, + unit, + stream: stream.into(), + previous: None, }) } + pub(crate) fn do_from_request( + unit: Unit, + stream: Stream, + previous: Option>, + ) -> Result { + let url = Some(unit.url.clone()); + let mut resp = Response::do_from_stream(stream, Some(unit))?; + resp.previous = previous; + resp.url = url; + Ok(resp) + } + #[cfg(test)] pub fn to_write_vec(self) -> Vec { - self.stream.unwrap().to_write_vec() + self.stream.to_write_vec() + } + + #[cfg(test)] + pub fn set_url(&mut self, url: Url) { + self.url = Some(url); + } + + #[cfg(test)] + pub fn set_previous(&mut self, previous: Arc) { + self.previous = Some(previous); } } @@ -478,20 +520,34 @@ impl FromStr for Response { /// assert_eq!(body, "Hello World!!!"); /// ``` fn from_str(s: &str) -> Result { - let mut stream = Stream::from_vec(s.as_bytes().to_owned()); - let mut resp = Self::do_from_read(&mut stream)?; - set_stream(&mut resp, "".into(), None, stream); - Ok(resp) + let stream = Stream::from_vec(s.as_bytes().to_owned()); + Self::do_from_stream(stream, None) } } -/// "Give away" Unit and Stream to the response. -/// -/// *Internal API* -pub(crate) fn set_stream(resp: &mut Response, url: String, unit: Option, stream: Stream) { - resp.url = Some(url); - resp.unit = unit; - resp.stream = Some(stream); +// Hist is an iterator over the history of a redirected response. It +// yields the URLs that were requested in backwards order, from most recent +// to least recent. +pub(crate) struct Hist<'a> { + response: Option<&'a Response>, +} + +impl<'a> Hist<'a> { + fn new(response: Option<&'a Response>) -> Hist<'a> { + Hist { response } + } +} +impl<'a> Iterator for Hist<'a> { + type Item = &'a Response; + fn next(&mut self) -> Option<&'a Response> { + let response = match self.response { + None => return None, + Some(r) => r, + }; + + self.response = response.previous.as_deref(); + return Some(response); + } } fn read_next_line(reader: &mut impl BufRead) -> io::Result { @@ -700,6 +756,24 @@ mod tests { let err = s.parse::().unwrap_err(); assert_eq!(err.kind(), ErrorKind::BadStatus); } + + #[test] + fn history() { + let mut response0 = Response::new(302, "Found", "").unwrap(); + response0.set_url("http://1.example.com/".parse().unwrap()); + assert_eq!(response0.history().count(), 0); + + let mut response1 = Response::new(302, "Found", "").unwrap(); + response1.set_url("http://2.example.com/".parse().unwrap()); + response1.set_previous(Arc::new(response0)); + + let mut response2 = Response::new(404, "NotFound", "").unwrap(); + response2.set_url("http://2.example.com/".parse().unwrap()); + response2.set_previous(Arc::new(response1)); + + let hist: Vec<&str> = response2.history().map(|r| r.get_url()).collect(); + assert_eq!(hist, ["http://2.example.com/", "http://1.example.com/"]) + } } // ErrorReader returns an error for every read. diff --git a/src/unit.rs b/src/unit.rs index 091bef7..f0d75a0 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -1,5 +1,8 @@ -use std::io::{self, Write}; use std::time; +use std::{ + io::{self, Write}, + sync::Arc, +}; use log::{debug, info}; use url::Url; @@ -19,6 +22,7 @@ use crate::Agent; /// A Unit is fully-built Request, ready to execute. /// /// *Internal API* +#[derive(Clone)] pub(crate) struct Unit { pub agent: Agent, pub method: String, @@ -163,9 +167,8 @@ impl Unit { pub(crate) fn connect( unit: Unit, use_pooled: bool, - redirect_count: u32, body: SizedReader, - redir: bool, + previous: Option>, ) -> Result { // @@ -184,14 +187,14 @@ pub(crate) fn connect( info!("sending request {} {}", method, url); } - let send_result = send_prelude(&unit, &mut stream, redir); + let send_result = send_prelude(&unit, &mut stream, previous.is_some()); if let Err(err) = send_result { if is_recycled { debug!("retrying request early {} {}: {}", method, url, err); // we try open a new connection, this time there will be // no connection in the pool. don't use it. - return connect(unit, false, redirect_count, body, redir); + return connect(unit, false, body, previous); } else { // not a pooled connection, propagate the error. return Err(err.into()); @@ -203,8 +206,7 @@ pub(crate) fn connect( body::send_body(body, unit.is_chunked, &mut stream)?; // start reading the response to process cookies and redirects. - let mut stream = stream::DeadlineStream::new(stream, unit.deadline); - let result = Response::do_from_read(&mut stream); + let result = Response::do_from_request(unit.clone(), stream, previous.clone()); // https://tools.ietf.org/html/rfc7230#section-6.3.1 // When an inbound connection is closed prematurely, a client MAY @@ -216,11 +218,11 @@ pub(crate) fn connect( // from the ConnectionPool, since those are most likely to have // reached a server-side timeout. Note that this means we may do // up to N+1 total tries, where N is max_idle_connections_per_host. - let mut resp = match result { + let resp = match result { Err(err) if err.connection_closed() && retryable && is_recycled => { debug!("retrying request {} {}: {}", method, url, err); let empty = Payload::Empty.into_read(); - return connect(unit, false, redirect_count, empty, redir); + return connect(unit, false, empty, previous); } Err(e) => return Err(e), Ok(resp) => resp, @@ -232,8 +234,10 @@ pub(crate) fn connect( // handle redirects if (300..399).contains(&resp.status()) && unit.agent.config.redirects > 0 { - if redirect_count == unit.agent.config.redirects { - return Err(ErrorKind::TooManyRedirects.new()); + if let Some(previous) = previous { + if previous.history().count() + 1 >= unit.agent.config.redirects as usize { + return Err(ErrorKind::TooManyRedirects.new()); + } } // the location header @@ -261,7 +265,7 @@ pub(crate) fn connect( Unit::new(&unit.agent, &new_method, &new_url, &unit.headers, &empty); debug!("redirect {} {} -> {}", resp.status(), url, new_url); - return connect(new_unit, use_pooled, redirect_count + 1, empty, true); + return connect(new_unit, use_pooled, empty, Some(Arc::new(resp))); } // never change the method for 307/308 // only resend the request if it cannot have a body @@ -269,7 +273,7 @@ pub(crate) fn connect( 307 | 308 if ["GET", "HEAD", "OPTIONS", "TRACE"].contains(&method.as_str()) => { let empty = Payload::Empty.into_read(); debug!("redirect {} {} -> {}", resp.status(), url, new_url); - return connect(unit, use_pooled, redirect_count - 1, empty, true); + return connect(unit, use_pooled, empty, Some(Arc::new(resp))); } _ => (), }; @@ -278,13 +282,6 @@ pub(crate) fn connect( debug!("response {} to {} {}", resp.status(), method, url); - let mut stream: Stream = stream.into(); - stream.reset()?; - - // since it is not a redirect, or we're not following redirects, - // give away the incoming stream to the response object. - crate::response::set_stream(&mut resp, unit.url.to_string(), Some(unit), stream); - // release the response Ok(resp) }