From 4b95d4d29e72012f7365e31331a17f8ca496e1ec Mon Sep 17 00:00:00 2001 From: Jacob Hoffman-Andrews Date: Sun, 27 Sep 2020 22:54:56 -0700 Subject: [PATCH] Cookie refactor --- src/agent.rs | 15 ++++- src/unit.rs | 183 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 121 insertions(+), 77 deletions(-) diff --git a/src/agent.rs b/src/agent.rs index 1aa5929..e9bf406 100644 --- a/src/agent.rs +++ b/src/agent.rs @@ -52,6 +52,7 @@ pub(crate) struct AgentState { /// Reused connections between requests. pub(crate) pool: ConnectionPool, /// Cookies saved between requests. + /// Invariant: All cookies must have a nonempty domain and path. #[cfg(feature = "cookie")] pub(crate) jar: CookieJar, pub(crate) resolver: ArcResolver, @@ -237,11 +238,23 @@ impl Agent { /// ``` /// let agent = ureq::agent(); /// - /// let cookie = ureq::Cookie::new("name", "value"); + /// let cookie = ureq::Cookie::build("name", "value") + /// .domain("example.com") + /// .path("/") + /// .secure(true) + /// .finish(); /// agent.set_cookie(cookie); /// ``` #[cfg(feature = "cookie")] pub fn set_cookie(&self, cookie: Cookie<'static>) { + if cookie.domain().is_none() { + return; + } + let mut cookie = cookie.clone(); + if cookie.path().is_none() { + cookie.set_path("/"); + } + let mut state = self.state.lock().unwrap(); state.jar.add_original(cookie); } diff --git a/src/unit.rs b/src/unit.rs index 506bd83..4f071a0 100644 --- a/src/unit.rs +++ b/src/unit.rs @@ -48,10 +48,6 @@ impl Unit { let query_string = combine_query(&url, &req.query, mix_queries); - let cookie_header: Option
= url - .host_str() - .and_then(|host_str| extract_cookies(&req.agent, &url.scheme(), host_str, &url.path())); - let extra_headers = { let mut extra = vec![]; @@ -83,12 +79,15 @@ impl Unit { extra.push(Header::new("Authorization", &format!("Basic {}", encoded))); } + #[cfg(feature = "cookie")] + extra.extend(extract_cookies(&req.agent, &url).into_iter()); + extra }; + let headers: Vec<_> = req .headers .iter() - .chain(cookie_header.iter()) .chain(extra_headers.iter()) .cloned() .collect(); @@ -191,6 +190,7 @@ pub(crate) fn connect( } // squirrel away cookies + #[cfg(feature = "cookie")] save_cookies(&unit, &resp); // handle redirects @@ -238,62 +238,79 @@ pub(crate) fn connect( } #[cfg(feature = "cookie")] -fn extract_cookies( - state: &std::sync::Mutex, - scheme: &str, - host: &str, - path: &str, -) -> Option
{ +fn extract_cookies(state: &std::sync::Mutex, url: &Url) -> Option
{ + // We specifically use url.domain() here because cookies cannot be + // set for IP addresses. + let domain = match url.domain() { + Some(d) => d, + None => return None, + }; + let path = url.path(); + let is_secure = url.scheme().eq_ignore_ascii_case("https"); + let state = state.lock().unwrap(); - let is_secure = scheme.eq_ignore_ascii_case("https"); - - match_cookies(&state.jar, host, path, is_secure) + match_cookies(&state.jar, domain, path, is_secure) } -#[cfg(not(feature = "cookie"))] -fn extract_cookies( - _state: &std::sync::Mutex, - _scheme: &str, - _host: &str, - _path: &str, -) -> Option
{ - None +// Return true iff the string domain-matches the domain. +// This function must only be called on hostnames, not IP addresses. +// +// https://tools.ietf.org/html/rfc6265#section-5.1.3 +// A string domain-matches a given domain string if at least one of the +// following conditions hold: +// +// o The domain string and the string are identical. (Note that both +// the domain string and the string will have been canonicalized to +// lower case at this point.) +// o All of the following conditions hold: +// * The domain string is a suffix of the string. +// * The last character of the string that is not included in the +// domain string is a %x2E (".") character. +// * The string is a host name (i.e., not an IP address). +#[cfg(feature = "cookie")] +fn domain_match(s: &str, domain: &str) -> bool { + match s.strip_suffix(domain) { + Some("") => true, // domain and string are identical. + Some(remains) => remains.ends_with('.'), + None => false, // domain was not a suffix of string. + } +} + +// Return true iff the request-path path-matches the cookie-path. +// https://tools.ietf.org/html/rfc6265#section-5.1.4 +// A request-path path-matches a given cookie-path if at least one of +// the following conditions holds: +// +// o The cookie-path and the request-path are identical. +// o The cookie-path is a prefix of the request-path, and the last +// character of the cookie-path is %x2F ("/"). +// o The cookie-path is a prefix of the request-path, and the first +// character of the request-path that is not included in the cookie- +// path is a %x2F ("/") character. +#[cfg(feature = "cookie")] +fn path_match(request_path: &str, cookie_path: &str) -> bool { + match request_path.strip_prefix(cookie_path) { + Some("") => true, // cookie path and request path were identical. + Some(remains) => cookie_path.ends_with('/') || remains.starts_with('/'), + None => false, // cookie path was not a prefix of request path + } } -// TODO check so cookies can't be set for tld:s #[cfg(feature = "cookie")] fn match_cookies(jar: &CookieJar, domain: &str, path: &str, is_secure: bool) -> Option
{ - Some( - jar.iter() - .filter(|c| { - // if there is a domain, it must be matched. - // if there is no domain, then ignore cookie - let domain_ok = c - .domain() - .map(|cdom| domain.contains(cdom)) - .unwrap_or(false); - // a path must match the beginning of request path. - // no cookie path, we say is ok. is it?! - let path_ok = c - .path() - .map(|cpath| path.find(cpath).map(|pos| pos == 0).unwrap_or(false)) - .unwrap_or(true); - // either the cookie isnt secure, or we're not doing a secure request. - let secure_ok = !c.secure().unwrap_or(false) || is_secure; - - domain_ok && path_ok && secure_ok - }) - .map(|c| { - let name = c.name().to_string(); - let value = c.value().to_string(); - let nameval = Cookie::new(name, value).encoded().to_string(); - nameval - }) - .collect::>() - .join(";"), - ) - .filter(|x| !x.is_empty()) - .map(|s| Header::new("Cookie", &s)) + let header_value = jar + .iter() + .filter(|c| domain_match(domain, c.domain().unwrap())) + .filter(|c| path_match(path, c.path().unwrap())) + .filter(|c| is_secure || !c.secure().unwrap_or(false)) + // Create a new cookie with just the name and value so we don't send attributes. + .map(|c| Cookie::new(c.name(), c.value()).encoded().to_string()) + .collect::>() + .join(";"); + match header_value.as_str() { + "" => None, + val => Some(Header::new("Cookie", val)), + } } /// Combine the query of the url and the query options set on the request object. @@ -398,35 +415,49 @@ fn send_prelude(unit: &Unit, stream: &mut Stream, redir: bool) -> io::Result<()> Ok(()) } -#[cfg(not(feature = "cookie"))] -fn save_cookies(_unit: &Unit, _resp: &Response) {} - /// Investigate a response for "Set-Cookie" headers. #[cfg(feature = "cookie")] fn save_cookies(unit: &Unit, resp: &Response) { // - let cookies = resp.all("set-cookie"); - if cookies.is_empty() { + // Specifically use domain here because IPs cannot have cookies. + let request_domain = match unit.url.domain() { + Some(d) => d.to_ascii_lowercase(), + None => return, + }; + let headers = resp.all("set-cookie"); + // Avoid locking if there are no cookie headers + if headers.is_empty() { return; } - - // only lock if we know there is something to process - let state = &mut unit.req.agent.lock().unwrap(); - for raw_cookie in cookies.iter() { - let to_parse = if raw_cookie.to_lowercase().contains("domain=") { - (*raw_cookie).to_string() - } else { - let host = &unit.url.host_str().unwrap().to_string(); - format!("{}; Domain={}", raw_cookie, host) + let cookies = headers.into_iter().flat_map(|header_value| { + let mut cookie = match Cookie::parse_encoded(header_value) { + Err(_) => return None, + Ok(c) => c, }; - match Cookie::parse_encoded(&to_parse[..]) { - Err(_) => (), // ignore unparseable cookies - Ok(cookie) => { - let cookie = cookie.into_owned(); - state.jar.add(cookie) - } + // Canonicalize the cookie domain, check that it matches the request, + // and store it back in the cookie. + // https://tools.ietf.org/html/rfc6265#section-5.3, Item 6 + // Summary: If domain is empty, set it from the request and + // set the host_only flag. + // TODO: store a host_only flag. + // TODO: Check so cookies can't be set for TLDs. + let cookie_domain = match cookie.domain() { + None => request_domain.clone(), + Some(d) if domain_match(&request_domain, &d) => d.to_ascii_lowercase(), + Some(_) => return None, + }; + cookie.set_domain(cookie_domain); + if cookie.path().is_none() { + cookie.set_path("/"); } + Some(cookie) + }); + let state = &mut unit.req.agent.lock().unwrap(); + for c in cookies { + assert!(c.domain().is_some()); + assert!(c.path().is_some()); + state.jar.add(c.into_owned()); } } @@ -448,8 +479,8 @@ mod tests { #[test] fn match_cookies_returns_one_header() { let mut jar = CookieJar::new(); - let cookie1 = Cookie::parse("cookie1=value1; Domain=crates.io").unwrap(); - let cookie2 = Cookie::parse("cookie2=value2; Domain=crates.io").unwrap(); + let cookie1 = Cookie::parse("cookie1=value1; Domain=crates.io; Path=/").unwrap(); + let cookie2 = Cookie::parse("cookie2=value2; Domain=crates.io; Path=/").unwrap(); jar.add(cookie1); jar.add(cookie2);