Handle non-utf8 status and headers
Non-utf8 headers are ignored and reading the value for them will yield `None`.
This commit is contained in:
203
src/header.rs
203
src/header.rs
@@ -1,6 +1,66 @@
|
||||
use crate::error::{Error, ErrorKind};
|
||||
use std::fmt;
|
||||
use std::str::FromStr;
|
||||
use std::str::{from_utf8, FromStr};
|
||||
|
||||
/// Since a status line or header can contain non-utf8 characters the
|
||||
/// backing store is a `Vec<u8>`
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub(crate) struct HeaderLine(Vec<u8>);
|
||||
|
||||
impl From<String> for HeaderLine {
|
||||
fn from(s: String) -> Self {
|
||||
HeaderLine(s.into_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Vec<u8>> for HeaderLine {
|
||||
fn from(b: Vec<u8>) -> Self {
|
||||
HeaderLine(b)
|
||||
}
|
||||
}
|
||||
|
||||
impl HeaderLine {
|
||||
pub fn into_string_lossy(self) -> String {
|
||||
// Try to avoid an extra allocation.
|
||||
String::from_utf8(self.0)
|
||||
.unwrap_or_else(|e| String::from_utf8_lossy(&e.into_bytes()).to_string())
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
fn as_bytes(&self) -> &[u8] {
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn into_header(self) -> Result<Header, Error> {
|
||||
// The header name should always be ascii, we can read anything up to the
|
||||
// ':' delimiter byte-by-byte.
|
||||
let mut index = 0;
|
||||
|
||||
for c in self.as_bytes() {
|
||||
if *c == b':' {
|
||||
break;
|
||||
}
|
||||
if !is_tchar(c) {
|
||||
return Err(Error::new(
|
||||
ErrorKind::BadHeader,
|
||||
Some(format!("Invalid char ({:0x?}) while looking for ':'", *c)),
|
||||
));
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
|
||||
Ok(Header { line: self, index })
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for HeaderLine {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}", String::from_utf8_lossy(&self.0))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq)]
|
||||
/// Wrapper type for a header field.
|
||||
@@ -8,7 +68,7 @@ use std::str::FromStr;
|
||||
pub struct Header {
|
||||
// Line contains the unmodified bytes of a single header field.
|
||||
// It does not contain the final CRLF.
|
||||
line: String,
|
||||
line: HeaderLine,
|
||||
// Index is the position of the colon within the header field.
|
||||
// Invariant: index > 0
|
||||
// Invariant: index + 1 < line.len()
|
||||
@@ -23,19 +83,53 @@ impl fmt::Debug for Header {
|
||||
|
||||
impl Header {
|
||||
pub fn new(name: &str, value: &str) -> Self {
|
||||
let line = format!("{}: {}", name, value);
|
||||
let line = format!("{}: {}", name, value).into();
|
||||
let index = name.len();
|
||||
Header { line, index }
|
||||
}
|
||||
|
||||
/// The header name.
|
||||
pub fn name(&self) -> &str {
|
||||
&self.line.as_str()[0..self.index]
|
||||
let bytes = &self.line.as_bytes()[0..self.index];
|
||||
// Since we validate the header name in HeaderLine::into_header, we
|
||||
// are guaranteed it is valid utf-8 at this point.
|
||||
from_utf8(bytes).expect("Legal chars in header name")
|
||||
}
|
||||
|
||||
/// The header value.
|
||||
pub fn value(&self) -> &str {
|
||||
&self.line.as_str()[self.index + 1..].trim()
|
||||
///
|
||||
/// For non-utf8 headers this returns None (use [`Header::value_raw()`]).
|
||||
pub fn value(&self) -> Option<&str> {
|
||||
let bytes = &self.line.as_bytes()[self.index + 1..];
|
||||
from_utf8(bytes)
|
||||
.map(|s| s.trim())
|
||||
.ok()
|
||||
// ensure all bytes are valid field value characters.
|
||||
.filter(|s| s.as_bytes().iter().all(is_field_vchar_or_obs_fold))
|
||||
}
|
||||
|
||||
/// The header value as a byte slice.
|
||||
///
|
||||
/// For legacy reasons, the HTTP spec allows header values to contain non-ascii characters.
|
||||
/// Typically such headers are encoded in a non-utf8 encoding (such as iso-8859-1).
|
||||
///
|
||||
/// ureq can't know what encoding the header is in, but this function provides
|
||||
/// an escape hatch for users that need to handle such headers.
|
||||
pub fn value_raw(&self) -> &[u8] {
|
||||
let mut bytes = &self.line.as_bytes()[self.index + 1..];
|
||||
|
||||
if !bytes.is_empty() {
|
||||
// trim front
|
||||
while !bytes.is_empty() && bytes[0].is_ascii_whitespace() {
|
||||
bytes = &bytes[1..];
|
||||
}
|
||||
// trim back
|
||||
while !bytes.is_empty() && bytes[bytes.len() - 1].is_ascii_whitespace() {
|
||||
bytes = &bytes[..(bytes.len() - 1)];
|
||||
}
|
||||
}
|
||||
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Compares the given str to the header name ignoring case.
|
||||
@@ -44,7 +138,11 @@ impl Header {
|
||||
}
|
||||
|
||||
pub(crate) fn validate(&self) -> Result<(), Error> {
|
||||
if !valid_name(self.name()) || !valid_value(&self.line.as_str()[self.index + 1..]) {
|
||||
let bytes = self.line.as_bytes();
|
||||
let name_raw = &bytes[0..self.index];
|
||||
let value_raw = &bytes[self.index + 1..];
|
||||
|
||||
if !valid_name(name_raw) || !valid_value(value_raw) {
|
||||
Err(ErrorKind::BadHeader.msg(&format!("invalid header '{}'", self.line)))
|
||||
} else {
|
||||
Ok(())
|
||||
@@ -53,14 +151,17 @@ impl Header {
|
||||
}
|
||||
|
||||
pub fn get_header<'a, 'b>(headers: &'b [Header], name: &'a str) -> Option<&'b str> {
|
||||
headers.iter().find(|h| h.is_name(name)).map(|h| h.value())
|
||||
headers
|
||||
.iter()
|
||||
.find(|h| h.is_name(name))
|
||||
.and_then(|h| h.value())
|
||||
}
|
||||
|
||||
pub fn get_all_headers<'a, 'b>(headers: &'b [Header], name: &'a str) -> Vec<&'b str> {
|
||||
headers
|
||||
.iter()
|
||||
.filter(|h| h.is_name(name))
|
||||
.map(|h| h.value())
|
||||
.filter_map(|h| h.value())
|
||||
.collect()
|
||||
}
|
||||
|
||||
@@ -84,12 +185,12 @@ pub fn add_header(headers: &mut Vec<Header>, header: Header) {
|
||||
// token = 1*tchar
|
||||
// tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
|
||||
// "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
|
||||
fn valid_name(name: &str) -> bool {
|
||||
!name.is_empty() && name.bytes().all(is_tchar)
|
||||
fn valid_name(name: &[u8]) -> bool {
|
||||
!name.is_empty() && name.iter().all(is_tchar)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_tchar(b: u8) -> bool {
|
||||
fn is_tchar(b: &u8) -> bool {
|
||||
match b {
|
||||
b'!' | b'#' | b'$' | b'%' | b'&' => true,
|
||||
b'\'' | b'*' | b'+' | b'-' | b'.' => true,
|
||||
@@ -112,12 +213,12 @@ fn is_tchar(b: u8) -> bool {
|
||||
// https://tools.ietf.org/html/rfc5234#appendix-B.1
|
||||
// VCHAR = %x21-7E
|
||||
// ; visible (printing) characters
|
||||
fn valid_value(value: &str) -> bool {
|
||||
value.bytes().all(is_field_vchar_or_obs_fold)
|
||||
fn valid_value(value: &[u8]) -> bool {
|
||||
value.iter().all(is_field_vchar_or_obs_fold)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_field_vchar_or_obs_fold(b: u8) -> bool {
|
||||
fn is_field_vchar_or_obs_fold(b: &u8) -> bool {
|
||||
match b {
|
||||
b' ' | b'\t' => true,
|
||||
0x21..=0x7E => true,
|
||||
@@ -129,17 +230,10 @@ impl FromStr for Header {
|
||||
type Err = Error;
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
//
|
||||
let line = s.to_string();
|
||||
let index = s
|
||||
.find(':')
|
||||
.ok_or_else(|| ErrorKind::BadHeader.msg("no colon in header"))?;
|
||||
let line: HeaderLine = s.to_string().into();
|
||||
|
||||
// no value?
|
||||
if index >= s.len() {
|
||||
return Err(ErrorKind::BadHeader.msg("no value in header"));
|
||||
}
|
||||
let header = line.into_header()?;
|
||||
|
||||
let header = Header { line, index };
|
||||
header.validate()?;
|
||||
Ok(header)
|
||||
}
|
||||
@@ -151,27 +245,27 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn test_valid_name() {
|
||||
assert!(valid_name("example"));
|
||||
assert!(valid_name("Content-Type"));
|
||||
assert!(valid_name("h-123456789"));
|
||||
assert!(!valid_name("Content-Type:"));
|
||||
assert!(!valid_name("Content-Type "));
|
||||
assert!(!valid_name(" some-header"));
|
||||
assert!(!valid_name("\"invalid\""));
|
||||
assert!(!valid_name("Gödel"));
|
||||
assert!(valid_name(b"example"));
|
||||
assert!(valid_name(b"Content-Type"));
|
||||
assert!(valid_name(b"h-123456789"));
|
||||
assert!(!valid_name(b"Content-Type:"));
|
||||
assert!(!valid_name(b"Content-Type "));
|
||||
assert!(!valid_name(b" some-header"));
|
||||
assert!(!valid_name(b"\"invalid\""));
|
||||
assert!(!valid_name(b"G\xf6del"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_valid_value() {
|
||||
assert!(valid_value("example"));
|
||||
assert!(valid_value("foo bar"));
|
||||
assert!(valid_value(" foobar "));
|
||||
assert!(valid_value(" foo\tbar "));
|
||||
assert!(valid_value(" foo~"));
|
||||
assert!(valid_value(" !bar"));
|
||||
assert!(valid_value(" "));
|
||||
assert!(!valid_value(" \nfoo"));
|
||||
assert!(!valid_value("foo\x7F"));
|
||||
assert!(valid_value(b"example"));
|
||||
assert!(valid_value(b"foo bar"));
|
||||
assert!(valid_value(b" foobar "));
|
||||
assert!(valid_value(b" foo\tbar "));
|
||||
assert!(valid_value(b" foo~"));
|
||||
assert!(valid_value(b" !bar"));
|
||||
assert!(valid_value(b" "));
|
||||
assert!(!valid_value(b" \nfoo"));
|
||||
assert!(!valid_value(b"foo\x7F"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -197,25 +291,46 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "charset")]
|
||||
fn test_parse_non_utf8_value() {
|
||||
let (cow, _, _) = encoding_rs::WINDOWS_1252.encode("x-geo-stuff: älvsjö ");
|
||||
let bytes = cow.to_vec();
|
||||
let line: HeaderLine = bytes.into();
|
||||
let header = line.into_header().unwrap();
|
||||
assert_eq!(header.name(), "x-geo-stuff");
|
||||
assert_eq!(header.value(), None);
|
||||
assert_eq!(header.value_raw(), [228, 108, 118, 115, 106, 246]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_value() {
|
||||
let h = "foo:".parse::<Header>().unwrap();
|
||||
assert_eq!(h.value(), "");
|
||||
assert_eq!(h.value(), Some(""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn value_with_whitespace() {
|
||||
let h = "foo: bar ".parse::<Header>().unwrap();
|
||||
assert_eq!(h.value(), "bar");
|
||||
assert_eq!(h.value(), Some("bar"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn name_and_value() {
|
||||
let header: Header = "X-Forwarded-For: 127.0.0.1".parse().unwrap();
|
||||
assert_eq!("X-Forwarded-For", header.name());
|
||||
assert_eq!("127.0.0.1", header.value());
|
||||
assert_eq!(header.value(), Some("127.0.0.1"));
|
||||
assert!(header.is_name("X-Forwarded-For"));
|
||||
assert!(header.is_name("x-forwarded-for"));
|
||||
assert!(header.is_name("X-FORWARDED-FOR"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_iso8859_utf8_mixup() {
|
||||
// C2 A5 is ¥ in UTF-8 and Â¥ in ISO-8859-1
|
||||
let b = "header: \0xc2\0xa5".to_string().into_bytes();
|
||||
let l: HeaderLine = b.into();
|
||||
let h = l.into_header().unwrap();
|
||||
assert_eq!(h.value(), None);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ use chunked_transfer::Decoder as ChunkDecoder;
|
||||
use url::Url;
|
||||
|
||||
use crate::error::{Error, ErrorKind::BadStatus};
|
||||
use crate::header::Header;
|
||||
use crate::header::{get_all_headers, get_header, Header, HeaderLine};
|
||||
use crate::pool::PoolReturnRead;
|
||||
use crate::stream::{DeadlineStream, Stream};
|
||||
use crate::unit::Unit;
|
||||
@@ -119,20 +119,31 @@ impl Response {
|
||||
}
|
||||
|
||||
/// The status text: `OK`
|
||||
///
|
||||
/// The HTTP spec allows for non-utf8 status texts. This uses from_utf8_lossy to
|
||||
/// convert such lines to &str.
|
||||
pub fn status_text(&self) -> &str {
|
||||
&self.status_line.as_str()[self.index.response_code + 1..].trim()
|
||||
}
|
||||
|
||||
/// The header corresponding header value for the give name, if any.
|
||||
/// The header value for the given name, or None if not found.
|
||||
///
|
||||
/// For historical reasons, the HTTP spec allows for header values
|
||||
/// to be encoded using encodings like iso-8859-1. Such encodings
|
||||
/// mean the values are not possible to interpret as utf-8.
|
||||
///
|
||||
/// In case the header value can't be read as utf-8, this function
|
||||
/// returns `None` (while the name is visible in [`Response::headers_names()`]).
|
||||
pub fn header(&self, name: &str) -> Option<&str> {
|
||||
self.headers
|
||||
.iter()
|
||||
.find(|h| h.is_name(name))
|
||||
.map(|h| h.value())
|
||||
get_header(&self.headers, name)
|
||||
}
|
||||
|
||||
/// A list of the header names in this response.
|
||||
/// Lowercased to be uniform.
|
||||
///
|
||||
/// It's possible for a header name to be returned by this function, and
|
||||
/// still give a `None` value. See [`Response::header()`] for an explanation
|
||||
/// as to why.
|
||||
pub fn headers_names(&self) -> Vec<String> {
|
||||
self.headers
|
||||
.iter()
|
||||
@@ -147,11 +158,7 @@ impl Response {
|
||||
|
||||
/// All header values corresponding to the given name, or an empty vector.
|
||||
pub fn all(&self, name: &str) -> Vec<&str> {
|
||||
self.headers
|
||||
.iter()
|
||||
.filter(|h| h.is_name(name))
|
||||
.map(|h| h.value())
|
||||
.collect()
|
||||
get_all_headers(&self.headers, name)
|
||||
}
|
||||
|
||||
/// The content type part of the "Content-Type" header without
|
||||
@@ -414,8 +421,9 @@ impl Response {
|
||||
// HTTP/1.1 200 OK\r\n
|
||||
let mut stream =
|
||||
stream::DeadlineStream::new(stream, unit.as_ref().and_then(|u| u.deadline));
|
||||
let status_line = read_next_line(&mut stream, "the status line")?;
|
||||
|
||||
// For the status line we can ignore non-utf8 chars and parse the result of into_string_lossy().
|
||||
let status_line = read_next_line(&mut stream, "the status line")?.into_string_lossy();
|
||||
let (index, status) = parse_status_line(status_line.as_str())?;
|
||||
|
||||
let mut headers: Vec<Header> = Vec::new();
|
||||
@@ -424,7 +432,7 @@ impl Response {
|
||||
if line.is_empty() {
|
||||
break;
|
||||
}
|
||||
if let Ok(header) = line.as_str().parse::<Header>() {
|
||||
if let Ok(header) = line.into_header() {
|
||||
headers.push(header);
|
||||
}
|
||||
}
|
||||
@@ -539,22 +547,13 @@ impl FromStr for Response {
|
||||
}
|
||||
}
|
||||
|
||||
fn read_next_line(reader: &mut impl BufRead, context: &str) -> io::Result<String> {
|
||||
let mut s = String::new();
|
||||
let result = reader.read_line(&mut s);
|
||||
fn read_next_line(reader: &mut impl BufRead, context: &str) -> io::Result<HeaderLine> {
|
||||
let mut buf = Vec::new();
|
||||
let result = reader.read_until(b'\n', &mut buf);
|
||||
|
||||
if let Err(e) = result {
|
||||
// Provide context to errors encountered while reading the line.
|
||||
// ureq does not currently handle non-ascii status lines and
|
||||
// header values. For historical reasons, the HTTP spec does
|
||||
// allow for characters in the range 0x80-0xff, but these are
|
||||
// very rarely encountered in the wild.
|
||||
// See https://github.com/algesten/ureq/issues/320
|
||||
let reason = if e.kind() == io::ErrorKind::InvalidData {
|
||||
format!("Invalid data in {}", context)
|
||||
} else {
|
||||
format!("Error encountered in {}", context)
|
||||
};
|
||||
let reason = format!("Error encountered in {}", context);
|
||||
|
||||
let kind = e.kind();
|
||||
|
||||
@@ -572,17 +571,19 @@ fn read_next_line(reader: &mut impl BufRead, context: &str) -> io::Result<String
|
||||
));
|
||||
}
|
||||
|
||||
if !s.ends_with("\n") {
|
||||
if !buf.ends_with(b"\n") {
|
||||
return Err(io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
format!("Header field didn't end with \\n: {}", s),
|
||||
format!("Header field didn't end with \\n: {:?}", buf),
|
||||
));
|
||||
}
|
||||
s.pop();
|
||||
if s.ends_with("\r") {
|
||||
s.pop();
|
||||
|
||||
buf.pop();
|
||||
if buf.ends_with(b"\r") {
|
||||
buf.pop();
|
||||
}
|
||||
Ok(s)
|
||||
|
||||
Ok(buf.into())
|
||||
}
|
||||
|
||||
/// Limits a `Read` to a content size (as set by a "Content-Length" header).
|
||||
@@ -808,12 +809,24 @@ mod tests {
|
||||
let bytes = cow.to_vec();
|
||||
let mut reader = io::BufReader::new(io::Cursor::new(bytes));
|
||||
let r = read_next_line(&mut reader, "test status line");
|
||||
let e = r.unwrap_err();
|
||||
assert_eq!(e.kind(), io::ErrorKind::InvalidData);
|
||||
assert_eq!(
|
||||
e.to_string(),
|
||||
"Network Error: Invalid data in test status line: stream did not contain valid UTF-8"
|
||||
let h = r.unwrap();
|
||||
assert_eq!(h.to_string(), "HTTP/1.1 302 D<>plac<61> Temporairement");
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(feature = "charset")]
|
||||
fn parse_header_with_non_utf8() {
|
||||
let (cow, _, _) = encoding_rs::WINDOWS_1252.encode(
|
||||
"HTTP/1.1 200 OK\r\n\
|
||||
x-geo-header: gött mos!\r\n\
|
||||
\r\n\
|
||||
OK",
|
||||
);
|
||||
let v = cow.to_vec();
|
||||
let s = Stream::from_vec(v);
|
||||
let resp = Response::do_from_stream(s.into(), None).unwrap();
|
||||
assert_eq!(resp.status(), 200);
|
||||
assert_eq!(resp.header("x-geo-header"), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -386,7 +386,9 @@ fn send_prelude(unit: &Unit, stream: &mut Stream, redir: bool) -> io::Result<()>
|
||||
// other headers
|
||||
for header in &unit.headers {
|
||||
if !redir || !header.is_name("Authorization") {
|
||||
write!(prelude, "{}: {}\r\n", header.name(), header.value())?;
|
||||
if let Some(v) = header.value() {
|
||||
write!(prelude, "{}: {}\r\n", header.name(), v)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user