diff --git a/src/lib.rs b/src/lib.rs index 1d1d4dc..e865c95 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -67,7 +67,18 @@ //! .set("Transfer-Encoding", "chunked") //! .send_string("Hello world"); //! ``` - +//! +//! # Character encoding +//! +//! For [`response.into_string()`](struct.Response.html#method.into_string) we read the +//! header `Content-Type: text/plain; charset=iso-8859-1` and if it contains a charset +//! specification, we try to decode the body using that encoding. In the absence of, or failing +//! to interpret the charset, we fall back on `utf-8`. +//! +//! Similarly, when using [`.send_string()`](struct.Request.html#method.send_string), +//! we first check if the user has set a `; charset=<whatwg label>` parameter in the `Content-Type` header and attempt +//! to encode the request body using that. +//! extern crate ascii; extern crate base64; extern crate chunked_transfer; diff --git a/src/request.rs b/src/request.rs index bba720d..0b0fe4e 100644 --- a/src/request.rs +++ b/src/request.rs @@ -47,7 +47,7 @@ impl ::std::fmt::Debug for Request { enum Payload { Empty, - Text(String), + Text(String, String), JSON(SerdeValue), Reader(Box), } @@ -73,8 +73,11 @@ impl Payload { fn into_read(self) -> SizedReader { match self { Payload::Empty => SizedReader::new(None, Box::new(empty())), - Payload::Text(s) => { - let bytes = s.into_bytes(); + Payload::Text(text, charset) => { + let encoding = encoding_from_whatwg_label(&charset) + .or_else(|| encoding_from_whatwg_label(DEFAULT_CHARACTER_SET)) + .unwrap(); + let bytes = encoding.encode(&text, EncoderTrap::Replace).unwrap(); let len = bytes.len(); let cursor = Cursor::new(bytes); SizedReader::new(Some(len), Box::new(cursor)) @@ -183,10 +186,14 @@ impl Request { /// /// The `Content-Length` header is implicitly set to the length of the serialized value. /// + /// If a `Content-Type` header is present and it contains a charset specification, we + /// attempt to encode the string using that character set. If it fails, we fall back + /// on utf-8. 
+ /// /// ``` /// let r = ureq::post("/my_page") - /// .set("Content-Type", "text/plain") - /// .send_string("Hello World!"); + /// .set("Content-Type", "text/plain; charset=iso-8859-1") + /// .send_string("Hällo Wörld!"); /// println!("{:?}", r); /// ``` pub fn send_string<S>(&mut self, data: S) -> Response @@ -194,7 +201,8 @@ impl Request { S: Into<String>, { let text = data.into(); - self.do_call(Payload::Text(text)) + let charset = charset_from_content_type(self.header("content-type")).to_string(); + self.do_call(Payload::Text(text, charset)) } /// Send data from a reader. diff --git a/src/response.rs b/src/response.rs index 3953a83..83937c5 100644 --- a/src/response.rs +++ b/src/response.rs @@ -1,7 +1,7 @@ use ascii::AsciiString; use chunked_transfer; use encoding::label::encoding_from_whatwg_label; -use encoding::DecoderTrap; +use encoding::{DecoderTrap, EncoderTrap}; use std::io::Error as IoError; use std::io::ErrorKind; use std::io::Read; @@ -208,15 +208,7 @@ impl Response { /// assert_eq!("ISO-8859-1", resp.charset()); /// ``` pub fn charset(&self) -> &str { - self.header("content-type") - .and_then(|header| { - header.find(";").and_then(|semi| { - (&header[semi + 1..]) - .find("=") - .map(|equal| (&header[semi + equal + 2..]).trim()) - }) - }) - .unwrap_or(DEFAULT_CHARACTER_SET) + charset_from_content_type(self.header("content-type")) } /// Turn this response into a `impl Read` of the body. 
@@ -476,3 +468,38 @@ impl Read for LimitedRead {
         }
     }
 }
+
+/// Extract the `charset` parameter from the value of a `Content-Type` header.
+///
+/// Parameters are separated by `;`. Everything before the first `;` (the media type) is
+/// skipped, and the remaining `name=value` pairs are searched for the one named `charset`,
+/// compared ASCII case-insensitively. The value is returned as written in the header, minus
+/// surrounding whitespace and double quotes.
+///
+/// Falls back on `DEFAULT_CHARACTER_SET` when there is no header, or when the header has no
+/// non-empty `charset` parameter.
+fn charset_from_content_type(header: Option<&str>) -> &str {
+    header
+        .and_then(|header| {
+            header
+                .split(';')
+                // The first segment is the media type, e.g. `text/plain`, not a parameter.
+                .skip(1)
+                .filter_map(|param| {
+                    let mut parts = param.splitn(2, '=');
+                    match (parts.next(), parts.next()) {
+                        (Some(name), Some(value))
+                            if name.trim().eq_ignore_ascii_case("charset") =>
+                        {
+                            Some(value.trim().trim_matches('"'))
+                        }
+                        // Some other parameter (e.g. `boundary=`), or no `=` at all.
+                        _ => None,
+                    }
+                })
+                // An empty value (`charset=`) is treated like a missing charset.
+                .find(|charset| !charset.is_empty())
+        })
+        .unwrap_or(DEFAULT_CHARACTER_SET)
+}
+
diff --git a/src/test/body_send.rs b/src/test/body_send.rs
index 3fe252c..64770a4 100644
--- a/src/test/body_send.rs
+++ b/src/test/body_send.rs
@@ -55,3 +55,19 @@ fn content_length_and_chunked() {
     assert!(s.contains("Transfer-Encoding: chunked\r\n"));
     assert!(!s.contains("\r\nContent-Length:\r\n"));
 }
+
+#[test]
+fn str_with_encoding() {
+    test::set_handler("/str_with_encoding", |_req, _url| {
+        test::make_response(200, "OK", vec![], vec![])
+    });
+    let resp = post("test://host/str_with_encoding")
+        .set("Content-Type", "text/plain; charset=iso-8859-1")
+        .send_string("Hällo Wörld!!!");
+    let vec = resp.to_write_vec();
+    assert_eq!(
+        &vec[vec.len() - 14..],
+        //H  ä    l    l    o    _   W   ö    r    l    d    !   !   !
+        [72, 228, 108, 108, 111, 32, 87, 246, 114, 108, 100, 33, 33, 33]
+    );
+}