use charset encoding for send_string

This commit is contained in:
Martin Algesten
2018-06-22 11:34:39 +02:00
parent 48dcdeb92a
commit e9ccf1c2cc
4 changed files with 57 additions and 17 deletions

View File

@@ -67,7 +67,18 @@
//! .set("Transfer-Encoding", "chunked") //! .set("Transfer-Encoding", "chunked")
//! .send_string("Hello world"); //! .send_string("Hello world");
//! ``` //! ```
//!
//! # Character encoding
//!
//! For [`response.into_string()`](struct.Response.html#method.into_string) we read the
//! `Content-Type` header (for example `Content-Type: text/plain; charset=iso-8859-1`) and,
//! if it contains a charset specification, we try to decode the body using that encoding.
//! If the charset is absent or cannot be interpreted, we fall back on `utf-8`.
//!
//! Similarly, when using [`.send_string()`](struct.Request.html#method.send_string),
//! we first check if the user has set a `; charset=<whatwg charset>` parameter on the
//! `Content-Type` header and attempt to encode the request body using that.
//!
extern crate ascii; extern crate ascii;
extern crate base64; extern crate base64;
extern crate chunked_transfer; extern crate chunked_transfer;

View File

@@ -47,7 +47,7 @@ impl ::std::fmt::Debug for Request {
enum Payload { enum Payload {
Empty, Empty,
Text(String), Text(String, String),
JSON(SerdeValue), JSON(SerdeValue),
Reader(Box<Read + 'static>), Reader(Box<Read + 'static>),
} }
@@ -73,8 +73,11 @@ impl Payload {
fn into_read(self) -> SizedReader { fn into_read(self) -> SizedReader {
match self { match self {
Payload::Empty => SizedReader::new(None, Box::new(empty())), Payload::Empty => SizedReader::new(None, Box::new(empty())),
Payload::Text(s) => { Payload::Text(text, charset) => {
let bytes = s.into_bytes(); let encoding = encoding_from_whatwg_label(&charset)
.or_else(|| encoding_from_whatwg_label(DEFAULT_CHARACTER_SET))
.unwrap();
let bytes = encoding.encode(&text, EncoderTrap::Replace).unwrap();
let len = bytes.len(); let len = bytes.len();
let cursor = Cursor::new(bytes); let cursor = Cursor::new(bytes);
SizedReader::new(Some(len), Box::new(cursor)) SizedReader::new(Some(len), Box::new(cursor))
@@ -183,10 +186,14 @@ impl Request {
/// ///
/// The `Content-Length` header is implicitly set to the length of the serialized value. /// The `Content-Length` header is implicitly set to the length of the serialized value.
/// ///
/// If a `Content-Type` header is present and it contains a charset specification, we
/// attempt to encode the string using that character set. If it fails, we fall back
/// on utf-8.
///
/// ``` /// ```
/// let r = ureq::post("/my_page") /// let r = ureq::post("/my_page")
/// .set("Content-Type", "text/plain") /// .set("Content-Type", "text/plain; charset=iso-8859-1")
/// .send_string("Hello World!"); /// .send_string("Hällo Wörld!");
/// println!("{:?}", r); /// println!("{:?}", r);
/// ``` /// ```
pub fn send_string<S>(&mut self, data: S) -> Response pub fn send_string<S>(&mut self, data: S) -> Response
@@ -194,7 +201,8 @@ impl Request {
S: Into<String>, S: Into<String>,
{ {
let text = data.into(); let text = data.into();
self.do_call(Payload::Text(text)) let charset = charset_from_content_type(self.header("content-type")).to_string();
self.do_call(Payload::Text(text, charset))
} }
/// Send data from a reader. /// Send data from a reader.

View File

@@ -1,7 +1,7 @@
use ascii::AsciiString; use ascii::AsciiString;
use chunked_transfer; use chunked_transfer;
use encoding::label::encoding_from_whatwg_label; use encoding::label::encoding_from_whatwg_label;
use encoding::DecoderTrap; use encoding::{DecoderTrap, EncoderTrap};
use std::io::Error as IoError; use std::io::Error as IoError;
use std::io::ErrorKind; use std::io::ErrorKind;
use std::io::Read; use std::io::Read;
@@ -208,15 +208,7 @@ impl Response {
/// assert_eq!("ISO-8859-1", resp.charset()); /// assert_eq!("ISO-8859-1", resp.charset());
/// ``` /// ```
pub fn charset(&self) -> &str { pub fn charset(&self) -> &str {
self.header("content-type") charset_from_content_type(self.header("content-type"))
.and_then(|header| {
header.find(";").and_then(|semi| {
(&header[semi + 1..])
.find("=")
.map(|equal| (&header[semi + equal + 2..]).trim())
})
})
.unwrap_or(DEFAULT_CHARACTER_SET)
} }
/// Turn this response into a `impl Read` of the body. /// Turn this response into a `impl Read` of the body.
@@ -476,3 +468,16 @@ impl Read for LimitedRead {
} }
} }
} }
/// Extract the `charset` parameter from a `Content-Type` header value.
///
/// `header` is the raw header value, e.g. `text/plain; charset=iso-8859-1`,
/// or `None` when the header is not set.
///
/// Returns the charset label as a slice of `header`, with surrounding
/// whitespace and optional double quotes removed. Falls back to
/// `DEFAULT_CHARACTER_SET` when the header is missing, carries no `charset`
/// parameter, or the parameter has an empty value.
///
/// The parameter name is matched case-insensitively and may appear at any
/// position among the `;`-separated parameters, so other parameters
/// (`boundary=...`, `format=...`) are never mistaken for the charset.
fn charset_from_content_type(header: Option<&str>) -> &str {
    header
        .and_then(|header| {
            header
                .split(';')
                // The first segment is the media type itself, not a parameter.
                .skip(1)
                .filter_map(|param| {
                    let mut parts = param.splitn(2, '=');
                    let name = parts.next()?.trim();
                    // Values may be written as a quoted string: charset="utf-8".
                    let value = parts.next()?.trim().trim_matches('"').trim();
                    if name.eq_ignore_ascii_case("charset") && !value.is_empty() {
                        Some(value)
                    } else {
                        None
                    }
                })
                .next()
        })
        .unwrap_or(DEFAULT_CHARACTER_SET)
}

View File

@@ -55,3 +55,19 @@ fn content_length_and_chunked() {
assert!(s.contains("Transfer-Encoding: chunked\r\n")); assert!(s.contains("Transfer-Encoding: chunked\r\n"));
assert!(!s.contains("\r\nContent-Length:\r\n")); assert!(!s.contains("\r\nContent-Length:\r\n"));
} }
#[test]
fn str_with_encoding() {
    // The handler just answers 200 with no headers and no body; the check
    // below looks at the bytes returned by `to_write_vec()` instead.
    test::set_handler("/str_with_encoding", |_req, _url| {
        test::make_response(200, "OK", vec![], vec![])
    });

    let resp = post("test://host/str_with_encoding")
        .set("Content-Type", "text/plain; charset=iso-8859-1")
        .send_string("Hällo Wörld!!!");

    // "Hällo Wörld!!!" with one byte per character: ä = 228, ö = 246.
    //              H    ä    l    l    o   _   W    ö    r    l    d   !   !   !
    let expected = [72, 228, 108, 108, 111, 32, 87, 246, 114, 108, 100, 33, 33, 33];

    let written = resp.to_write_vec();
    let tail_start = written.len() - 14;
    assert_eq!(&written[tail_start..], expected);
}