use charset encoding for send_string
This commit is contained in:
13
src/lib.rs
13
src/lib.rs
@@ -67,7 +67,18 @@
|
||||
//! .set("Transfer-Encoding", "chunked")
|
||||
//! .send_string("Hello world");
|
||||
//! ```
|
||||
|
||||
//!
|
||||
//! # Character encoding
|
||||
//!
|
||||
//! For [`response.into_string()`](struct.Response.html#method.into_string) we read the
|
||||
//! header `Content-Type: text/plain; charset=iso-8859-1` and if it contains a charset
|
||||
//! specification, we try to decode the body using that encoding. In the absence of a charset,
|
||||
//! or failing to interpret it, we fall back on `utf-8`.
|
||||
//!
|
||||
//! Similarly when using [`.send_string()`](struct.Request.html#method.send_string),
|
||||
//! we first check if the user has set a `; charset=<whatwg charset>` and attempt
|
||||
//! to encode the request body using that.
|
||||
//!
|
||||
extern crate ascii;
|
||||
extern crate base64;
|
||||
extern crate chunked_transfer;
|
||||
|
||||
@@ -47,7 +47,7 @@ impl ::std::fmt::Debug for Request {
|
||||
|
||||
enum Payload {
|
||||
Empty,
|
||||
Text(String),
|
||||
Text(String, String),
|
||||
JSON(SerdeValue),
|
||||
Reader(Box<Read + 'static>),
|
||||
}
|
||||
@@ -73,8 +73,11 @@ impl Payload {
|
||||
fn into_read(self) -> SizedReader {
|
||||
match self {
|
||||
Payload::Empty => SizedReader::new(None, Box::new(empty())),
|
||||
Payload::Text(s) => {
|
||||
let bytes = s.into_bytes();
|
||||
Payload::Text(text, charset) => {
|
||||
let encoding = encoding_from_whatwg_label(&charset)
|
||||
.or_else(|| encoding_from_whatwg_label(DEFAULT_CHARACTER_SET))
|
||||
.unwrap();
|
||||
let bytes = encoding.encode(&text, EncoderTrap::Replace).unwrap();
|
||||
let len = bytes.len();
|
||||
let cursor = Cursor::new(bytes);
|
||||
SizedReader::new(Some(len), Box::new(cursor))
|
||||
@@ -183,10 +186,14 @@ impl Request {
|
||||
///
|
||||
/// The `Content-Length` header is implicitly set to the length of the serialized value.
|
||||
///
|
||||
/// If a `Content-Type` header is present and it contains a charset specification, we
|
||||
/// attempt to encode the string using that character set. If it fails, we fall back
|
||||
/// on utf-8.
|
||||
///
|
||||
/// ```
|
||||
/// let r = ureq::post("/my_page")
|
||||
/// .set("Content-Type", "text/plain")
|
||||
/// .send_string("Hello World!");
|
||||
/// .set("Content-Type", "text/plain; charset=iso-8859-1")
|
||||
/// .send_string("Hällo Wörld!");
|
||||
/// println!("{:?}", r);
|
||||
/// ```
|
||||
pub fn send_string<S>(&mut self, data: S) -> Response
|
||||
@@ -194,7 +201,8 @@ impl Request {
|
||||
S: Into<String>,
|
||||
{
|
||||
let text = data.into();
|
||||
self.do_call(Payload::Text(text))
|
||||
let charset = charset_from_content_type(self.header("content-type")).to_string();
|
||||
self.do_call(Payload::Text(text, charset))
|
||||
}
|
||||
|
||||
/// Send data from a reader.
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use ascii::AsciiString;
|
||||
use chunked_transfer;
|
||||
use encoding::label::encoding_from_whatwg_label;
|
||||
use encoding::DecoderTrap;
|
||||
use encoding::{DecoderTrap, EncoderTrap};
|
||||
use std::io::Error as IoError;
|
||||
use std::io::ErrorKind;
|
||||
use std::io::Read;
|
||||
@@ -208,15 +208,7 @@ impl Response {
|
||||
/// assert_eq!("ISO-8859-1", resp.charset());
|
||||
/// ```
|
||||
pub fn charset(&self) -> &str {
|
||||
self.header("content-type")
|
||||
.and_then(|header| {
|
||||
header.find(";").and_then(|semi| {
|
||||
(&header[semi + 1..])
|
||||
.find("=")
|
||||
.map(|equal| (&header[semi + equal + 2..]).trim())
|
||||
})
|
||||
})
|
||||
.unwrap_or(DEFAULT_CHARACTER_SET)
|
||||
charset_from_content_type(self.header("content-type"))
|
||||
}
|
||||
|
||||
/// Turn this response into a `impl Read` of the body.
|
||||
@@ -476,3 +468,16 @@ impl Read for LimitedRead {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn charset_from_content_type(header: Option<&str>) -> &str {
|
||||
header
|
||||
.and_then(|header| {
|
||||
header.find(";").and_then(|semi| {
|
||||
(&header[semi + 1..])
|
||||
.find("=")
|
||||
.map(|equal| (&header[semi + equal + 2..]).trim())
|
||||
})
|
||||
})
|
||||
.unwrap_or(DEFAULT_CHARACTER_SET)
|
||||
}
|
||||
|
||||
|
||||
@@ -55,3 +55,19 @@ fn content_length_and_chunked() {
|
||||
assert!(s.contains("Transfer-Encoding: chunked\r\n"));
|
||||
assert!(!s.contains("\r\nContent-Length:\r\n"));
|
||||
}
|
||||
|
||||
/// A string body sent with `charset=iso-8859-1` in the `Content-Type` header must go
/// out on the wire as ISO-8859-1 bytes rather than UTF-8.
#[test]
fn str_with_encoding() {
    test::set_handler("/str_with_encoding", |_req, _url| {
        test::make_response(200, "OK", vec![], vec![])
    });

    let resp = post("test://host/str_with_encoding")
        .set("Content-Type", "text/plain; charset=iso-8859-1")
        .send_string("Hällo Wörld!!!");

    // The 14 characters of the body occupy exactly the last 14 bytes written,
    // since every character is a single byte in ISO-8859-1.
    let written = resp.to_write_vec();
    let body_start = written.len() - 14;
    assert_eq!(
        &written[body_start..],
        // H   ä    l    l    o   _   W   ö    r    l    d   !   !   !
        [72, 228, 108, 108, 111, 32, 87, 246, 114, 108, 100, 33, 33, 33]
    );
}
|
||||
|
||||
Reference in New Issue
Block a user