use charset encoding for send_string
This commit is contained in:
13
src/lib.rs
13
src/lib.rs
@@ -67,7 +67,18 @@
|
|||||||
//! .set("Transfer-Encoding", "chunked")
|
//! .set("Transfer-Encoding", "chunked")
|
||||||
//! .send_string("Hello world");
|
//! .send_string("Hello world");
|
||||||
//! ```
|
//! ```
|
||||||
|
//!
|
||||||
|
//! # Character encoding
|
||||||
|
//!
|
||||||
|
//! For [`response.into_string()`](struct.Response.html#method.into_string) we read the
|
||||||
|
//! header `Content-Type: text/plain; charset=iso-8859-1` and if it contains a charset
|
||||||
|
//! specification, we try to decode the body using that encoding. If the charset is absent or
|
||||||
|
//! cannot be interpreted, we fall back on `utf-8`.
|
||||||
|
//!
|
||||||
|
//! Similarly when using [`.send_string()`](struct.Request.html#method.send_string),
|
||||||
|
//! we first check if the user has set a `; charset=<whatwg charset>` and attempt
|
||||||
|
//! to encode the request body using that.
|
||||||
|
//!
|
||||||
extern crate ascii;
|
extern crate ascii;
|
||||||
extern crate base64;
|
extern crate base64;
|
||||||
extern crate chunked_transfer;
|
extern crate chunked_transfer;
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ impl ::std::fmt::Debug for Request {
|
|||||||
|
|
||||||
enum Payload {
|
enum Payload {
|
||||||
Empty,
|
Empty,
|
||||||
Text(String),
|
Text(String, String),
|
||||||
JSON(SerdeValue),
|
JSON(SerdeValue),
|
||||||
Reader(Box<Read + 'static>),
|
Reader(Box<Read + 'static>),
|
||||||
}
|
}
|
||||||
@@ -73,8 +73,11 @@ impl Payload {
|
|||||||
fn into_read(self) -> SizedReader {
|
fn into_read(self) -> SizedReader {
|
||||||
match self {
|
match self {
|
||||||
Payload::Empty => SizedReader::new(None, Box::new(empty())),
|
Payload::Empty => SizedReader::new(None, Box::new(empty())),
|
||||||
Payload::Text(s) => {
|
Payload::Text(text, charset) => {
|
||||||
let bytes = s.into_bytes();
|
let encoding = encoding_from_whatwg_label(&charset)
|
||||||
|
.or_else(|| encoding_from_whatwg_label(DEFAULT_CHARACTER_SET))
|
||||||
|
.unwrap();
|
||||||
|
let bytes = encoding.encode(&text, EncoderTrap::Replace).unwrap();
|
||||||
let len = bytes.len();
|
let len = bytes.len();
|
||||||
let cursor = Cursor::new(bytes);
|
let cursor = Cursor::new(bytes);
|
||||||
SizedReader::new(Some(len), Box::new(cursor))
|
SizedReader::new(Some(len), Box::new(cursor))
|
||||||
@@ -183,10 +186,14 @@ impl Request {
|
|||||||
///
|
///
|
||||||
/// The `Content-Length` header is implicitly set to the length of the serialized value.
|
/// The `Content-Length` header is implicitly set to the length of the serialized value.
|
||||||
///
|
///
|
||||||
|
/// If a `Content-Type` header is present and it contains a charset specification, we
|
||||||
|
/// attempt to encode the string using that character set. If it fails, we fall back
|
||||||
|
/// on utf-8.
|
||||||
|
///
|
||||||
/// ```
|
/// ```
|
||||||
/// let r = ureq::post("/my_page")
|
/// let r = ureq::post("/my_page")
|
||||||
/// .set("Content-Type", "text/plain")
|
/// .set("Content-Type", "text/plain; charset=iso-8859-1")
|
||||||
/// .send_string("Hello World!");
|
/// .send_string("Hällo Wörld!");
|
||||||
/// println!("{:?}", r);
|
/// println!("{:?}", r);
|
||||||
/// ```
|
/// ```
|
||||||
pub fn send_string<S>(&mut self, data: S) -> Response
|
pub fn send_string<S>(&mut self, data: S) -> Response
|
||||||
@@ -194,7 +201,8 @@ impl Request {
|
|||||||
S: Into<String>,
|
S: Into<String>,
|
||||||
{
|
{
|
||||||
let text = data.into();
|
let text = data.into();
|
||||||
self.do_call(Payload::Text(text))
|
let charset = charset_from_content_type(self.header("content-type")).to_string();
|
||||||
|
self.do_call(Payload::Text(text, charset))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Send data from a reader.
|
/// Send data from a reader.
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
use ascii::AsciiString;
|
use ascii::AsciiString;
|
||||||
use chunked_transfer;
|
use chunked_transfer;
|
||||||
use encoding::label::encoding_from_whatwg_label;
|
use encoding::label::encoding_from_whatwg_label;
|
||||||
use encoding::DecoderTrap;
|
use encoding::{DecoderTrap, EncoderTrap};
|
||||||
use std::io::Error as IoError;
|
use std::io::Error as IoError;
|
||||||
use std::io::ErrorKind;
|
use std::io::ErrorKind;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
@@ -208,15 +208,7 @@ impl Response {
|
|||||||
/// assert_eq!("ISO-8859-1", resp.charset());
|
/// assert_eq!("ISO-8859-1", resp.charset());
|
||||||
/// ```
|
/// ```
|
||||||
pub fn charset(&self) -> &str {
|
pub fn charset(&self) -> &str {
|
||||||
self.header("content-type")
|
charset_from_content_type(self.header("content-type"))
|
||||||
.and_then(|header| {
|
|
||||||
header.find(";").and_then(|semi| {
|
|
||||||
(&header[semi + 1..])
|
|
||||||
.find("=")
|
|
||||||
.map(|equal| (&header[semi + equal + 2..]).trim())
|
|
||||||
})
|
|
||||||
})
|
|
||||||
.unwrap_or(DEFAULT_CHARACTER_SET)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Turn this response into a `impl Read` of the body.
|
/// Turn this response into a `impl Read` of the body.
|
||||||
@@ -476,3 +468,16 @@ impl Read for LimitedRead {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn charset_from_content_type(header: Option<&str>) -> &str {
|
||||||
|
header
|
||||||
|
.and_then(|header| {
|
||||||
|
header.find(";").and_then(|semi| {
|
||||||
|
(&header[semi + 1..])
|
||||||
|
.find("=")
|
||||||
|
.map(|equal| (&header[semi + equal + 2..]).trim())
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.unwrap_or(DEFAULT_CHARACTER_SET)
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -55,3 +55,19 @@ fn content_length_and_chunked() {
|
|||||||
assert!(s.contains("Transfer-Encoding: chunked\r\n"));
|
assert!(s.contains("Transfer-Encoding: chunked\r\n"));
|
||||||
assert!(!s.contains("\r\nContent-Length:\r\n"));
|
assert!(!s.contains("\r\nContent-Length:\r\n"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Posts a string with `charset=iso-8859-1` in the `Content-Type` header and checks that
/// the last 14 bytes written out are the single-byte ISO-8859-1 form of the text.
#[test]
fn str_with_encoding() {
    test::set_handler("/str_with_encoding", |_req, _url| {
        test::make_response(200, "OK", vec![], vec![])
    });

    // "Hällo Wörld!!!" with one byte per character: ä = 228, ö = 246.
    let expected = [
        72, 228, 108, 108, 111, 32, 87, 246, 114, 108, 100, 33, 33, 33,
    ];

    let response = post("test://host/str_with_encoding")
        .set("Content-Type", "text/plain; charset=iso-8859-1")
        .send_string("Hällo Wörld!!!");
    let written = response.to_write_vec();

    // Only the trailing bytes are compared; whatever precedes them is not checked here.
    let tail = &written[written.len() - 14..];
    assert_eq!(tail, expected);
}
|
||||||
|
|||||||
Reference in New Issue
Block a user