summary | refs | log | tree | commit | diff
path: root/cli/text_encoding.rs
diff options
context:
space:
mode:
Diffstat (limited to 'cli/text_encoding.rs')
-rw-r--r-- cli/text_encoding.rs 11
1 files changed, 11 insertions, 0 deletions
diff --git a/cli/text_encoding.rs b/cli/text_encoding.rs
index 8d316909c..f61b877dc 100644
--- a/cli/text_encoding.rs
+++ b/cli/text_encoding.rs
@@ -6,6 +6,8 @@ use std::{
io::{Error, ErrorKind},
};
+pub const BOM_CHAR: char = '\u{FEFF}'; // U+FEFF, the byte order mark as a `char`
+
/// Attempts to detect the character encoding of the provided bytes.
///
/// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
@@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>(
}
}
+/// Returns `text` with a single leading byte order mark removed; text that
+/// does not begin with one is handed back untouched.
+pub fn strip_bom(text: &str) -> &str {
+  match text.strip_prefix(BOM_CHAR) {
+    Some(rest) => rest,
+    None => text,
+  }
+}
+
#[cfg(test)]
mod tests {
use super::*;