diff options
Diffstat (limited to 'cli/text_encoding.rs')
-rw-r--r-- | cli/text_encoding.rs | 11 |
1 files changed, 11 insertions, 0 deletions
diff --git a/cli/text_encoding.rs b/cli/text_encoding.rs
index 8d316909c..f61b877dc 100644
--- a/cli/text_encoding.rs
+++ b/cli/text_encoding.rs
@@ -6,6 +6,8 @@ use std::{
   io::{Error, ErrorKind},
 };
 
+pub const BOM_CHAR: char = '\u{FEFF}';
+
 /// Attempts to detect the character encoding of the provided bytes.
 ///
 /// Supports UTF-8, UTF-16 Little Endian and UTF-16 Big Endian.
@@ -43,6 +45,15 @@ pub fn convert_to_utf8<'a>(
   }
 }
 
+/// Strips the byte order mark from the provided text if it exists.
+pub fn strip_bom(text: &str) -> &str {
+  if text.starts_with(BOM_CHAR) {
+    &text[BOM_CHAR.len_utf8()..]
+  } else {
+    text
+  }
+}
+
 #[cfg(test)]
 mod tests {
   use super::*;