/* * Copyright 2018 faddenSoft * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ using System; using System.Diagnostics; using System.Linq; using System.Text; using System.Text.RegularExpressions; namespace CommonUtil { /// /// Text utility functions. /// public static class TextUtil { // 100 spaces, useful when padding things out. private const string SPACES = " " + " "; private static readonly char[] CSV_ESCAPE_CHARS = { ',', '"' }; private const string NonPrintableAsciiPattern = @"[^\u0020-\u007e]"; private static Regex sNonPrintableAsciiRegex = new Regex(NonPrintableAsciiPattern); /// /// Converts a string to an ASCII-only string, replacing iso-latin characters /// with their ASCII equivalents. This may change the length of the string. /// /// For example, "¿Dónde está Über bären?" becomes "Donde esta Uber baren?". /// /// /// Converted string. public static string LatinToAscii(string inString) { // https://stackoverflow.com/questions/140422/ var newStringBuilder = new StringBuilder(); newStringBuilder.Append(inString.Normalize(NormalizationForm.FormKD) .Where(x => x < 128) .ToArray()); return newStringBuilder.ToString(); // Alternatively? // System.Text.Encoding.ASCII.GetString( // System.Text.Encoding.GetEncoding(1251).GetBytes(text)) } /// /// Returns true if the value is valid high- or low-ASCII. /// /// Value to test. /// True if val is valid ASCII. public static bool IsHiLoAscii(int val) { return (val >= 0x20 && val < 0x7f) || (val >= 0xa0 && val < 0xff); } /// /// Determines whether the character is printable ASCII. /// /// Character to evaluate. /// True if the character is printable ASCII. public static bool IsPrintableAscii(char ch) { return ch >= 0x20 && ch < 0x7f; } /// /// Determines whether the string has nothing but printable ASCII characters in it. /// /// String to evaluate. /// True if all characters are printable ASCII. public static bool IsPrintableAscii(string str) { // Linq version: return str.Any(c => c < 0x20 || c > 0x7e); MatchCollection matches = sNonPrintableAsciiRegex.Matches(str); return matches.Count == 0; } /// /// Converts high-ASCII bytes to a string. /// /// Array of bytes with ASCII data. /// Start offset. /// String length. /// Converted string. public static string HighAsciiToString(byte[] data, int offset, int length) { StringBuilder sb = new StringBuilder(length); for (int i = offset; i < offset + length; i++) { sb.Append((char)(data[i] & 0x7f)); } return sb.ToString(); } /// /// Trims whitespace off the end of a StringBuilder. /// /// StringBuilder reference. public static void TrimEnd(StringBuilder sb) { const string WSPC = " \t\r\n"; int len = sb.Length; while (WSPC.IndexOf(sb[len - 1]) >= 0) { len--; } sb.Length = len; } /// /// Replaces all occurrences of a string with another string, but only if they appear /// outside quoted text. Useful for replacing structural bits of a JSON string without /// messing with the quoted items. Assumes that quoted quotes (backslash-quote) only /// appear inside quoted text. /// /// Input string. /// Substring to find. /// If string found, replace with this. public static string NonQuoteReplace(string inStr, string findStr, string repStr) { // There's probably a better way to do this... StringBuilder sb = new StringBuilder(inStr.Length + inStr.Length / 20); int cmpLen = findStr.Length; bool findStrQuote = findStr.Contains('\"'); // find/rep str switches to in-quote Debug.Assert(findStrQuote == repStr.Contains('\"')); bool inQuote = false; bool lastBackSlash = false; for (int i = 0; i < inStr.Length; i++) { char ch = inStr[i]; if (inQuote) { if (lastBackSlash) { lastBackSlash = false; // JSON recognizes: \" \\ \/ \b \f \n \r \t \uXXXX // Ignore them all. Especially important for \", so we don't // mishandle quoted quotes, and for \\, so we don't fail to handle // quotes on a string that ends with a backslash. } else if (ch == '\\') { lastBackSlash = true; } else if (ch == '"') { // Un-quoted quote. End of string. inQuote = false; } else { // Still in quoted string, keep going. } sb.Append(ch); } else { if (string.Compare(inStr, i, findStr, 0, cmpLen) == 0) { sb.Append(repStr); i += cmpLen - 1; inQuote = findStrQuote; } else { if (ch == '"') { // There are no quoted-quotes outside of quotes, so we don't need // to check for a '\'. inQuote = true; } sb.Append(ch); } } } return sb.ToString(); } /// /// Appends a string to the string buffer. If the number of characters in the buffer /// is less than the desired start position, spaces will be added. At least one space /// will always be added if the start position is greater than zero and the string /// is non-empty. /// /// You must begin with an empty StringBuilder at the start of each line. /// /// StringBuilder to append to. /// String to add. null strings are treated as empty strings. /// Desired starting position. public static void AppendPaddedString(StringBuilder sb, string str, int startPos) { if (string.IsNullOrEmpty(str)) { return; } int toAdd = startPos - sb.Length; if (toAdd < 1 && startPos > 0) { // Already some text present, and we're adding more text, but we're past the // column start. Add a space so the columns don't run into each other. toAdd = 1; } if (toAdd > 0) { // would be nice to avoid this allocation/copy sb.Append(SPACES.Substring(0, toAdd)); } sb.Append(str); } /// /// Escapes a string for CSV. /// /// String to process. /// Escaped string, or an empty string if the input was null. public static string EscapeCSV(string str) { if (str == null) { return string.Empty; } bool needQuote = (str.IndexOfAny(CSV_ESCAPE_CHARS) >= 0); if (needQuote) { return '"' + str.Replace("\"", "\"\"") + '"'; } else { return str; } } /// /// Escapes a string for HTML. /// /// String to process. /// Escaped string. public static string EscapeHTML(string str) { StringBuilder sb = new StringBuilder(str.Length); for (int i = 0; i < str.Length; i++) { if (str[i] == '<') { sb.Append("<"); } else if (str[i] == '>') { sb.Append(">"); } else if (str[i] == '&') { sb.Append("&"); } else { sb.Append(str[i]); } } return sb.ToString(); } /// /// Serializes an integer array into a string. /// /// Array to serialize. /// Serialized data. public static string SerializeIntArray(int[] values) { StringBuilder sb = new StringBuilder(64); sb.Append("int[]"); for (int i = 0; i < values.Length; i++) { sb.Append(','); sb.Append(values[i]); } return sb.ToString(); } /// /// Deserializes an integer array from a string. Throws an exception if the format /// is incorrect. /// /// Serialized data. /// Integer array with contents. public static int[] DeserializeIntArray(string cereal) { string[] splitted = cereal.Split(','); if (splitted.Length == 0) { throw new Exception("Bad serialized int[]"); } if (splitted[0] != "int[]") { throw new Exception("Bad serialized int[], started with " + splitted[0]); } int[] arr = new int[splitted.Length - 1]; try { for (int i = 1; i < splitted.Length; i++) { arr[i - 1] = int.Parse(splitted[i]); } } catch (Exception ex) { throw new Exception("Bad serialized int[]: " + ex.Message); } return arr; } /// /// Converts a char[] to a string, inserting line numbers at the start of each line. /// Assumes lines end with '\n' (with or without a preceding '\r'). /// /// Character data to process. /// String with line numbers. public static string CharArrayToLineNumberedString(char[] data) { StringBuilder sb = new StringBuilder(data.Length + data.Length / 40); // guess int lineStart = 0; int lineNum = 0; for (int i = 0; i < data.Length; i++) { if (data[i] == '\n') { sb.AppendFormat("{0,4:D0} ", ++lineNum); sb.Append(data, lineStart, i - lineStart + 1); lineStart = i + 1; } } return sb.ToString(); } } }