diff --git a/src/SmartFormat.Tests/Core/CharSetTests.cs b/src/SmartFormat.Tests/Core/CharSetTests.cs new file mode 100644 index 00000000..968b446a --- /dev/null +++ b/src/SmartFormat.Tests/Core/CharSetTests.cs @@ -0,0 +1,54 @@ +using System; +using System.Linq; +using NUnit.Framework; +using SmartFormat.Core.Parsing; + +namespace SmartFormat.Tests.Core; + +[TestFixture] +internal class CharSetTests +{ + [Test] + public void CharSet_Add_Remove() + { + char[] asciiChars = ['A', 'B', 'C']; + char[] nonAsciiChars = ['Ā', 'Б', '中']; + var charSet = new CharSet(); + charSet.AddRange(asciiChars.AsEnumerable()); + charSet.AddRange(nonAsciiChars.AsSpan()); + var countBeforeRemoval = charSet.Count; + var existingRemoved = charSet.Remove('C'); + charSet.Remove('中'); + // trying to remove a not existing char returns false + var nonExistingRemoved = charSet.Remove('?'); + var count = charSet.Count; + + Assert.Multiple(() => + { + Assert.That(countBeforeRemoval, Is.EqualTo(asciiChars.Length + nonAsciiChars.Length)); + Assert.That(count, Is.EqualTo(countBeforeRemoval - 2)); + Assert.That(existingRemoved, Is.True); + Assert.That(nonExistingRemoved, Is.False); + }); + } + + [Test] + public void CharSet_CreateFromSpan_GetCharacters_Contains() + { + char[] asciiAndNonAscii = ['\0', 'A', 'B', 'C', 'Ā', 'Б', '中']; + var charSet = new CharSet(asciiAndNonAscii.AsSpan()); + + Assert.Multiple(() => + { + Assert.That(charSet, Has.Count.EqualTo(7)); + Assert.That(charSet.Contains('A'), Is.True); // ASCII + Assert.That(charSet.Contains('\0'), Is.True); // control character + Assert.That(charSet.Contains('中'), Is.True); // non-ASCII + Assert.That(charSet.Contains('?'), Is.False); + Assert.That(charSet.GetCharacters(), Is.EquivalentTo(asciiAndNonAscii)); + charSet.Clear(); + Assert.That(charSet, Has.Count.EqualTo(0)); + Assert.That(charSet.GetCharacters(), Is.Empty); + }); + } +} diff --git a/src/SmartFormat.Tests/Core/ParserTests.cs b/src/SmartFormat.Tests/Core/ParserTests.cs index 
f82eecf2..ab65df2c 100644 --- a/src/SmartFormat.Tests/Core/ParserTests.cs +++ b/src/SmartFormat.Tests/Core/ParserTests.cs @@ -155,6 +155,7 @@ public void Parser_Error_Action_Ignore() // | Literal | Erroneous | | Okay | var invalidTemplate = "Hello, I'm {Name from {City} {Street}"; + // settings must be set before parser instantiation var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.Ignore}}); using var parsed = parser.ParseFormat(invalidTemplate); @@ -177,6 +178,7 @@ public void Parser_Error_Action_Ignore() [TestCase("Hello, I'm {Name from {City} {Street", false)] public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool lastItemIsPlaceholder) { + // settings must be set before parser instantiation var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.MaintainTokens}}); using var parsed = parser.ParseFormat(invalidTemplate); @@ -203,14 +205,21 @@ public void Parser_Error_Action_MaintainTokens(string invalidTemplate, bool last public void Parser_Error_Action_OutputErrorInResult() { // | Literal | Erroneous | - // ▼ Selector must not contain { var invalidTemplate = "Hello, I'm {Name from {City}"; - - var parser = GetRegularParser(new SmartSettings {Parser = new ParserSettings {ErrorAction = ParseErrorAction.OutputErrorInResult}}); + + var parser = GetRegularParser(new SmartSettings + { + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Allowlist, // default + ErrorAction = ParseErrorAction.OutputErrorInResult + } + }); + using var parsed = parser.ParseFormat(invalidTemplate); Assert.That(parsed.Items, Has.Count.EqualTo(1)); - Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 1 issue")); + Assert.That(parsed.Items[0].RawText, Does.StartWith("The format string has 3 issues")); } [Test] @@ -414,11 +423,11 @@ public void Parser_NotifyParsingError() }); formatter.Parser.OnParsingFailure += (o, args) 
=> parsingError = args.Errors; - var res = formatter.Format("{NoName {Other} {Same", default(object)!); + var res = formatter.Format("{NoName {Other} {Same"); Assert.Multiple(() => { - Assert.That(parsingError!.Issues, Has.Count.EqualTo(2)); - Assert.That(parsingError.Issues[1].Issue, Is.EqualTo(new Parser.ParsingErrorText()[SmartFormat.Core.Parsing.Parser.ParsingError.MissingClosingBrace])); + Assert.That(parsingError!.Issues, Has.Count.EqualTo(3)); + Assert.That(parsingError.Issues[2].Issue, Is.EqualTo(new Parser.ParsingErrorText()[Parser.ParsingError.MissingClosingBrace])); }); } @@ -459,6 +468,18 @@ public void Escaping_TheEscapingCharacter_ShouldWork() Assert.That(result, Is.EqualTo(@"\\aaa\{}bbb ccc\x{}ddd\\")); } + [Test] + public void Parsing_Selector_With_CharFromBlocklist_ShouldThrow() + { + var settings = new SmartSettings { Parser = new ParserSettings { SelectorCharFilter = FilterType.Blocklist } }; + var parser = GetRegularParser(settings); + + // The newline character is in the default blocklist of disallowed characters + Assert.That(() => parser.ParseFormat("{A\nB}"), + Throws.Exception.InstanceOf().And.Message + .Contains(new Parser.ParsingErrorText()[Parser.ParsingError.InvalidCharactersInSelector])); + } + [Test] public void StringFormat_Escaping_In_Literal() { @@ -536,8 +557,10 @@ public void Parse_Unicode(string formatString, string unicodeLiteral, int itemIn [TestCase("{%C}", '%')] public void Selector_With_Custom_Selector_Character(string formatString, char customChar) { + // settings must be set before parser instantiation var settings = new SmartSettings(); - settings.Parser.AddCustomSelectorChars(new[]{customChar}); + settings.Parser.AddCustomSelectorChars([customChar]); + var x = settings.Parser.GetSelectorChars(); var parser = GetRegularParser(settings); var result = parser.ParseFormat(formatString); @@ -546,7 +569,7 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu Assert.That(placeholder!.Selectors, 
Has.Count.EqualTo(1)); Assert.Multiple(() => { - Assert.That(placeholder!.Selectors, Has.Count.EqualTo(placeholder!.GetSelectors().Count)); + Assert.That(placeholder.Selectors, Has.Count.EqualTo(placeholder.GetSelectors().Count)); Assert.That(placeholder.Selectors[0].ToString(), Is.EqualTo(formatString.Substring(1, 2))); }); } @@ -555,8 +578,10 @@ public void Selector_With_Custom_Selector_Character(string formatString, char cu [TestCase("{a°b}", '°')] public void Selectors_With_Custom_Operator_Character(string formatString, char customChar) { - var parser = GetRegularParser(); - parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar}); + // settings must be set before parser instantiation + var settings = new SmartSettings(); + settings.Parser.AddCustomOperatorChars([customChar]); + var parser = GetRegularParser(settings); var result = parser.ParseFormat(formatString); var placeholder = result.Items[0] as Placeholder; @@ -583,10 +608,12 @@ public void Selector_WorksWithAllUnicodeChars(string selector) { // See https://github.com/axuno/SmartFormat/issues/454 + // settings must be set before parser instantiation + var settings = new SmartSettings { Parser = { SelectorCharFilter = FilterType.Blocklist } }; const string expected = "The Value"; // The default formatter with default settings should be able to handle any // Unicode characters in selectors except the "magic" disallowed ones - var formatter = Smart.CreateDefaultSmartFormat(); + var formatter = Smart.CreateDefaultSmartFormat(settings); // Use the Unicode string as a selector of the placeholder var template = $"{{{selector}}}"; var result = formatter.Format(template, new Dictionary { { selector, expected } }); @@ -647,10 +674,11 @@ public void Selector_With_Nullable_Operator_Character(string formatString) public void Selector_With_Other_Contiguous_Operator_Characters(string formatString, char customChar) { // contiguous operator characters are parsed as "ONE operator string" - - var parser = 
GetRegularParser(); + var settings = new SmartSettings(); + settings.Parser.AddCustomOperatorChars([customChar]); + var parser = GetRegularParser(settings); // adding '.' is ignored, as it's a standard operator - parser.Settings.Parser.AddCustomOperatorChars(new[]{customChar}); + parser.Settings.Parser.AddCustomOperatorChars([customChar]); var result = parser.ParseFormat(formatString); var placeholder = result.Items[0] as Placeholder; @@ -706,6 +734,12 @@ public void ParseInputAsHtml(string input) Assert.That(literalText!.RawText, Is.EqualTo(input)); } + #region * Parse HTML input without ParserSetting 'IsHtml' + + /// + /// is : + /// all characters are allowed in selectors + /// [TestCase("", "{Placeholder}")] [TestCase("", "{Placeholder}")] [TestCase("Something ! nice", "{ color : #000; }")] @@ -715,7 +749,12 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel var parser = GetRegularParser(new SmartSettings { StringFormatCompatibility = false, - Parser = new ParserSettings { ErrorAction = ParseErrorAction.ThrowError, ParseInputAsHtml = false } + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Blocklist, + ErrorAction = ParseErrorAction.ThrowError, + ParseInputAsHtml = false + } }); var result = parser.ParseFormat(input); @@ -724,9 +763,45 @@ public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, string sel Assert.That(result.Items, Has.Count.EqualTo(3)); Assert.That(((Placeholder) result.Items[1]).RawText, Is.EqualTo(selector)); }); + } + /// + /// is : + /// Predefined set of allowed characters in selectors + /// + [TestCase("", false)] // should parse a placeholder + [TestCase("", false)] // should parse a placeholder + [TestCase("Something ! nice", true)] // illegal selector chars + [TestCase("Something ';}! 
nice", true)] // illegal selector chars + public void ParseHtmlInput_Without_ParserSetting_IsHtml(string input, bool shouldThrow) + { + var parser = GetRegularParser(new SmartSettings + { + StringFormatCompatibility = false, + Parser = new ParserSettings + { + SelectorCharFilter = FilterType.Allowlist, + ErrorAction = ParseErrorAction.ThrowError, + ParseInputAsHtml = false + } + }); + + switch (shouldThrow) + { + case true: + Assert.That(() => _ = parser.ParseFormat(input), Throws.TypeOf()); + break; + case false: + { + var result = parser.ParseFormat(input); + Assert.That(result.Items, Has.Count.EqualTo(3)); + break; + } + } } + #endregion + /// /// SmartFormat is able to parse script tags, if is /// diff --git a/src/SmartFormat.Tests/Core/SettingsTests.cs b/src/SmartFormat.Tests/Core/SettingsTests.cs index 9e60cc6c..bd87c6b3 100644 --- a/src/SmartFormat.Tests/Core/SettingsTests.cs +++ b/src/SmartFormat.Tests/Core/SettingsTests.cs @@ -23,14 +23,15 @@ public void ExistingSelectorCharacter_Should_Not_Be_Added() Assert.Multiple(() => { Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == 'A'), Is.EqualTo(0)); - Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == ' '), Is.EqualTo(0)); + Assert.That(settings.Parser.CustomSelectorChars.Count(c => c == ' '), Is.EqualTo(1)); }); } - [Test] - public void ControlCharacters_Should_Be_Added_As_SelectorChars() + [TestCase(FilterType.Allowlist)] + [TestCase(FilterType.Blocklist)] + public void ControlCharacters_Should_Be_Added_As_SelectorChars(FilterType filterType) { - var settings = new SmartSettings(); + var settings = new SmartSettings { Parser = { SelectorCharFilter = filterType } }; var controlChars = ParserSettings.ControlChars().ToList(); settings.Parser.AddCustomSelectorChars(controlChars); @@ -39,8 +40,8 @@ public void ControlCharacters_Should_Be_Added_As_SelectorChars() Assert.That(settings.Parser.CustomSelectorChars, Has.Count.EqualTo(controlChars.Count)); foreach (var c in 
settings.Parser.CustomSelectorChars) { - Assert.That(settings.Parser.DisallowedSelectorChars(), Does.Not.Contain(c), - $"Control char U+{(int)c:X4} should be allowed as selector char."); + Assert.That(settings.Parser.GetSelectorChars(), filterType == FilterType.Allowlist ? Does.Contain(c) : Does.Not.Contain(c), + $"Control char U+{(int) c:X4} should be allowed as selector char."); } }); } diff --git a/src/SmartFormat/Core/Parsing/CharSet.cs b/src/SmartFormat/Core/Parsing/CharSet.cs new file mode 100644 index 00000000..a4804294 --- /dev/null +++ b/src/SmartFormat/Core/Parsing/CharSet.cs @@ -0,0 +1,216 @@ +// Copyright SmartFormat Project maintainers and contributors. +// Licensed under the MIT license. + +using System; +using System.Collections; +using System.Collections.Generic; + +namespace SmartFormat.Core.Parsing; + +/// +/// Represents a set of characters that supports efficient storage and lookup +/// for both ASCII and non-ASCII characters. +/// +/// +/// The class is optimized for handling ASCII characters using a bitmap +/// representation, while non-ASCII characters are stored in a separate collection. +/// +/// The class provides methods to add characters individually or in bulk, remove characters, check for containment, and enumerate all +/// characters in the set. ASCII characters are enumerated first in numerical order, followed by non-ASCII characters in +/// no guaranteed order. +/// +/// This class is not thread-safe. +/// +internal class CharSet : IEnumerable +{ + private const int ASCII_LIMIT = 128; + private const int BITS_PER_UINT = 32; + private const int BITMAP_LENGTH = ASCII_LIMIT / BITS_PER_UINT; + + private readonly uint[] _asciiBitmap = new uint[BITMAP_LENGTH]; + private readonly HashSet _nonAsciiChars = []; + + /// + /// Gets or sets a value indicating whether the set is used as + /// an allowlist (true) or a blocklist (false, the default for a new instance). + /// + public bool IsAllowList { get; set; } + + /// + /// Initializes a new instance of the class that is empty. 
+ /// + public CharSet() + {} + + /// + /// Initializes a new instance of the class that contains the characters + /// from the specified read-only span. + /// + /// The read-only span containing characters to add to the set. + public CharSet(ReadOnlySpan characters) + { + AddRange(characters); + } + + /// + /// Initializes a new instance of the class that contains the characters + /// from the specified collection. + /// + /// The collection of characters to add to the set. + /// Thrown when is null. + public CharSet(IEnumerable characters) + { + AddRange(characters); + } + + /// + /// Adds all characters from the specified read-only span to the current set. + /// Only adds characters that aren't already present in the set. + /// + /// The read-only span containing characters to add. + public void AddRange(ReadOnlySpan characters) + { + foreach (var ch in characters) + Add(ch); + } + + /// + /// Adds all characters from the specified collection to the current set. + /// Only adds characters that aren't already present in the set. + /// + /// The collection of characters to add. + /// Thrown when is null. + public void AddRange(IEnumerable characters) + { + foreach (var ch in characters) + Add(ch); + } + + /// + /// Adds the specified character to the current set. + /// Only adds a character that isn't already present in the set. + /// + /// The character to add. + public void Add(char c) + { + if (c < ASCII_LIMIT) + _asciiBitmap[c / BITS_PER_UINT] |= 1u << c % BITS_PER_UINT; + else + _nonAsciiChars.Add(c); + } + + /// + /// Removes the specified character from the current set. + /// + /// The character to remove. + /// + /// if the character was successfully found and removed; + /// otherwise, . 
+ /// + public bool Remove(char c) + { + if (c < ASCII_LIMIT) + { + ref var bitmap = ref _asciiBitmap[c / BITS_PER_UINT]; + var mask = 1u << c % BITS_PER_UINT; + + if ((bitmap & mask) == 0) return false; + + bitmap &= ~mask; + return true; + } + + return _nonAsciiChars.Remove(c); + } + + /// + /// Determines whether the current set contains the specified character. + /// + /// The character to locate in the set. + /// + /// if the set contains the specified character; otherwise, . + /// + public bool Contains(char c) + { + if (c < ASCII_LIMIT) + return (_asciiBitmap[c / BITS_PER_UINT] & 1u << c % BITS_PER_UINT) != 0; + + return _nonAsciiChars.Contains(c); + } + + /// + /// Removes all characters from the current set. + /// + public void Clear() + { + Array.Clear(_asciiBitmap, 0, _asciiBitmap.Length); + _nonAsciiChars.Clear(); + } + + /// + /// Gets the number of characters contained in the set. + /// + /// The number of characters in the set. + public int Count + { + get + { + var count = 0; + + // Count ASCII characters using bit population count + foreach (var segment in _asciiBitmap) + count += BitCount(segment); + + return count + _nonAsciiChars.Count; + } + } + + /// + /// Returns an enumerator that iterates through the characters in the set. + /// + /// An enumerator that can be used to iterate through the characters in the set. + /// + /// The enumeration returns ASCII characters first (in numerical order), followed by non-ASCII characters + /// (in no guaranteed order). + /// + public IEnumerable GetCharacters() + { + for (var i = 0; i < ASCII_LIMIT; i++) + if ((_asciiBitmap[i / BITS_PER_UINT] & 1u << i % BITS_PER_UINT) != 0) + yield return (char) i; + + foreach (var c in _nonAsciiChars) + yield return c; + } + + /// + /// Helper method to count set bits in an uint (Hamming weight) + /// + /// The unsigned integer value to count bits in. + /// The number of bits set to 1 in the specified value. 
+ private static int BitCount(uint value) + { + // SWAR (SIMD Within A Register) technique for counting the number + // of set bits (1s) in a 32-bit unsigned integer. + + // Count bits in pairs. + // Subtracts each pair of bits from itself shifted right by one, masked to isolate alternating bits. + value -= value >> 1 & 0x55555555; + // Count bits in 4-bit groups. Adds adjacent 2-bit counts to form 4-bit counts. + value = (value & 0x33333333) + (value >> 2 & 0x33333333); + // Aggregate all 4-bit counts into a single total. + return (int) ((value + (value >> 4) & 0x0F0F0F0F) * 0x01010101) >> 24; + } + + /// + public IEnumerator GetEnumerator() + { + foreach (var ch in GetCharacters()) yield return ch; + } + + /// + IEnumerator IEnumerable.GetEnumerator() + { + return GetEnumerator(); + } +} diff --git a/src/SmartFormat/Core/Parsing/Parser.cs b/src/SmartFormat/Core/Parsing/Parser.cs index 992c5381..2360cf9f 100644 --- a/src/SmartFormat/Core/Parsing/Parser.cs +++ b/src/SmartFormat/Core/Parsing/Parser.cs @@ -4,6 +4,7 @@ using System; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; using System.Runtime.CompilerServices; using SmartFormat.Core.Settings; using SmartFormat.Pooling.SmartPools; @@ -33,11 +34,11 @@ public class Parser public SmartSettings Settings { get; } // Cache method results from settings - private readonly List _operatorChars; - private readonly List _customOperatorChars; + private readonly CharSet _operatorChars; + private readonly CharSet _customOperatorChars; private readonly ParserSettings _parserSettings; - private readonly HashSet _disallowedSelectorChars; - private readonly List _formatOptionsTerminatorChars; + private readonly CharSet _selectorChars; + private readonly CharSet _formatOptionsTerminatorChars; #endregion @@ -63,11 +64,11 @@ public Parser(SmartSettings? smartSettings = null) { Settings = smartSettings ?? 
new SmartSettings(); _parserSettings = Settings.Parser; - _operatorChars = ParserSettings.OperatorChars; - _customOperatorChars = _parserSettings.CustomOperatorChars; - _formatOptionsTerminatorChars = ParserSettings.FormatOptionsTerminatorChars; - - _disallowedSelectorChars = _parserSettings.DisallowedSelectorChars(); + _operatorChars = new CharSet(ParserSettings.OperatorChars.AsSpan()) ; + _customOperatorChars = new CharSet(_parserSettings.CustomOperatorChars); + _formatOptionsTerminatorChars = new CharSet(ParserSettings.FormatOptionsTerminatorChars.AsSpan()); + // Selector chars can be an allowlist or blocklist: + _selectorChars = _parserSettings.GetSelectorChars(); } #endregion @@ -78,6 +79,7 @@ public Parser(SmartSettings? smartSettings = null) /// Includes a-z and A-Z in the list of allowed selector chars. /// [Obsolete("Alphanumeric selectors are always enabled", true)] + [ExcludeFromCodeCoverage] public void AddAlphanumericSelectors() { // Do nothing - this is the standard behavior @@ -88,6 +90,7 @@ public void AddAlphanumericSelectors() /// /// [Obsolete("Use 'Settings.Parser.AddCustomSelectorChars' instead.", true)] + [ExcludeFromCodeCoverage] public void AddAdditionalSelectorChars(string chars) { _parserSettings.AddCustomSelectorChars(chars.ToCharArray()); @@ -100,6 +103,7 @@ public void AddAdditionalSelectorChars(string chars) /// /// [Obsolete("Use 'Settings.Parser.AddCustomOperatorChars' instead.", true)] + [ExcludeFromCodeCoverage] public void AddOperators(string chars) { _parserSettings.AddCustomOperatorChars(chars.ToCharArray()); @@ -112,6 +116,7 @@ public void AddOperators(string chars) /// /// Defaults to backslash [Obsolete("Use 'Settings.StringFormatCompatibility' instead.", true)] + [ExcludeFromCodeCoverage] public void UseAlternativeEscapeChar(char alternativeEscapeChar = '\\') { if (alternativeEscapeChar != _parserSettings.CharLiteralEscapeChar) @@ -129,6 +134,7 @@ public void UseAlternativeEscapeChar(char alternativeEscapeChar = '\\') /// 
backslash. /// [Obsolete("Use 'Settings.StringFormatCompatibility' instead.", true)] + [ExcludeFromCodeCoverage] public void UseBraceEscaping() { throw new NotSupportedException($"Init-only property {nameof(Settings)}.{nameof(Settings.StringFormatCompatibility)} can only be set in an object initializer"); @@ -140,6 +146,7 @@ public void UseBraceEscaping() /// /// [Obsolete("This feature has been removed", true)] + [ExcludeFromCodeCoverage] public void UseAlternativeBraces(char opening, char closing) { throw new NotSupportedException("This feature has been removed"); @@ -323,11 +330,28 @@ private void ProcessSelector(char inputChar, ParserState state, ParsingErrors pa else { // Ensure the selector characters are valid: - if (_disallowedSelectorChars.Contains(inputChar)) - parsingErrors.AddIssue(state.ResultFormat, - $"'0x{Convert.ToUInt32(inputChar):X}': " + - _parsingErrorText[ParsingError.InvalidCharactersInSelector], - state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + if (_selectorChars.IsAllowList) + { + // Only allow specific characters + if (!_selectorChars.Contains(inputChar)) + { + parsingErrors.AddIssue(state.ResultFormat, + $"'0x{Convert.ToUInt32(inputChar):X}': " + + _parsingErrorText[ParsingError.InvalidCharactersInSelector], + state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + } + } + else + { + // Blocklist: Disallow specific characters + if (_selectorChars.Contains(inputChar)) + { + parsingErrors.AddIssue(state.ResultFormat, + $"'0x{Convert.ToUInt32(inputChar):X}': " + + _parsingErrorText[ParsingError.InvalidCharactersInSelector], + state.Index.Current, state.Index.SafeAdd(state.Index.Current, 1)); + } + } } } diff --git a/src/SmartFormat/Core/Settings/FilterType.cs b/src/SmartFormat/Core/Settings/FilterType.cs new file mode 100644 index 00000000..9d12c5b9 --- /dev/null +++ b/src/SmartFormat/Core/Settings/FilterType.cs @@ -0,0 +1,23 @@ +// +// Copyright SmartFormat Project maintainers and contributors. 
+// Licensed under the MIT license. + +namespace SmartFormat.Core.Settings; + +/// +/// Determines the filter type for allowed or disallowed characters. +/// +public enum FilterType +{ + /// + /// Use a list of characters that are allowed. The default characters are
+ /// alphanumeric characters (upper and lower case), plus '_' and '-'.
+ ///
+ Allowlist, + + /// + /// All Unicode characters are allowed, except those in the blocklist. + /// The default blocklist characters are all control characters (ASCII 0-31 and 127). + /// + Blocklist +} diff --git a/src/SmartFormat/Core/Settings/ParserSettings.cs b/src/SmartFormat/Core/Settings/ParserSettings.cs index 8fa41ca9..c0e3eab0 100644 --- a/src/SmartFormat/Core/Settings/ParserSettings.cs +++ b/src/SmartFormat/Core/Settings/ParserSettings.cs @@ -18,6 +18,9 @@ public class ParserSettings { private readonly List _customSelectorChars = []; private readonly List _customOperatorChars = []; + private FilterType _selectorCharFilter = FilterType.Allowlist; + + private const string StandardAllowlist = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-"; /// /// Gets or sets the to use for the . @@ -31,85 +34,66 @@ public class ParserSettings internal List CustomSelectorChars => _customSelectorChars; /// - /// The list of characters which are delimiting a selector. + /// Gets a list of the custom operator characters, which were set with . + /// Contiguous operator characters are parsed as one operator (e.g. '?.'). /// - internal static readonly HashSet SelectorDelimitingChars = - [ - FormatterNameSeparator, - PlaceholderBeginChar, PlaceholderEndChar, - FormatterOptionsBeginChar, FormatterOptionsEndChar - ]; + internal List CustomOperatorChars => _customOperatorChars; /// - /// Gets the set of control characters (ASCII 0-31 and 127). + /// When (default) is set, an allowlist of selector characters is used. + /// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'. + /// On top, any custom selector characters added with are included. + /// + /// When , all Unicode characters are allowed in a selector, + /// except control characters (ASCII 0-31 and 127). Excluded control characters can be added back + /// using . + /// + /// Changing this setting clears any custom operator characters added with . 
/// - internal static IEnumerable ControlChars() + public FilterType SelectorCharFilter { - for (var i = 0; i <= 31; i++) yield return (char) i; - yield return (char) 127; // delete character + get + { + return _selectorCharFilter; + } + set + { + _selectorCharFilter = value; + _customOperatorChars.Clear(); + } } /// - /// The list of characters which are disallowed in a selector. + /// The list of characters for a selector. + /// This can be an allowlist, which contains explicitly allowed characters, + /// or a blocklist, when all Unicode characters are allowed, except those from the blocklist. /// - internal HashSet DisallowedSelectorChars() + internal CharSet GetSelectorChars() => SelectorCharFilter == FilterType.Allowlist ? CreateAllowlist() : CreateBlocklist(); + + private CharSet CreateBlocklist() { - var chars = new HashSet { + var chars = new CharSet { CharLiteralEscapeChar // avoid confusion with escape sequences }; - chars.UnionWith(SelectorDelimitingChars); - chars.UnionWith(OperatorChars); // no overlaps - chars.UnionWith(CustomOperatorChars); // no overlaps + chars.IsAllowList = false; + chars.AddRange(SelectorDelimitingChars.AsSpan()); + chars.AddRange(OperatorChars.AsSpan()); // no overlaps + chars.AddRange(_customOperatorChars); // no overlaps // Hard to visualize and debug, disallow by default - can be added back as custom selector chars - chars.UnionWith(ControlChars()); + chars.AddRange(ControlChars()); - // Remove characters used as custom selector chars. - // Note: Using chars.ExceptWith(_customOperatorChars) would not remove char 0. - foreach (var c in _customSelectorChars) chars.Remove(c); + // Remove characters used as custom selector chars from the blocklist + foreach (var c in _customSelectorChars) chars.Remove(c); return chars; } - /// - /// Gets a list of the custom operator characters, which were set with . - /// Contiguous operator characters are parsed as one operator (e.g. '?.'). 
- /// - internal List CustomOperatorChars => _customOperatorChars; - - /// - /// Add a list of allowable selector characters on top of the default selector characters. - /// This can be useful to add control characters (ASCII 0-31 and 127) that are excluded by default. - /// Operator chars and selector chars must be different. - /// - public void AddCustomSelectorChars(IList characters) - { - var controlChars = ControlChars().ToList(); - - foreach (var c in characters) - { - // Explicitly disallow certain characters - if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar - || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) - throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); - - if (controlChars.Contains(c)) - _customSelectorChars.Add(c); - } - } - - /// - /// Add a list of allowable operator characters on top of the standard setting. - /// Operator chars and selector chars must be different. - /// - public void AddCustomOperatorChars(IList characters) + private CharSet CreateAllowlist() { - foreach (var c in characters) - { - if (SelectorDelimitingChars.Contains(c) || CustomSelectorChars.Contains(c)) - throw new ArgumentException($"Cannot add '{c}' as a custom operator character. It is disallowed or in use as a selector."); - - if (!OperatorChars.Contains(c) && !_customOperatorChars.Contains(c)) - _customOperatorChars.Add(c); - } + var chars = new CharSet {IsAllowList = true}; + chars.AddRange(StandardAllowlist.AsSpan()); + // Add characters used as custom selector chars to the allowlist + chars.AddRange(_customSelectorChars); + return chars; } /// @@ -148,15 +132,6 @@ public void AddCustomOperatorChars(IList characters) /// internal const char FormatterNameSeparator = ':'; - /// - /// The standard operator characters. - /// Contiguous operator characters are parsed as one operator (e.g. '?.'). 
- /// - internal static readonly List OperatorChars = - [ - SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar - ]; - /// /// The character which separates the selector for alignment. E.g.: Smart.Format("Name: {name,10}") /// @@ -208,9 +183,84 @@ public void AddCustomOperatorChars(IList characters) /// Characters which terminate parsing of format options. /// To use them as options, they must be escaped (preceded) by the . /// - internal static readonly List FormatOptionsTerminatorChars = + internal static readonly char[] FormatOptionsTerminatorChars = [ FormatterNameSeparator, FormatterOptionsBeginChar, FormatterOptionsEndChar, PlaceholderBeginChar, PlaceholderEndChar ]; + + /// + /// The standard operator characters. + /// Contiguous operator characters are parsed as one operator (e.g. '?.'). + /// + internal static readonly char[] OperatorChars = + [ + SelectorOperator, NullableOperator, AlignmentOperator, ListIndexBeginChar, ListIndexEndChar + ]; + + /// + /// The list of characters which are delimiting a selector. + /// + internal static readonly char[] SelectorDelimitingChars = + [ + FormatterNameSeparator, + PlaceholderBeginChar, PlaceholderEndChar, + FormatterOptionsBeginChar, FormatterOptionsEndChar + ]; + + /// + /// Gets the set of control characters (ASCII 0-31 and 127). + /// + internal static IEnumerable ControlChars() + { + for (var i = 0; i <= 31; i++) yield return (char) i; + yield return (char) 127; // delete character + } + + /// + /// Add a list of allowable selector characters on top of the default selector characters. + /// + /// When is (default), an allowlist of selector characters is used. + /// The allowlist contains alphanumeric characters (upper and lower case), plus '_' and '-'. + /// On top, any custom selector characters added with are included. + /// + /// When is , all Unicode characters are allowed in a selector, + /// except control characters (ASCII 0-31 and 127). 
Excluded control characters can be added back + /// using . + /// + /// Operator chars and selector chars must be different. + /// + public void AddCustomSelectorChars(IList characters) + { + var controlChars = ControlChars().ToList(); + + foreach (var c in characters) + { + // Explicitly disallow certain characters + if (SelectorDelimitingChars.Contains(c) || c == CharLiteralEscapeChar + || OperatorChars.Contains(c) || CustomOperatorChars.Contains(c)) + throw new ArgumentException($"Cannot add '{c}' as a custom selector character. It is disallowed or in use as an operator character."); + + if (controlChars.Contains(c)) + _customSelectorChars.Add(c); + + if (SelectorCharFilter == FilterType.Allowlist && !(StandardAllowlist.Contains(c) || _customSelectorChars.Contains(c))) _customSelectorChars.Add(c); + } + } + + /// + /// Add a list of allowable operator characters on top of the standard setting. + /// Operator chars and selector chars must be different. + /// + public void AddCustomOperatorChars(IList characters) + { + foreach (var c in characters) + { + if (SelectorDelimitingChars.Contains(c) || CustomSelectorChars.Contains(c)) + throw new ArgumentException($"Cannot add '{c}' as a custom operator character. It is disallowed or in use as a selector."); + + if (!OperatorChars.Contains(c) && !_customOperatorChars.Contains(c)) + _customOperatorChars.Add(c); + } + } }