Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 3 additions & 7 deletions Models/Dtos/Editor/EditorAreaRequestParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@ internal static class EditorAreaRequestParser
{
private static readonly string[] SaveServerOwnedFields = { "id", "tripId", "displayOrder", "capabilities" };
private static readonly Regex FillHexRegex = new("^#[0-9a-fA-F]{6}$", RegexOptions.Compiled);
private static readonly Regex DataImageSourceRegex = new(
@"<img\b[^>]*?\bsrc\s*=\s*[""']?\s*data:image/",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Attempts to parse a complete-draft area create request.
/// </summary>
Expand Down Expand Up @@ -46,7 +42,7 @@ public static bool TryParseCreate(JsonElement request, out EditorAreaSaveRequest

update = new EditorAreaSaveRequest(
string.IsNullOrWhiteSpace(name) ? "Area" : name!.Trim(),
notesHtml,
EditorRichNotesRequestHtml.NormalizeForPersistence(notesHtml),
string.IsNullOrWhiteSpace(fillHex) ? "#ff6600" : fillHex!.Trim().ToLowerInvariant(),
geometry!);
return true;
Expand Down Expand Up @@ -78,7 +74,7 @@ public static bool TryParseUpdate(JsonElement request, out EditorAreaSaveRequest
return false;
}

update = new EditorAreaSaveRequest(name!.Trim(), notesHtml, fillHex!.Trim().ToLowerInvariant(), geometry!);
update = new EditorAreaSaveRequest(name!.Trim(), EditorRichNotesRequestHtml.NormalizeForPersistence(notesHtml), fillHex!.Trim().ToLowerInvariant(), geometry!);
return true;
}

Expand Down Expand Up @@ -350,7 +346,7 @@ private static void ValidateFillHex(string? fillHex, Dictionary<string, string[]

private static void ValidateNotes(string? notesHtml, Dictionary<string, string[]> errors)
{
if (!string.IsNullOrEmpty(notesHtml) && DataImageSourceRegex.IsMatch(notesHtml))
if (EditorRichNotesRequestHtml.ContainsDataImageSource(notesHtml))
{
errors["notesHtml"] = new[] { "Notes cannot contain data image sources." };
}
Expand Down
11 changes: 3 additions & 8 deletions Models/Dtos/Editor/EditorPlaceRequestParser.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Text.Json;
using System.Text.RegularExpressions;

namespace Wayfarer.Models.Dtos.Editor;

Expand All @@ -17,10 +16,6 @@ internal static class EditorPlaceRequestParser
"capabilities"
};

private static readonly Regex DataImageSourceRegex = new(
@"<img\b[^>]*?\bsrc\s*=\s*[""']?\s*data:image/",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Attempts to parse a complete-draft place create request.
/// </summary>
Expand Down Expand Up @@ -51,7 +46,7 @@ public static bool TryParseCreate(

update = new EditorPlaceCreateRequest(
fields.Name!,
fields.NotesHtml,
EditorRichNotesRequestHtml.NormalizeForPersistence(fields.NotesHtml),
fields.Address,
fields.Location,
fields.IconName!,
Expand Down Expand Up @@ -90,7 +85,7 @@ public static bool TryParseUpdate(
update = new EditorPlaceUpdateRequest(
regionId!.Value,
fields.Name!,
fields.NotesHtml,
EditorRichNotesRequestHtml.NormalizeForPersistence(fields.NotesHtml),
fields.Address,
fields.Location,
fields.IconName!,
Expand Down Expand Up @@ -337,7 +332,7 @@ private static void RejectPathOwnedField(JsonElement request, string field, Dict
}

private static bool ContainsDataImageSource(string? notesHtml) =>
!string.IsNullOrEmpty(notesHtml) && DataImageSourceRegex.IsMatch(notesHtml);
EditorRichNotesRequestHtml.ContainsDataImageSource(notesHtml);

private sealed record PlaceSaveFields(
string? Name,
Expand Down
9 changes: 2 additions & 7 deletions Models/Dtos/Editor/EditorRegionRequestParser.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Text.Json;
using System.Text.RegularExpressions;

namespace Wayfarer.Models.Dtos.Editor;

Expand All @@ -20,10 +19,6 @@ internal static class EditorRegionRequestParser
"capabilities"
};

private static readonly Regex DataImageSourceRegex = new(
@"<img\b[^>]*?\bsrc\s*=\s*[""']?\s*data:image/",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Attempts to parse a complete-draft region save request.
/// </summary>
Expand Down Expand Up @@ -57,7 +52,7 @@ public static bool TryParseSave(
return false;
}

update = new EditorRegionSaveRequest(name!, notesHtml, coverImage, center);
update = new EditorRegionSaveRequest(name!, EditorRichNotesRequestHtml.NormalizeForPersistence(notesHtml), coverImage, center);
return true;
}

Expand Down Expand Up @@ -276,7 +271,7 @@ private static void ValidateCoverImage(string? rawUrl, Dictionary<string, string

private static void ValidateNotesHtml(string? notesHtml, Dictionary<string, string[]> errors)
{
if (!string.IsNullOrEmpty(notesHtml) && DataImageSourceRegex.IsMatch(notesHtml))
if (EditorRichNotesRequestHtml.ContainsDataImageSource(notesHtml))
{
errors["notesHtml"] = new[] { "Notes images must use external image URLs, not data:image sources." };
}
Expand Down
230 changes: 230 additions & 0 deletions Models/Dtos/Editor/EditorRichNotesRequestHtml.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
using System.Net;
using System.Text.RegularExpressions;
using AngleSharp.Dom;
using AngleSharp.Html.Parser;
using Microsoft.AspNetCore.WebUtilities;

namespace Wayfarer.Models.Dtos.Editor;

/// <summary>
/// Normalizes Trip Editor rich-notes request HTML before editor mutations persist it.
/// </summary>
internal static class EditorRichNotesRequestHtml
{
// Matches an <img> tag whose src attribute (optionally quoted) begins with "data:image/".
// Case-insensitive; used by ContainsDataImageSource to detect embedded image payloads.
private static readonly Regex DataImageSourceRegex = new(
@"<img\b[^>]*?\bsrc\s*=\s*[""']?\s*data:image/",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Captures a quoted <img> src attribute in three named groups: "prefix" (everything up to and
// including the opening quote), "url" (the attribute text) and "suffix" (the closing quote).
// NormalizeForPersistence uses it to rewrite the url part in the raw markup before parsing.
private static readonly Regex ImageSourceRegex = new(
@"(?<prefix><img\b[^>]*?\bsrc\s*=\s*[""'])(?<url>[^""']+)(?<suffix>[""'])",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Shared HTML parser instance used to build the DOM that is sanitized.
// NOTE(review): shared statically across requests - presumably safe for concurrent use; confirm.
private static readonly HtmlParser Parser = new();

// Tags that are kept (after attribute filtering). Any other tag that is not in RemovedTags
// is replaced by its child nodes in NormalizeElement.
private static readonly HashSet<string> AllowedTags = new(StringComparer.OrdinalIgnoreCase)
{
"a", "blockquote", "br", "em", "h1", "h2", "h3", "h4", "h5", "h6", "img", "li", "ol", "p", "span", "strong", "u", "ul"
};

// Tags that NormalizeElement removes from the tree entirely (the element itself is dropped,
// not replaced by its children).
private static readonly HashSet<string> RemovedTags = new(StringComparer.OrdinalIgnoreCase)
{
"base", "button", "embed", "form", "iframe", "input", "link", "meta", "object", "option", "script", "select", "style", "textarea"
};

// Class names that may stay on the block-level tags listed in QuillBlockTags (case-sensitive).
private static readonly HashSet<string> AllowedAlignmentClasses = new(StringComparer.Ordinal)
{
"ql-align-center", "ql-align-right"
};

// Class names that may stay on <span> elements (case-sensitive).
private static readonly HashSet<string> AllowedFontClasses = new(StringComparer.Ordinal)
{
"ql-font-monospace", "ql-font-serif"
};

// Tags on which the alignment classes above are accepted.
private static readonly HashSet<string> QuillBlockTags = new(StringComparer.OrdinalIgnoreCase)
{
"blockquote", "h1", "h2", "h3", "h4", "h5", "h6", "li", "p"
};

// Values accepted for the data-list attribute on <li> elements (case-sensitive).
private static readonly HashSet<string> AllowedListKinds = new(StringComparer.Ordinal)
{
"bullet", "ordered"
};

/// <summary>
/// Reports whether the supplied notes markup has an img tag whose src starts with data:image/.
/// Null and empty input yield false.
/// </summary>
public static bool ContainsDataImageSource(string? notesHtml)
{
    if (string.IsNullOrEmpty(notesHtml))
    {
        return false;
    }

    return DataImageSourceRegex.IsMatch(notesHtml);
}

/// <summary>
/// Produces the sanitized, canonical form of rich-notes HTML that editor mutations store.
/// Blank input, a missing document body, and a result equal to a single empty paragraph
/// all collapse to an empty string.
/// </summary>
public static string? NormalizeForPersistence(string? notesHtml)
{
    if (string.IsNullOrWhiteSpace(notesHtml))
    {
        return string.Empty;
    }

    // Rewrites the url part of one quoted <img src> match, keeping the surrounding text intact.
    string RewriteImageSource(Match match)
    {
        var opening = match.Groups["prefix"].Value;
        var closing = match.Groups["suffix"].Value;
        var canonical = CanonicalImageSource(match.Groups["url"].Value);
        return opening + WebUtility.HtmlEncode(canonical) + closing;
    }

    var markup = ImageSourceRegex.Replace(notesHtml.Trim(), RewriteImageSource);
    var root = Parser.ParseDocument(markup).Body;
    if (root == null)
    {
        return string.Empty;
    }

    // Drop editor UI spans before the general pass.
    var uiSpans = root.QuerySelectorAll("span.ql-ui").ToArray();
    for (var i = 0; i < uiSpans.Length; i++)
    {
        uiSpans[i].Remove();
    }

    // Walk the snapshot back to front so later elements are handled before earlier ones.
    var snapshot = root.QuerySelectorAll("*").ToArray();
    for (var i = snapshot.Length - 1; i >= 0; i--)
    {
        NormalizeElement(snapshot[i]);
    }

    RemoveTrailingBlankParagraphs(root);

    var result = root.InnerHtml.Trim();
    if (string.Equals(result, "<p><br></p>", StringComparison.OrdinalIgnoreCase))
    {
        return string.Empty;
    }

    return result;
}

/// <summary>
/// Sanitizes one element: removes it when its tag is in RemovedTags, replaces it with its
/// child nodes when its tag is not in AllowedTags, and otherwise strips every attribute
/// rejected by IsAllowedAttribute. Image elements get an additional source check.
/// </summary>
private static void NormalizeElement(IElement element)
{
    var tag = element.TagName;

    if (RemovedTags.Contains(tag))
    {
        element.Remove();
        return;
    }

    if (!AllowedTags.Contains(tag))
    {
        element.Replace(element.ChildNodes.ToArray());
        return;
    }

    // Iterate a snapshot because attributes are removed (and class may be rewritten) as we go.
    var attributes = element.Attributes.ToArray();
    for (var i = 0; i < attributes.Length; i++)
    {
        var current = attributes[i];
        if (IsAllowedAttribute(element, current))
        {
            continue;
        }

        element.RemoveAttribute(current.Name);
    }

    if (string.Equals(tag, "img", StringComparison.OrdinalIgnoreCase))
    {
        NormalizeImage(element);
    }
}

/// <summary>
/// Decides whether an attribute may stay on the element. Accepted: class (when at least one
/// class survives filtering; the attribute is rewritten as a side effect), href on anchors that
/// pass IsAllowedLink, src on images, and data-list on list items with an allowed kind.
/// </summary>
private static bool IsAllowedAttribute(IElement element, IAttr attribute)
{
    bool IsTag(string tagName) =>
        string.Equals(element.TagName, tagName, StringComparison.OrdinalIgnoreCase);

    switch (attribute.Name.ToLowerInvariant())
    {
        case "class":
            return NormalizeClassAttribute(element);

        case "href" when IsTag("a"):
            return IsAllowedLink(attribute.Value);

        case "src" when IsTag("img"):
            return true;

        case "data-list":
            return IsTag("li") && AllowedListKinds.Contains(attribute.Value);

        default:
            return false;
    }
}

/// <summary>
/// Rewrites the element's class attribute so it holds only classes accepted by IsAllowedClass.
/// Returns false, leaving the attribute untouched, when no class is accepted.
/// </summary>
private static bool NormalizeClassAttribute(IElement element)
{
    var kept = new List<string>();
    foreach (var candidate in element.ClassList)
    {
        if (IsAllowedClass(element, candidate))
        {
            kept.Add(candidate);
        }
    }

    if (kept.Count == 0)
    {
        return false;
    }

    element.SetAttribute("class", string.Join(" ", kept));
    return true;
}

/// <summary>
/// A class is accepted when it is an allowed font class on a span, or an allowed alignment
/// class on one of the block tags in QuillBlockTags.
/// </summary>
private static bool IsAllowedClass(IElement element, string className)
{
    var tag = element.TagName;

    if (string.Equals(tag, "span", StringComparison.OrdinalIgnoreCase) && AllowedFontClasses.Contains(className))
    {
        return true;
    }

    return QuillBlockTags.Contains(tag) && AllowedAlignmentClasses.Contains(className);
}

/// <summary>
/// Canonicalizes an image's src (a missing attribute is treated as empty). The image is kept
/// with the canonical source when that source is an absolute http/https URL, and removed otherwise.
/// </summary>
private static void NormalizeImage(IElement element)
{
    var rawSource = element.GetAttribute("src");
    var canonical = CanonicalImageSource(rawSource ?? string.Empty);

    if (IsAllowedAbsoluteHttpUrl(canonical))
    {
        element.SetAttribute("src", canonical);
    }
    else
    {
        element.Remove();
    }
}

/// <summary>
/// Rejects link targets whose compacted, lower-cased form starts with a blocked scheme
/// (javascript:, data:, vbscript:). Everything else is accepted.
/// </summary>
private static bool IsAllowedLink(string value)
{
    var scheme = CompactUrlScheme(value);

    foreach (var blocked in new[] { "javascript:", "data:", "vbscript:" })
    {
        if (scheme.StartsWith(blocked, StringComparison.Ordinal))
        {
            return false;
        }
    }

    return true;
}

/// <summary>
/// True when the value, after boundary control characters are stripped, parses as an
/// absolute URI whose scheme is http or https.
/// </summary>
private static bool IsAllowedAbsoluteHttpUrl(string value)
{
    if (!Uri.TryCreate(StripUrlBoundaryControls(value), UriKind.Absolute, out var parsed))
    {
        return false;
    }

    return parsed.Scheme == Uri.UriSchemeHttp || parsed.Scheme == Uri.UriSchemeHttps;
}

/// <summary>
/// Repeatedly removes the body's last element child for as long as that child is a
/// paragraph that IsBlankParagraph considers blank.
/// </summary>
private static void RemoveTrailingBlankParagraphs(IElement body)
{
    while (true)
    {
        var last = body.LastElementChild;
        if (last == null)
        {
            return;
        }

        var isParagraph = string.Equals(last.TagName, "p", StringComparison.OrdinalIgnoreCase);
        if (!isParagraph || !IsBlankParagraph(last))
        {
            return;
        }

        last.Remove();
    }
}

/// <summary>
/// A paragraph is blank when its text (non-breaking spaces counted as spaces) is only
/// whitespace and it contains no img, video or iframe descendant.
/// </summary>
private static bool IsBlankParagraph(IElement element)
{
    var text = (element.TextContent ?? string.Empty).Replace('\u00a0', ' ');
    if (!string.IsNullOrWhiteSpace(text))
    {
        return false;
    }

    foreach (var selector in new[] { "img", "video", "iframe" })
    {
        if (element.QuerySelector(selector) != null)
        {
            return false;
        }
    }

    return true;
}

/// <summary>
/// HTML-decodes and boundary-trims an image source. When the result points at the
/// /Public/ProxyImage path (absolute or relative, compared case-insensitively) and carries a
/// "url" query value, that value (boundary-trimmed) is returned instead; otherwise the trimmed
/// source is returned unchanged.
/// </summary>
/// <remarks>
/// NOTE(review): this is called both with raw attribute text captured from markup and with a
/// value read from the parsed DOM; the latter has presumably been entity-decoded by the parser
/// already, so the decode here may run twice for that caller - confirm this is intended.
/// </remarks>
private static string CanonicalImageSource(string value)
{
    var trimmed = StripUrlBoundaryControls(WebUtility.HtmlDecode(value));

    if (!Uri.TryCreate(trimmed, UriKind.RelativeOrAbsolute, out var parsed))
    {
        return trimmed;
    }

    // For relative sources, the path is whatever precedes the first '?' in the original text.
    var path = parsed.IsAbsoluteUri
        ? parsed.AbsolutePath
        : parsed.OriginalString.Split('?')[0];
    if (!string.Equals(path, "/Public/ProxyImage", StringComparison.OrdinalIgnoreCase))
    {
        return trimmed;
    }

    // Relative sources are resolved against a placeholder origin only to read their query.
    var queryText = parsed.IsAbsoluteUri
        ? parsed.Query
        : new Uri(new Uri("https://wayfarer.local"), parsed).Query;
    var parameters = QueryHelpers.ParseQuery(queryText);

    if (!parameters.TryGetValue("url", out var target) || target.Count == 0)
    {
        return trimmed;
    }

    return StripUrlBoundaryControls(target[0] ?? trimmed);
}

/// <summary>
/// Removes leading and trailing runs of characters in the ranges U+0000-U+0020 and
/// U+007F-U+009F from the value. Interior characters are left as they are.
/// </summary>
private static string StripUrlBoundaryControls(string value)
{
    const string boundaryRuns = @"^[\u0000-\u0020\u007f-\u009f]+|[\u0000-\u0020\u007f-\u009f]+$";
    return Regex.Replace(value, boundaryRuns, string.Empty);
}

/// <summary>
/// Builds a lower-cased, whitespace/control-free prefix of a URL for scheme checks.
/// </summary>
/// <param name="value">The raw link target.</param>
/// <returns>
/// At most the first 64 characters of <paramref name="value"/> after every character in the
/// ranges U+0000-U+0020 and U+007F-U+009F has been removed, converted to lower case.
/// </returns>
private static string CompactUrlScheme(string value)
{
    // Remove the characters BEFORE truncating. Truncating first lets a caller pad the scheme
    // with more than 64 tabs/newlines ("j<tabs>avascript:") so the compacted prefix no longer
    // starts with the blocked scheme, while browsers drop those characters when resolving the URL.
    // Removing them everywhere also covers the leading/trailing runs, so no separate trim is needed.
    var compact = Regex.Replace(value, @"[\u0000-\u0020\u007f-\u009f]+", string.Empty);
    var head = compact.Length > 64 ? compact[..64] : compact;
    return head.ToLowerInvariant();
}
}
9 changes: 2 additions & 7 deletions Models/Dtos/Editor/EditorSegmentRequestParser.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System.Text.Json;
using System.Text.RegularExpressions;
using NetTopologySuite.Geometries;
using Wayfarer.Services;

Expand All @@ -11,10 +10,6 @@ namespace Wayfarer.Models.Dtos.Editor;
internal static class EditorSegmentRequestParser
{
private static readonly string[] ServerOwnedFields = { "id", "tripId", "displayOrder", "capabilities" };
private static readonly Regex DataImageSourceRegex = new(
@"<img\b[^>]*?\bsrc\s*=\s*[""']?\s*data:image/",
RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Attempts to parse a complete-draft segment save request.
/// </summary>
Expand Down Expand Up @@ -49,7 +44,7 @@ public static bool TryParseSave(JsonElement request, out EditorSegmentSaveReques
CanonicalMode(mode),
distance,
duration,
notesHtml,
EditorRichNotesRequestHtml.NormalizeForPersistence(notesHtml),
route);
return true;
}
Expand Down Expand Up @@ -288,7 +283,7 @@ private static string CanonicalMode(string? mode) =>

private static void ValidateNotes(string? notesHtml, Dictionary<string, string[]> errors)
{
if (!string.IsNullOrEmpty(notesHtml) && DataImageSourceRegex.IsMatch(notesHtml))
if (EditorRichNotesRequestHtml.ContainsDataImageSource(notesHtml))
{
errors["notesHtml"] = new[] { "Notes cannot contain data image sources." };
}
Expand Down
Loading
Loading