// Copyright Epic Games, Inc. All Rights Reserved. #include "Framework/Text/RichTextMarkupProcessing.h" #if WITH_FANCY_TEXT struct FUnescapeHelper { TArray EscapeSequences; TArray UnescapedCharacters; FUnescapeHelper() { EscapeSequences.Add(TEXT("quot")); UnescapedCharacters.Add(TEXT("\"")); EscapeSequences.Add(TEXT("lt")); UnescapedCharacters.Add(TEXT("<")); EscapeSequences.Add(TEXT("gt")); UnescapedCharacters.Add(TEXT(">")); EscapeSequences.Add(TEXT("amp")); UnescapedCharacters.Add(TEXT("&")); } } static const UnescapeHelper; struct FEscapeSequenceRegexPatternString { // Generate a regular expression pattern string that matches each of the escape sequences as alternatives, each in its own capture group. static FString Get(const TArray& EscapeSequences) { FString EscapeSequenceRegexPatternString; for (const FString& EscapeSequence : EscapeSequences) { // Add alternation operator to regex. if (!(EscapeSequenceRegexPatternString.IsEmpty())) { EscapeSequenceRegexPatternString += TEXT("|"); } // Add capture group for escape sequence. EscapeSequenceRegexPatternString += TEXT("("); EscapeSequenceRegexPatternString += TEXT("&"); EscapeSequenceRegexPatternString += EscapeSequence; EscapeSequenceRegexPatternString += TEXT(";"); EscapeSequenceRegexPatternString += TEXT(")"); } return EscapeSequenceRegexPatternString; } }; TSharedRef< FDefaultRichTextMarkupParser > FDefaultRichTextMarkupParser::Create() { return MakeShareable(new FDefaultRichTextMarkupParser()); } TSharedRef< FDefaultRichTextMarkupParser > FDefaultRichTextMarkupParser::GetStaticInstance() { static TSharedRef< FDefaultRichTextMarkupParser > Parser = MakeShareable( new FDefaultRichTextMarkupParser() ); return Parser; } FDefaultRichTextMarkupParser::FDefaultRichTextMarkupParser() : EscapeSequenceRegexPattern(FRegexPattern(FEscapeSequenceRegexPatternString::Get(UnescapeHelper.EscapeSequences))) , ElementRegexPattern( FRegexPattern(TEXT("<([\\w\\d\\.-]+)((?: (?:[\\w\\d\\.-]+=(?>\".*?\")))+)?(?:(?:/>)|(?:>(.*?)))") )) , AttributeRegexPattern( FRegexPattern(TEXT("([\\w\\d\\.]+)=(?>\"(.*?)\")")) ) { } void FDefaultRichTextMarkupParser::Process(TArray& Results, const FString& Input, FString& Output) { #if UE_ENABLE_ICU TArray LineRanges; FTextRange::CalculateLineRangesFromString(Input, LineRanges); ParseLineRanges(Input, LineRanges, Results); HandleEscapeSequences(Input, Results, Output); #else Output = TEXT("WARNING: Rich text formatting is disabled."); FTextLineParseResults FakeLineParseResults(FTextRange(0, Output.Len())); FTextRunParseResults FakeRunParseResults(TEXT(""), FTextRange(0, Output.Len())); FakeLineParseResults.Runs.Add(FakeRunParseResults); Results.Add(FakeLineParseResults); #endif } void FDefaultRichTextMarkupParser::ParseLineRanges(const FString& Input, const TArray& LineRanges, TArray& LineParseResultsArray) const { if (LineRanges.IsEmpty()) { return; } // Special regular expression pattern for matching rich text markup elements. IE: Content FRegexMatcher ElementRegexMatcher(ElementRegexPattern, Input); // Lazily constructed attribute parser TOptional LazyAttributeRegexMatcher; // Parse line ranges, creating line parse results and run parse results. for(int32 i = 0; i < LineRanges.Num(); ++i) { FTextLineParseResults LineParseResults; LineParseResults.Range = LineRanges[i]; // Limit the element regex matcher to the current line. ElementRegexMatcher.SetLimits(LineParseResults.Range.BeginIndex, LineParseResults.Range.EndIndex); // Iterate through the line, each time trying to find a match for the element regex, adding it as a run and any intervening text as another run. int32 LastRunEnd = LineParseResults.Range.BeginIndex; while(ElementRegexMatcher.FindNext()) { int32 ElementBegin = ElementRegexMatcher.GetMatchBeginning(); int32 ElementEnd = ElementRegexMatcher.GetMatchEnding(); FTextRange OriginalRange(ElementBegin, ElementEnd); // Capture Group 1 is the element name. int32 ElementNameBegin = ElementRegexMatcher.GetCaptureGroupBeginning(1); int32 ElementNameEnd = ElementRegexMatcher.GetCaptureGroupEnding(1); // Name FString ElementName = Input.Mid(ElementNameBegin, ElementNameEnd - ElementNameBegin); // Capture Group 2 is the attribute list. int32 AttributeListBegin = ElementRegexMatcher.GetCaptureGroupBeginning(2); int32 AttributeListEnd = ElementRegexMatcher.GetCaptureGroupEnding(2); // Capture Group 3 is the content. int32 ElementContentBegin = ElementRegexMatcher.GetCaptureGroupBeginning(3); int32 ElementContentEnd = ElementRegexMatcher.GetCaptureGroupEnding(3); FTextRange ContentRange(ElementContentBegin, ElementContentEnd); TMap Attributes; if(AttributeListBegin != INDEX_NONE && AttributeListEnd != INDEX_NONE) { if (!LazyAttributeRegexMatcher.IsSet()) { LazyAttributeRegexMatcher.Emplace(AttributeRegexPattern, Input); } FRegexMatcher& AttributeRegexMatcher = *LazyAttributeRegexMatcher; AttributeRegexMatcher.SetLimits(AttributeListBegin, AttributeListEnd); // Iterate through the attribute list, each time trying to find a match for the attribute regex. while(AttributeRegexMatcher.FindNext()) { // Capture Group 1 is the attribute key. int32 AttributeKeyBegin = AttributeRegexMatcher.GetCaptureGroupBeginning(1); int32 AttributeKeyEnd = AttributeRegexMatcher.GetCaptureGroupEnding(1); // Capture Group 2 is the attribute value. int32 AttributeValueBegin = AttributeRegexMatcher.GetCaptureGroupBeginning(2); int32 AttributeValueEnd = AttributeRegexMatcher.GetCaptureGroupEnding(2); // Attribute Attributes.Add(Input.Mid(AttributeKeyBegin, AttributeKeyEnd - AttributeKeyBegin), FTextRange(AttributeValueBegin, AttributeValueEnd)); } } // Add intervening run to line. FTextRange InterveningRunRange(LastRunEnd, ElementBegin); if(!InterveningRunRange.IsEmpty()) { FTextRunParseResults InterveningRunParseResults(FString(), InterveningRunRange); LastRunEnd = ElementBegin; LineParseResults.Runs.Add(InterveningRunParseResults); } // Add element run to line. FTextRunParseResults RunParseResults(ElementName, OriginalRange, ContentRange); RunParseResults.MetaData = Attributes; LineParseResults.Runs.Add(RunParseResults); LastRunEnd = ElementEnd; } // Add dangling run to line. FTextRange InterveningRunRange(LastRunEnd, LineParseResults.Range.EndIndex); if(!InterveningRunRange.IsEmpty()) { FTextRunParseResults InterveningRunParseResults(FString(), InterveningRunRange); LastRunEnd = LineParseResults.Range.EndIndex; LineParseResults.Runs.Add(InterveningRunParseResults); } // Add blank, empty run if none are present. if(LineParseResults.Runs.Num() == 0) { FTextRunParseResults EmptyRunParseResults(FString(), LineParseResults.Range); LastRunEnd = EmptyRunParseResults.OriginalRange.EndIndex; LineParseResults.Runs.Add(EmptyRunParseResults); } LineParseResultsArray.Add(LineParseResults); } } void FDefaultRichTextMarkupParser::HandleEscapeSequences(const FString& Input, TArray& LineParseResultsArray, FString& ConcatenatedUnescapedLines) const { if (LineParseResultsArray.IsEmpty()) { return; } FRegexMatcher EscapeSequenceRegexMatcher(EscapeSequenceRegexPattern, Input); // Modify original string to handle escape sequences that need to be replaced while updating run ranges. for(int32 i = 0; i < LineParseResultsArray.Num(); ++i) { FTextLineParseResults& LineParseResults = LineParseResultsArray[i]; // Adjust begin indices for previous substitutions. LineParseResults.Range.BeginIndex = ConcatenatedUnescapedLines.Len(); for(int32 j = 0; j < LineParseResults.Runs.Num(); ++j) { FTextRunParseResults& RunParseResults = LineParseResults.Runs[j]; TArray IndicesToUpdate; IndicesToUpdate.Add(&RunParseResults.OriginalRange.BeginIndex); for(TPair& Pair : RunParseResults.MetaData) { IndicesToUpdate.Add(&Pair.Value.BeginIndex); IndicesToUpdate.Add(&Pair.Value.EndIndex); } if(RunParseResults.ContentRange.BeginIndex != INDEX_NONE && RunParseResults.ContentRange.EndIndex != INDEX_NONE) { IndicesToUpdate.Add(&RunParseResults.ContentRange.BeginIndex); IndicesToUpdate.Add(&RunParseResults.ContentRange.EndIndex); } IndicesToUpdate.Add(&RunParseResults.OriginalRange.EndIndex); { const auto GetUnescapedString = [&]() { int32 LastCopiedIndex = EscapeSequenceRegexMatcher.GetBeginLimit(); while(EscapeSequenceRegexMatcher.FindNext()) { // Copy intervening characters between the end of the last copy and the beginning of this match. ConcatenatedUnescapedLines += Input.Mid(LastCopiedIndex, EscapeSequenceRegexMatcher.GetMatchBeginning() - LastCopiedIndex); LastCopiedIndex = EscapeSequenceRegexMatcher.GetMatchBeginning(); // Identify which escape sequence was captured based on which capture group has a valid range. for(int32 k = 0; k < UnescapeHelper.EscapeSequences.Num(); ++k) { const int32 GroupOrdinal = 1 + k; // Groups are accessed by ordinal, not index. const int32 EscapeSequenceBeginIndex = EscapeSequenceRegexMatcher.GetCaptureGroupBeginning(GroupOrdinal); const int32 EscapeSequenceEndIndex = EscapeSequenceRegexMatcher.GetCaptureGroupEnding(GroupOrdinal); // Lookup and copy unescaped character in place of the escape sequence. if(EscapeSequenceBeginIndex != INDEX_NONE && EscapeSequenceEndIndex != INDEX_NONE) { ConcatenatedUnescapedLines += UnescapeHelper.UnescapedCharacters[k]; break; } } LastCopiedIndex = EscapeSequenceRegexMatcher.GetMatchEnding(); } // Copy intervening characters between the end of the last copy and the end of the run. ConcatenatedUnescapedLines += Input.Mid(LastCopiedIndex, EscapeSequenceRegexMatcher.GetEndLimit() - LastCopiedIndex); }; int32 k; for(k = 0; k + 1 < IndicesToUpdate.Num(); ++k) { EscapeSequenceRegexMatcher.SetLimits(*(IndicesToUpdate[k]), *(IndicesToUpdate[k + 1])); *(IndicesToUpdate[k]) = ConcatenatedUnescapedLines.Len(); GetUnescapedString(); } *(IndicesToUpdate[k]) = ConcatenatedUnescapedLines.Len(); } } // Adjust end indices for previous substitutions. LineParseResults.Range.EndIndex = ConcatenatedUnescapedLines.Len(); } } TSharedRef< FDefaultRichTextMarkupWriter > FDefaultRichTextMarkupWriter::Create() { return MakeShareable( new FDefaultRichTextMarkupWriter() ); } TSharedRef< FDefaultRichTextMarkupWriter > FDefaultRichTextMarkupWriter::GetStaticInstance() { static TSharedRef< FDefaultRichTextMarkupWriter > Writer = MakeShareable(new FDefaultRichTextMarkupWriter()); return Writer; } void FDefaultRichTextMarkupWriter::Write(const TArray& InLines, FString& Output) { for (int32 LineIndex = 0; LineIndex < InLines.Num(); ++LineIndex) { const FRichTextLine& Line = InLines[LineIndex]; // Append line terminator to the end of the previous line if(LineIndex > 0) { Output += LINE_TERMINATOR; } for (const FRichTextRun& Run : Line.Runs) { // Our rich-text format takes the form of The Text const bool bHasTag = !Run.Info.Name.IsEmpty(); if (bHasTag) { Output.AppendChar('<'); Output.Append(Run.Info.Name); for(const TPair& MetaDataEntry : Run.Info.MetaData) { Output.AppendChar(' '); Output.Append(MetaDataEntry.Key); Output.AppendChar('='); Output.AppendChar('"'); Output.Append(MetaDataEntry.Value); Output.AppendChar('"'); } Output.AppendChar('>'); } FString RunText = Run.Text; EscapeText(RunText); Output.Append(RunText); if (bHasTag) { Output.Append(TEXT("")); } } } } void FDefaultRichTextMarkupWriter::EscapeText(FString& TextToEscape) { // List of characters that we have to escape to avoid accidental rich-text formatting static const TPair EscapeCharacters[] = { TPair('&', "&"), TPair('"', """), TPair('<', "<"), TPair('>', ">"), }; // First count the extra space needed int32 EscapedStringLen = TextToEscape.Len(); for (int32 TextIndex = 0; TextIndex < TextToEscape.Len(); ++TextIndex) { const TCHAR CharToEscape = TextToEscape[TextIndex]; for (const auto& EscapeCharacter : EscapeCharacters) { if (CharToEscape == EscapeCharacter.Key) { EscapedStringLen += (EscapeCharacter.Value.Len() - 1); break; } } } if (EscapedStringLen == TextToEscape.Len()) { return; } TextToEscape.Reserve(EscapedStringLen); // Then perform the actual escape (backwards to make iteration simpler) for (int32 TextIndex = TextToEscape.Len() - 1; TextIndex >= 0; --TextIndex) { const TCHAR CharToEscape = TextToEscape[TextIndex]; for (const auto& EscapeCharacter : EscapeCharacters) { if (CharToEscape == EscapeCharacter.Key) { TextToEscape.RemoveAt(TextIndex, EAllowShrinking::No); TextToEscape.InsertAt(TextIndex, EscapeCharacter.Value); break; } } } } #endif //WITH_FANCY_TEXT