| | 1 | | using Cysharp.Threading.Tasks; |
| | 2 | | using System.Collections.Generic; |
| | 3 | | using System.Diagnostics; |
| | 4 | | using System.Linq; |
| | 5 | | using System.Text; |
| | 6 | | using System.Text.RegularExpressions; |
| | 7 | | using System.Threading; |
| | 8 | | using System.Threading.Tasks; |
| | 9 | |
|
namespace DCL.ProfanityFiltering
{
    /// <summary>
    /// Profanity filter that splits the word lists into several smaller regular expressions
    /// ("steps") and applies them one after another, yielding to the end of the current frame
    /// whenever the work done since the last yield exceeds the time budget.
    /// </summary>
    public class ThrottledRegexProfanityFilter : IProfanityFilter
    {
        private const RegexOptions StepOptions = RegexOptions.IgnoreCase | RegexOptions.Compiled;

        /// <summary>Milliseconds of work allowed between two frame yields while filtering.</summary>
        private const long FrameBudgetMilliseconds = 1;

        private readonly IProfanityWordProvider wordProvider;
        private readonly int partitionSize;
        private readonly List<Regex> regexSteps = new ();

        /// <param name="wordProvider">Source of the explicit and non-explicit word lists.</param>
        /// <param name="partitionSize">
        /// Maximum number of words joined into a single regex step; must be greater than zero.
        /// Check https://github.com/decentraland/unity-renderer/issues/2201 for more info.
        /// </param>
        /// <exception cref="System.ArgumentNullException"><paramref name="wordProvider"/> is null.</exception>
        /// <exception cref="System.ArgumentOutOfRangeException"><paramref name="partitionSize"/> is zero or negative.</exception>
        public ThrottledRegexProfanityFilter(IProfanityWordProvider wordProvider, int partitionSize = 1)
        {
            // A zero partition size would otherwise surface later as a DivideByZeroException while chunking.
            if (partitionSize <= 0)
                throw new System.ArgumentOutOfRangeException(nameof(partitionSize), partitionSize, "Partition size must be greater than zero.");

            this.wordProvider = wordProvider ?? throw new System.ArgumentNullException(nameof(wordProvider));
            this.partitionSize = partitionSize;
        }

        /// <summary>Drops every regex step built by <see cref="Initialize"/>.</summary>
        public void Dispose()
        {
            regexSteps.Clear();
        }

        /// <summary>
        /// Builds the regex steps from the word provider: first one step per chunk of explicit
        /// words, then one step per chunk of non-explicit words. Calling it again rebuilds the
        /// steps from scratch instead of appending duplicates.
        /// </summary>
        public void Initialize()
        {
            regexSteps.Clear();

            List<string> explicitWords = wordProvider.GetExplicitWords().ToList();
            List<string> nonExplicitWords = wordProvider.GetNonExplicitWords().ToList();

            foreach (List<string> chunk in ToChunks(explicitWords, partitionSize))
            {
                string explicitWordsRegex = ToRegex(chunk);

                // Explicit words only match as whole words (delimited by word boundaries).
                regexSteps.Add(new Regex(@$"\b({explicitWordsRegex})\b", StepOptions));
            }

            foreach (List<string> chunk in ToChunks(nonExplicitWords, partitionSize))
            {
                string nonExplicitWordsRegex = ToRegex(chunk);

                // NOTE(review): inside this verbatim string "\\b" is the regex for a literal backslash
                // followed by 'b', not a word boundary. The pattern therefore matches the listed words
                // anywhere in the text, plus the two-character sequence "\b". Kept as-is; confirm intent.
                regexSteps.Add(new Regex(@$"\\b|({nonExplicitWordsRegex})", StepOptions));
            }
        }

        /// <summary>
        /// Replaces every match of every regex step in <paramref name="message"/> with a run of
        /// '*' characters of the same length as the match.
        /// </summary>
        /// <param name="message">Text to filter; a null or empty value is returned unchanged.</param>
        /// <param name="cancellationToken">Token forwarded to the end-of-frame wait.</param>
        /// <returns>The filtered message.</returns>
        public async UniTask<string> Filter(string message, CancellationToken cancellationToken = default)
        {
            if (string.IsNullOrEmpty(message))
                return message;

            Stopwatch stopwatch = Stopwatch.StartNew();

            foreach (Regex regexStep in regexSteps)
            {
                await CheckTimerAndSkipFrame(stopwatch, cancellationToken);
                message = regexStep.Replace(message, match => new string('*', match.Value.Length));
            }

            return message;
        }

        /// <summary>
        /// When more than the frame budget has elapsed on <paramref name="stopwatch"/>, waits for the
        /// end of the current frame and restarts the stopwatch; otherwise completes immediately.
        /// </summary>
        private static async UniTask CheckTimerAndSkipFrame(Stopwatch stopwatch, CancellationToken cancellationToken)
        {
            if (stopwatch.ElapsedMilliseconds <= FrameBudgetMilliseconds)
                return;

            await UniTask.WaitForEndOfFrame(cancellationToken: cancellationToken);
            stopwatch.Restart();
        }

        /// <summary>Joins the words into a single regex alternation ("a|b|c"). Words are not escaped.</summary>
        private static string ToRegex(IEnumerable<string> words) => string.Join("|", words);

        /// <summary>
        /// Splits <paramref name="source"/> into consecutive lists of at most
        /// <paramref name="chunkSize"/> elements, preserving order. An empty source yields no chunks.
        /// </summary>
        private static List<List<T>> ToChunks<T>(List<T> source, int chunkSize)
        {
            var chunks = new List<List<T>>();

            for (var start = 0; start < source.Count; start += chunkSize)
            {
                // The last chunk may be shorter than chunkSize.
                int length = System.Math.Min(chunkSize, source.Count - start);
                chunks.Add(source.GetRange(start, length));
            }

            return chunks;
        }
    }
}