diff options
| author | Jake Mannens <jake@asger.xyz> | 2026-06-05 00:37:02 +1000 |
|---|---|---|
| committer | Jake Mannens <jake@asger.xyz> | 2026-06-11 01:13:31 +1000 |
| commit | 81a0570c1b64891f286ee86d34d6f77090d525e3 (patch) | |
| tree | 1c98593ddec2eb64029f4fabe024cb5323050bb1 /Services/OcrService.cs | |
| parent | 03055cb1b262a2b9a0516ad3aa523e503edeb36b (diff) | |
Deleted server-specific files
Diffstat (limited to 'Services/OcrService.cs')
| -rw-r--r-- | Services/OcrService.cs | 128 |
1 files changed, 0 insertions, 128 deletions
// Reconstructed from the deletion diff of Services/OcrService.cs
// (diff --git a/Services/OcrService.cs b/Services/OcrService.cs,
//  deleted file mode 100644, index d43db2e..0000000).
using HyperBooru.Util;
using Microsoft.EntityFrameworkCore;
using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text.RegularExpressions;
using Tesseract;

namespace HyperBooru.Services;

/// <summary>
/// Hosted service that periodically runs Tesseract OCR over image media items
/// which have no <c>OcrData</c> yet and stores the recognised text together
/// with a normalised, searchable form of it.
/// </summary>
public class OcrService : IHostedService, IDisposable {
    // MIME types that match "image/" but are excluded from OCR passes.
    private readonly string[] InvalidMimeTypes = [ "image/heic", "image/webp" ];

    private readonly TimeSpan ProcessInterval = TimeSpan.FromMinutes(30);
    private readonly TimeSpan StartupDelay = TimeSpan.FromSeconds(30);

    // Matches every run of characters outside [0-9a-z]; each run is collapsed
    // to a single space when building the searchable text.
    private static readonly Regex SpaceRegex = new(@"[^0-9a-z]+", RegexOptions.Compiled);

    // The most recently started pass (null until the timer fires once).
    private Task? task;
    private CancellationTokenSource cts = new();

    private readonly Timer timer;

    private readonly IConfigService configService;
    private readonly IServiceScopeFactory scopeFactory;
    private readonly ILogger<OcrService> logger;
    private readonly IDbContextFactory<HBContext> dbFactory;

    /// <summary>
    /// Stores the injected dependencies and creates the (initially idle)
    /// timer. The timer is only armed by <see cref="StartAsync"/>.
    /// </summary>
    public OcrService(
        IConfigService configService,
        IServiceScopeFactory scopeFactory,
        ILogger<OcrService> logger,
        IDbContextFactory<HBContext> dbFactory) {

        this.configService = configService;
        this.scopeFactory = scopeFactory;
        this.logger = logger;
        this.dbFactory = dbFactory;

        timer = new Timer(OnTimerTick);
    }

    /// <summary>
    /// Timer callback: starts a new OCR pass unless the previous one is still
    /// running.
    /// </summary>
    private void OnTimerTick(object? state) {
        if(task is not null && !task.IsCompleted)
            return;

        cts = new();
        task = ProcessAllAsync(cts.Token);
    }

    /// <summary>
    /// Arms the timer (first pass after <c>StartupDelay</c>, then every
    /// <c>ProcessInterval</c>) when OCR is enabled in the configuration.
    /// </summary>
    public Task StartAsync(CancellationToken ct) {
        if(configService.EnableOcr) {
            logger.LogInformation("Service starting...");
            timer.Change(StartupDelay, ProcessInterval);
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Disarms the timer, cancels the current pass and waits for it to wind
    /// down, or for <paramref name="ct"/> to fire, whichever comes first.
    /// </summary>
    public async Task StopAsync(CancellationToken ct) {
        logger.LogInformation("Service stopping...");
        timer.Change(Timeout.Infinite, Timeout.Infinite);
        cts.Cancel();

        var running = task;
        if(running is null)
            return;

        // Wait for items that are already being processed so they do not keep
        // using injected services after shutdown has been reported complete.
        // The delay task only completes when the host gives up waiting.
        await Task.WhenAny(running, Task.Delay(Timeout.Infinite, ct));
    }

    /// <summary>Releases the timer and the current cancellation source.</summary>
    public void Dispose() {
        timer.Dispose();
        cts.Dispose();
    }

    /// <summary>
    /// Runs one OCR pass: collects the GUIDs of all eligible media items and
    /// processes each of them on a <c>LimitedConcurrencyTaskScheduler</c>.
    /// Failures are logged rather than left on the returned task, which nobody
    /// awaits while the service is running.
    /// </summary>
    /// <param name="ct">Cancels work items that have not started yet.</param>
    private async Task ProcessAllAsync(CancellationToken ct) {
        try {
            using var scope = scopeFactory.CreateScope();
            var mediaService = scope.ServiceProvider
                .GetRequiredService<IMediaService>();

            // Only the GUIDs are needed, so the context is released before the
            // long-running part of the pass. No Include() calls: the query
            // projects to Guid, so no navigation needs loading.
            Guid[] guids;
            using(var db = dbFactory.CreateDbContext()) {
                guids = db.Media
                    .AsNoTracking()
                    .Where(m => m.OcrData == null)
                    .Where(m => m.CurrentUploadedFile!.MimeType.Contains("image/"))
                    .Where(m => !InvalidMimeTypes.Contains(m.CurrentUploadedFile!.MimeType))
                    .Select(m => m.Guid)
                    .ToArray();
            }

            logger.LogInformation($"Performing OCR pass on {guids.Length} media items");

            // NOTE(review): LimitedConcurrencyTaskScheduler is defined outside
            // this file; presumably it caps how many items run at once.
            var factory = new TaskFactory(new LimitedConcurrencyTaskScheduler());
            var tasks = new List<Task>(guids.Length);

            var stopwatch = Stopwatch.StartNew();

            foreach(var guid in guids)
                tasks.Add(factory.StartNew(() => ProcessSafely(guid, mediaService), ct));

            await Task.WhenAll(tasks);
            stopwatch.Stop();

            var time = stopwatch.Elapsed.ToStringHumanReadable();
            logger.LogInformation(
                $"Performed OCR pass on {guids.Length} media items in {time}");
        } catch(OperationCanceledException) {
            logger.LogInformation("OCR pass cancelled");
        } catch(Exception ex) {
            logger.LogError(ex, "OCR pass failed");
        }
    }

    /// <summary>
    /// Runs <see cref="Process"/> for one item and logs any failure, so that a
    /// single unreadable file does not fault the whole pass. A failed item
    /// still has no <c>OcrData</c> and is therefore picked up by the next pass.
    /// </summary>
    private void ProcessSafely(Guid media, IMediaService mediaService) {
        try {
            Process(media, mediaService);
        } catch(Exception ex) {
            logger.LogError(ex, "OCR failed for media item {Media}", media);
        }
    }

    /// <summary>
    /// Performs OCR on a single media item and saves the result.
    /// </summary>
    /// <param name="media">GUID of the media item to process.</param>
    /// <param name="mediaService">Used to resolve the item's file path.</param>
    private void Process(Guid media, IMediaService mediaService) {
        logger.LogDebug($"Performing OCR on media item {media}");

        using var db = dbFactory.CreateDbContext();
        var item = db.Media
            .Include(x => x.OcrData)
            .FirstOrDefault(x => x.Guid == media);

        // The item may have been deleted since the pass collected its GUID.
        if(item is null) {
            logger.LogDebug("Skipping OCR for media item {Media}: it no longer exists", media);
            return;
        }

        OcrData ocr = item.OcrData ?? new();

        using var engine = new TesseractEngine("tessdata", "eng", EngineMode.Default);
        using var image = Pix.LoadFromFile(mediaService.GetPath(item));
        // Point Tesseract's debug output at the platform's null device.
        engine.SetVariable("debug_file", NullFile);

        ocr.Timestamp = DateTime.UtcNow;

        // The page is disposable; declared after the engine, it is released
        // before the engine (using declarations dispose in reverse order).
        using var page = engine.Process(image);
        ocr.Text = page.GetText().Trim();

        // Lower-case with the invariant culture: the regex only keeps ASCII
        // [0-9a-z], and culture-sensitive casing (e.g. Turkish 'I' -> 'ı')
        // would turn plain ASCII letters into separators.
        ocr.SearchableText = SpaceRegex.Replace(ocr.Text.ToLowerInvariant(), " ").Trim();

        item.OcrData = ocr;
        db.SaveChanges();
    }

    /// <summary>Path of the null device on the current platform.</summary>
    private static string NullFile =>
        RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "NUL" : "/dev/null";
}
