summaryrefslogtreecommitdiff
path: root/Services/OcrService.cs
diff options
context:
space:
mode:
authorJake Mannens <jake@asger.xyz>2026-06-05 00:37:02 +1000
committerJake Mannens <jake@asger.xyz>2026-06-11 01:13:31 +1000
commit81a0570c1b64891f286ee86d34d6f77090d525e3 (patch)
tree1c98593ddec2eb64029f4fabe024cb5323050bb1 /Services/OcrService.cs
parent03055cb1b262a2b9a0516ad3aa523e503edeb36b (diff)
Deleted server-specific files
Diffstat (limited to 'Services/OcrService.cs')
-rw-r--r--Services/OcrService.cs128
1 files changed, 0 insertions, 128 deletions
diff --git a/Services/OcrService.cs b/Services/OcrService.cs
deleted file mode 100644
index d43db2e..0000000
--- a/Services/OcrService.cs
+++ /dev/null
@@ -1,128 +0,0 @@
-using HyperBooru.Util;
-using Microsoft.EntityFrameworkCore;
-using System.Diagnostics;
-using System.Runtime.InteropServices;
-using System.Text.RegularExpressions;
-using Tesseract;
-
-namespace HyperBooru.Services;
-
-public class OcrService : IHostedService {
- private readonly string[] InvalidMimeTypes = [ "image/heic", "image/webp" ];
-
- private readonly TimeSpan ProcessInterval = TimeSpan.FromMinutes(30);
- private readonly TimeSpan StartupDelay = TimeSpan.FromSeconds(30);
-
- private readonly Regex SpaceRegex = new(@"[^0-9a-z]+", RegexOptions.Compiled);
-
- private Task? task;
- private CancellationTokenSource cts = new();
-
- private Timer timer;
-
- private IConfigService configService;
- private IServiceScopeFactory scopeFactory;
- private ILogger<OcrService> logger;
- private IDbContextFactory<HBContext> dbFactory;
-
- public OcrService(
- IConfigService configService,
- IServiceScopeFactory scopeFactory,
- ILogger<OcrService> logger,
- IDbContextFactory<HBContext> dbFactory) {
-
- this.configService = configService;
- this.scopeFactory = scopeFactory;
- this.logger = logger;
- this.dbFactory = dbFactory;
-
- timer = new((object? state) => {
- if(task is not null && !task.IsCompleted)
- return;
- cts = new();
- task = ProcessAllAsync(cts.Token);
- });
- }
-
- public Task StartAsync(CancellationToken ct) {
- if(configService.EnableOcr) {
- logger.LogInformation("Service starting...");
- timer.Change(StartupDelay, ProcessInterval);
- }
-
- return Task.CompletedTask;
- }
-
- public Task StopAsync(CancellationToken ct) {
- logger.LogInformation("Service stopping...");
- timer.Change(Timeout.Infinite, Timeout.Infinite);
- cts.Cancel();
- return Task.CompletedTask;
- }
-
- async Task ProcessAllAsync(CancellationToken ct) {
- using var scope = scopeFactory.CreateScope();
- var mediaService = scope.ServiceProvider
- .GetRequiredService<IMediaService>();
-
- using var db = dbFactory.CreateDbContext();
- Guid[] guids = db.Media
- .AsNoTracking()
- .Include(m => m.CurrentUploadedFile)
- .Include(m => m.OcrData)
- .Where(m => m.OcrData == null)
- .Where(m => m.CurrentUploadedFile!.MimeType.Contains("image/"))
- .Where(m => !InvalidMimeTypes.Contains(m.CurrentUploadedFile!.MimeType))
- .Select(m => m.Guid)
- .ToArray();
- db.Dispose();
-
- logger.LogInformation($"Performing OCR pass on {guids.Count()} media items");
-
- var factory = new TaskFactory(new LimitedConcurrencyTaskScheduler());
- var tasks = new List<Task>();
-
- var stopwatch = Stopwatch.StartNew();
-
- foreach(var guid in guids)
- tasks.Add(factory.StartNew(() => Process(guid, mediaService), ct));
-
- await Task.WhenAll(tasks);
- stopwatch.Stop();
-
- var time = stopwatch.Elapsed.ToStringHumanReadable();
- logger.LogInformation(
- $"Performed OCR pass on {guids.Count()} media items in {time}");
- }
-
- private void Process(Guid media, IMediaService mediaService) {
- logger.LogDebug($"Performing OCR on media item {media}");
-
- using var db = dbFactory.CreateDbContext();
- var m = db.Media
- .Include(m => m.OcrData)
- .First(m => m.Guid == media);
-
- OcrData o = m.OcrData ?? new();
-
- using var engine = new TesseractEngine("tessdata", "eng", EngineMode.Default);
- using var image = Pix.LoadFromFile(mediaService.GetPath(m));
- engine.SetVariable("debug_file", NullFile);
-
- o.Timestamp = DateTime.UtcNow;
- o.Text = engine.Process(image).GetText().Trim();
- o.SearchableText = SpaceRegex.Replace(o.Text.ToLower(), " ").Trim();
-
- m.OcrData = o;
- db.SaveChanges();
- }
-
- private string NullFile {
- get {
- if(RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
- return "NUL";
- else
- return "/dev/null";
- }
- }
-}