summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJake Mannens <jake@asger.xyz>2023-09-01 13:03:57 +1000
committerJake Mannens <jake@asger.xyz>2025-08-20 00:48:44 +1000
commitb286a0b0f1fcdb511d2dbb8886039cfb0182c89b (patch)
tree83d8d8ec7a575c36f22bfab01fa2708881c3f086
parent5b93706343484914370a87fddea29874c8156321 (diff)
Merged OCR functionality
-rw-r--r--Controllers/MediaController.cs3
-rw-r--r--HBContext.cs1
-rw-r--r--Media.cs15
-rw-r--r--Pages/TagDefinitions.razor7
-rw-r--r--Pages/ViewMedia.razor26
-rw-r--r--Program.cs3
-rw-r--r--Server.csproj2
-rw-r--r--Services/OcrService.cs117
-rw-r--r--Services/SearchService.cs60
-rw-r--r--Util.cs99
-rw-r--r--appsettings.Development.json3
-rw-r--r--tessdata/eng.traineddatabin0 -> 15400601 bytes
-rw-r--r--wwwroot/styles/global.css11
13 files changed, 321 insertions, 26 deletions
diff --git a/Controllers/MediaController.cs b/Controllers/MediaController.cs
index 85dfc65..8070199 100644
--- a/Controllers/MediaController.cs
+++ b/Controllers/MediaController.cs
@@ -1,9 +1,6 @@
using HyperBooru.Services;
using HyperBooru.Util;
-using ImageMagick;
using Microsoft.AspNetCore.Mvc;
-using MimeDetective;
-using System.Security.Cryptography;
namespace HyperBooru.Controllers;
diff --git a/HBContext.cs b/HBContext.cs
index f6bc15c..15dad6d 100644
--- a/HBContext.cs
+++ b/HBContext.cs
@@ -15,6 +15,7 @@ public class HBContext : DbContext {
public DbSet<Tag> Tags { get; set; }
public DbSet<Media> Media { get; set; }
public DbSet<UploadedFile> UploadedFiles { get; set; }
+ public DbSet<OcrData> OcrData { get; set; }
private IConfigService config;
diff --git a/Media.cs b/Media.cs
index e2598a9..2a4dab6 100644
--- a/Media.cs
+++ b/Media.cs
@@ -13,6 +13,7 @@ public class Media : HBObject {
public string? LongDescription { get; set; }
public int Width { get; set; }
public int Height { get; set; }
+ public virtual OcrData? OcrData { get; set; }
public virtual List<UploadedFile> UploadedFiles { get; set; } = new();
public bool IsIngest => Tags
@@ -26,7 +27,7 @@ public class Media : HBObject {
return UploadedFiles
.OrderBy(f => f.UploadTime)
- .First()?.Filename;
+ .First()?.Filename ?? Guid.ToString().ToUpper();
}
}
}
@@ -40,4 +41,16 @@ public class UploadedFile : HBObject {
public DateTime? LastWriteTime { get; set; }
public DateTime? CreateTime { get; set; }
public virtual Media Media { get; set; }
+}
+
+/// <summary>
+/// OCR result stored for a single <see cref="Media"/> item. Rows are
+/// written by OcrService, which fills Text, SearchableText and Timestamp.
+/// </summary>
+public class OcrData {
+ // Surrogate primary key; the value is generated by the database.
+ [Key]
+ [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
+ public int OcrDataId { get; set; }
+ // NOTE(review): when [ForeignKey] is placed on a foreign-key property its
+ // argument is normally the name of the navigation property ("Media").
+ // This class has no member called "ObjectId" - confirm the mapping that
+ // EF Core actually produces for this relationship.
+ [ForeignKey("ObjectId")]
+ public int MediaId { get; set; }
+ // Recognised text as returned by the OCR engine (trimmed).
+ public string Text { get; set; }
+ // Lower-cased copy of Text in which every run of characters outside
+ // [0-9a-z] is replaced by a single space; this is what search matches on.
+ public string SearchableText { get; set; }
+ // UTC time at which the OCR pass for this item was performed.
+ public DateTime Timestamp { get; set; }
+ // Navigation back to the media item this OCR data belongs to.
+ public virtual Media Media { get; set; }
} \ No newline at end of file
diff --git a/Pages/TagDefinitions.razor b/Pages/TagDefinitions.razor
index f5339e7..f32e803 100644
--- a/Pages/TagDefinitions.razor
+++ b/Pages/TagDefinitions.razor
@@ -43,12 +43,7 @@
@(", ")
}
}
-
-@* @(string.Join(", ", tagDef.ImplicitTags
- .Where(it => it.Source == TagSource.UserTag)
- .Select(it => it.Name)
- .Order()))
-*@ </i>
+ </i>
</td>
<td class="actions">
<a href="javascript:;" @onclick=@(() => PromptToEdit(tagDef))>Edit</a>
diff --git a/Pages/ViewMedia.razor b/Pages/ViewMedia.razor
index bb6a207..eb49b15 100644
--- a/Pages/ViewMedia.razor
+++ b/Pages/ViewMedia.razor
@@ -62,17 +62,18 @@
<ButtonContainer>
<button @onclick=@(() => deleteDialog.Show()) class="warning">Delete</button>
<button @onclick=@(() => tagDialog.Show()) class="secondary">Add Tag</button>
- @if(media.IsIngest) {
- <button @onclick=@(() => SetIngest(false))>Mark Tagging Complete</button>
- } else {
- <button class="secondary" @onclick=@(() => SetIngest(true))>Mark Tagging Incomplete</button>
- }
+ <button @onclick=@(() => ocrDialog.Show()) class="secondary">View OCR</button>
@if(infoEditMode) {
<button @onclick=@(() => ApplyInfoEdit(false)) class="secondary">Cancel</button>
<button @onclick=@(() => ApplyInfoEdit(true))>Apply</button>
} else {
<button @onclick=@(() => InfoEditMode = true) class="secondary">Edit Info</button>
}
+ @if(media.IsIngest) {
+ <button @onclick=@(() => SetIngest(false))>Mark Tagging Complete</button>
+ } else {
+ <button class="secondary" @onclick=@(() => SetIngest(true))>Mark Tagging Incomplete</button>
+ }
</ButtonContainer>
</div>
</div>
@@ -85,6 +86,17 @@
</ButtonContainer>
</Dialog>
+<Dialog Title="OCR Data" @ref=ocrDialog>
+    @* Shows the stored OCR text, or a placeholder while the media item has
+       not been processed yet. The placeholder is centred with CSS because
+       <center> is obsolete and is not allowed inside <p>. *@
+    @if(media.OcrData is null) {
+        <p style="text-align:center;">This media item hasn't been scanned yet!</p>
+    } else {
+        <code style="max-height:400px;">@media.OcrData?.Text</code>
+    }
+    <ButtonContainer>
+        <button @onclick=@(() => ocrDialog.Hide())>Close</button>
+    </ButtonContainer>
+</Dialog>
+
<TagSelectDialog
Title="Select one or more tag(s) to add"
OnSubmit=AddTags
@@ -103,9 +115,10 @@
private string? shortDescription;
private string? longDescription;
+ private MediaTagTable mediaTagTable;
private Dialog deleteDialog;
+ private Dialog ocrDialog;
private TagSelectDialog tagDialog;
- private MediaTagTable mediaTagTable;
private HBContext db;
@@ -119,6 +132,7 @@
.Include(m => m.Tags)
.ThenInclude(t => t.TagDefinition)
.Include(m => m.UploadedFiles)
+ .Include(m => m.OcrData)
.First(m => m.Guid == MediaId);
title = media.DisplayName ?? "Media View";
diff --git a/Program.cs b/Program.cs
index 90375b0..564ab30 100644
--- a/Program.cs
+++ b/Program.cs
@@ -14,13 +14,14 @@ public class Program {
builder.Services.AddRazorPages();
builder.Services.AddServerSideBlazor();
- // Add out custom services
+ // Add our custom services
builder.Services.AddSingleton<IConfigService, ConfigService>();
builder.Services.AddDbContextFactory<HBContext>();
builder.Services.AddScoped<ISearchService, SearchService>();
builder.Services.AddScoped<ITagService, TagService>();
builder.Services.AddScoped<IMediaService, MediaService>();
builder.Services.AddSingleton<IUserService, UserService>();
+ builder.Services.AddHostedService<OcrService>();
var app = builder.Build();
diff --git a/Server.csproj b/Server.csproj
index fab6521..c14aa24 100644
--- a/Server.csproj
+++ b/Server.csproj
@@ -30,6 +30,8 @@
<PackageReference Include="Mime-Detective" Version="23.6.1" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="7.0.4" />
<PackageReference Include="Swashbuckle.AspNetCore" Version="6.5.0" />
+ <PackageReference Include="System.Drawing.Common" Version="7.0.0" />
+ <PackageReference Include="Tesseract" Version="5.2.0" />
</ItemGroup>
</Project>
diff --git a/Services/OcrService.cs b/Services/OcrService.cs
new file mode 100644
index 0000000..2f65e43
--- /dev/null
+++ b/Services/OcrService.cs
@@ -0,0 +1,117 @@
+using HyperBooru.Util;
+using Microsoft.EntityFrameworkCore;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Text.RegularExpressions;
+using Tesseract;
+
+namespace HyperBooru.Services;
+
+/// <summary>
+/// Hosted background service that periodically runs Tesseract OCR over every
+/// image media item that has no <see cref="OcrData"/> yet, storing the
+/// recognised text together with a normalised copy used for searching.
+/// </summary>
+public class OcrService : IHostedService, IDisposable {
+    // Time between two OCR passes, and the delay before the first pass.
+    private static readonly TimeSpan ProcessInterval = TimeSpan.FromMinutes(30);
+    private static readonly TimeSpan StartupDelay = TimeSpan.FromSeconds(30);
+
+    // Matches every run of characters that is not a digit or a lower-case
+    // ASCII letter; such runs become a single space in SearchableText.
+    private static readonly Regex SpaceRegex = new(@"[^0-9a-z]+", RegexOptions.Compiled);
+
+    // The pass currently running (or the last one that ran) and the token
+    // source used to cancel it when the host stops.
+    private Task? task;
+    private CancellationTokenSource cts = new();
+
+    private readonly Timer timer;
+
+    private readonly IServiceScopeFactory scopeFactory;
+    private readonly ILogger<OcrService> logger;
+    private readonly IDbContextFactory<HBContext> dbFactory;
+
+    public OcrService(
+        IServiceScopeFactory scopeFactory,
+        ILogger<OcrService> logger,
+        IDbContextFactory<HBContext> dbFactory) {
+
+        this.scopeFactory = scopeFactory;
+        this.logger = logger;
+        this.dbFactory = dbFactory;
+
+        // The timer is created idle; StartAsync arms it.
+        timer = new((object? state) => {
+            // Skip this tick while the previous pass is still running.
+            if(task is { IsCompleted: false })
+                return;
+            cts = new();
+            task = ProcessAllAsync(cts.Token);
+        });
+    }
+
+    /// <summary>Arms the timer; the first pass runs after the startup delay.</summary>
+    public Task StartAsync(CancellationToken ct) {
+        logger.LogInformation("Service starting...");
+        timer.Change(StartupDelay, ProcessInterval);
+        return Task.CompletedTask;
+    }
+
+    /// <summary>Stops the timer and cancels the pass that is in progress, if any.</summary>
+    public Task StopAsync(CancellationToken ct) {
+        logger.LogInformation("Service stopping...");
+        timer.Change(Timeout.Infinite, Timeout.Infinite);
+        cts.Cancel();
+        return Task.CompletedTask;
+    }
+
+    /// <summary>Releases the timer owned by this service.</summary>
+    public void Dispose() {
+        timer.Dispose();
+    }
+
+    /// <summary>
+    /// Runs one OCR pass: collects the images without OCR data and processes
+    /// them on a concurrency-limited scheduler.
+    /// </summary>
+    /// <param name="ct">Cancels items of the pass that have not started yet.</param>
+    private async Task ProcessAllAsync(CancellationToken ct) {
+        using var scope = scopeFactory.CreateScope();
+        var mediaService = scope.ServiceProvider
+            .GetRequiredService<IMediaService>();
+
+        // The context is only needed for this query, so it is scoped to it
+        // and disposed exactly once.
+        Guid[] guids;
+        using(var db = dbFactory.CreateDbContext()) {
+            guids = db.Media
+                .Where(m => m.OcrData == null)
+                .Where(m => m.MimeType.Contains("image/"))
+                .Select(m => m.Guid)
+                .ToArray();
+        }
+
+        logger.LogInformation("Performing OCR pass on {Count} media items", guids.Length);
+
+        var factory = new TaskFactory(new LimitedConcurrencyTaskScheduler());
+        var stopwatch = Stopwatch.StartNew();
+
+        var tasks = new List<Task>(guids.Length);
+        foreach(var guid in guids)
+            tasks.Add(factory.StartNew(() => Process(guid, mediaService), ct));
+
+        try {
+            await Task.WhenAll(tasks);
+        } catch(OperationCanceledException) {
+            // Nobody awaits the pass, so report the cancellation here rather
+            // than leaving a cancelled task unobserved.
+            logger.LogInformation(
+                "OCR pass cancelled after {Elapsed}",
+                stopwatch.Elapsed.ToStringHumanReadable());
+            return;
+        }
+        stopwatch.Stop();
+
+        logger.LogInformation(
+            "Performed OCR pass on {Count} media items in {Elapsed}",
+            guids.Length,
+            stopwatch.Elapsed.ToStringHumanReadable());
+    }
+
+    /// <summary>
+    /// Runs OCR on one media item and saves the result. A failure is logged
+    /// and does not abort the rest of the pass; the item still has no OCR
+    /// data afterwards, so the next pass selects it again.
+    /// </summary>
+    private void Process(Guid media, IMediaService mediaService) {
+        logger.LogDebug("Performing OCR on media item {Media}", media);
+
+        try {
+            using var db = dbFactory.CreateDbContext();
+            var m = db.Media
+                .Include(x => x.OcrData)
+                .First(x => x.Guid == media);
+
+            OcrData o = m.OcrData ?? new();
+
+            using var engine = new TesseractEngine("tessdata", "eng", EngineMode.Default);
+            // Send Tesseract's debug output to the null device.
+            engine.SetVariable("debug_file", NullFile);
+            using var image = Pix.LoadFromFile(mediaService.GetPath(m));
+            // The page holds native resources and must be disposed before
+            // the engine; using declarations dispose in reverse order.
+            using var page = engine.Process(image);
+
+            o.Timestamp = DateTime.UtcNow;
+            o.Text = page.GetText().Trim();
+            // Invariant lower-casing keeps ASCII letters inside [a-z] on
+            // every server culture.
+            o.SearchableText = SpaceRegex.Replace(o.Text.ToLowerInvariant(), " ").Trim();
+
+            m.OcrData = o;
+            db.SaveChanges();
+        } catch(Exception ex) {
+            logger.LogError(ex, "OCR failed for media item {Media}", media);
+        }
+    }
+
+    // Path of the platform's null device.
+    private static string NullFile =>
+        RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? "NUL" : "/dev/null";
+}
diff --git a/Services/SearchService.cs b/Services/SearchService.cs
index e8e497d..bb2963d 100644
--- a/Services/SearchService.cs
+++ b/Services/SearchService.cs
@@ -24,33 +24,77 @@ public class SearchService : ISearchService {
query = query.ToLower();
+ int[] descriptionResults = SearchDescription(query);
+ int[] ocrResults = SearchOcr(query);
+
var matchedTag = db.TagDefinitions
.FirstOrDefault(td => td.Name.ToLower() == query);
int[] tags;
-
if(matchedTag is not null) {
tags = tagService
.TagsThatImply(matchedTag)
.Select(td => td.ObjectId)
.ToArray();
} else {
- // TODO: expand scope to all tags that imply
+ // TODO: Expand scope to all tags that imply
tags = db.TagDefinitions
.Where(td => td.Name.ToLower().Contains(query))
.Select(td => td.ObjectId)
.ToArray();
}
+ int[] tagResults = SearchTags(tags);
+
+ int[] mediaIds = descriptionResults
+ .Union(ocrResults)
+ .Union(tagResults)
+ .OrderDescending()
+ .ToArray();
+
return db.Media
.Include(m => m.Tags)
- .AsEnumerable()
- .Where(m => m.Tags.IntersectBy(tags, t => t.TagDefinitionId).Any())
- .Concat(db.Media
+ .Where(m => mediaIds.Contains(m.ObjectId))
+ .ToArray();
+ }
+
+ // TODO: Make asynchronous
+    /// <summary>
+    /// Returns the ObjectIds of all media items carrying at least one tag
+    /// whose TagDefinitionId is contained in <paramref name="tags"/>.
+    /// </summary>
+    private int[] SearchTags(int[] tags) {
+        // Nothing to match against: skip the database round trip.
+        if(tags.Length == 0)
+            return Array.Empty<int>();
+
+        using var db = dbFactory.CreateDbContext();
+        // Filter in the database instead of loading every media row and its
+        // tags into memory first.
+        return db.Media
+            .Where(m => m.Tags.Any(t => tags.Contains(t.TagDefinitionId)))
+            .Select(m => m.ObjectId)
+            .ToArray();
+    }
+
+ // TODO: Make asynchronous
+ // Returns the ObjectIds of all media items whose short or long description
+ // contains the query; both sides are lower-cased before comparing.
+ // The query runs on its own DbContext inside Task.Run and the caller blocks
+ // until it has finished.
+ private int[] SearchDescription(string query) {
+ return Task.Run(() => {
+ using var db = dbFactory.CreateDbContext();
+ query = query.ToLower();
+ return db.Media
.Where(m =>
(m.ShortDescription != null && m.ShortDescription.ToLower().Contains(query)) ||
- (m.LongDescription != null && m.LongDescription.ToLower().Contains(query))))
- .DistinctBy(m => m.ObjectId)
- .ToArray();
+ (m.LongDescription != null && m.LongDescription.ToLower().Contains(query)))
+ .Select(m => m.ObjectId)
+ .ToArray();
+ }).GetAwaiter().GetResult();
+ }
+
+ // TODO: Make asynchronous
+    /// <summary>
+    /// Returns the ObjectIds of all media items whose OCR text contains
+    /// <paramref name="query"/>.
+    /// </summary>
+    private int[] SearchOcr(string query) {
+        // OcrData.SearchableText is stored lower-cased with every run of
+        // characters outside [0-9a-z] collapsed to one space. The query gets
+        // the same treatment, otherwise any query containing punctuation or
+        // repeated whitespace could never match.
+        string needle = System.Text.RegularExpressions.Regex
+            .Replace(query.ToLower(), "[^0-9a-z]+", " ")
+            .Trim();
+
+        // An empty needle would match every row.
+        if(needle.Length == 0)
+            return Array.Empty<int>();
+
+        using var db = dbFactory.CreateDbContext();
+        return db.OcrData
+            .Where(o => o.SearchableText.Contains(needle))
+            .Select(o => o.Media.ObjectId)
+            .ToArray();
+    }
}
diff --git a/Util.cs b/Util.cs
index 31a2e84..6af6c81 100644
--- a/Util.cs
+++ b/Util.cs
@@ -18,4 +18,103 @@ public static class Extensions {
double n = x / Math.Pow(10, exp / 3 * 3);
return $"{Math.Round(n, 2 - (exp % 3))} {suffix}B";
}
+
+    /// <summary>
+    /// Formats a duration compactly, choosing the unit by magnitude:
+    /// "250ms", "12.50s", "2m5s", "2h15m" or "3.25d".
+    /// </summary>
+    /// <param name="t">The duration to format.</param>
+    /// <returns>The formatted duration.</returns>
+    public static string ToStringHumanReadable(this TimeSpan t) {
+        if(t.TotalMilliseconds < 1000)
+            return $"{t.TotalMilliseconds:0}ms";
+        if(t.TotalSeconds < 60)
+            return $"{t.TotalSeconds:0.00}s";
+        // Use the whole-number components here: formatting TotalMinutes or
+        // TotalHours with "0" rounds (1.6 minutes would print as "2m"), and
+        // each component needs its own value.
+        if(t.TotalMinutes < 60)
+            return $"{t.Minutes}m{t.Seconds}s";
+        if(t.TotalHours < 24)
+            return $"{t.Hours}h{t.Minutes}m";
+        return $"{t.TotalDays:0.00}d";
+    }
+}
+
+/// <summary>
+/// Task scheduler that runs queued tasks on the thread pool while keeping
+/// the number of concurrently running worker loops at or below a fixed
+/// maximum.
+/// </summary>
+public class LimitedConcurrencyTaskScheduler : TaskScheduler {
+    public sealed override int MaximumConcurrencyLevel =>
+        maxConcurrency;
+
+    private readonly int maxConcurrency;
+
+    // True while the current thread is inside this scheduler's worker loop;
+    // only such threads may execute tasks inline.
+    [ThreadStatic]
+    private static bool threadIsProcessingItems;
+
+    // Pending tasks; every access is guarded by lock(tasks).
+    private readonly LinkedList<Task> tasks = new();
+
+    // Number of worker loops queued to or running on the thread pool.
+    private int delegatesQueuedOrRunning = 0;
+
+    /// <summary>Creates a scheduler limited to one worker per processor.</summary>
+    public LimitedConcurrencyTaskScheduler()
+        : this(Environment.ProcessorCount) { }
+
+    /// <summary>Creates a scheduler with the given concurrency limit.</summary>
+    /// <param name="maxConcurrency">Maximum number of concurrent workers; at least 1.</param>
+    /// <exception cref="ArgumentOutOfRangeException">
+    /// <paramref name="maxConcurrency"/> is less than 1.
+    /// </exception>
+    public LimitedConcurrencyTaskScheduler(int maxConcurrency) {
+        // The single-string constructor takes the parameter name, not the
+        // message, so pass both explicitly.
+        if(maxConcurrency < 1)
+            throw new ArgumentOutOfRangeException(
+                nameof(maxConcurrency),
+                "maxConcurrency must be greater than 0");
+        this.maxConcurrency = maxConcurrency;
+    }
+
+    protected sealed override void QueueTask(Task task) {
+        lock(tasks) {
+            tasks.AddLast(task);
+            // Start another worker only while below the limit; otherwise an
+            // existing worker picks the task up.
+            if(delegatesQueuedOrRunning < maxConcurrency) {
+                delegatesQueuedOrRunning++;
+                NotifyThreadPoolOfPendingWork();
+            }
+        }
+    }
+
+    // Queues a worker loop that executes pending tasks until none are left.
+    private void NotifyThreadPoolOfPendingWork() {
+        ThreadPool.UnsafeQueueUserWorkItem(_ => {
+            threadIsProcessingItems = true;
+            try {
+                while(true) {
+                    Task item;
+                    lock(tasks) {
+                        if(tasks.Count == 0) {
+                            delegatesQueuedOrRunning--;
+                            break;
+                        }
+                        item = tasks.First!.Value;
+                        tasks.RemoveFirst();
+                    }
+                    // Run the task outside the lock.
+                    TryExecuteTask(item);
+                }
+            } finally {
+                threadIsProcessingItems = false;
+            }
+        }, null);
+    }
+
+    protected sealed override bool TryExecuteTaskInline(Task task, bool taskWasPreviouslyQueued) {
+        // Inlining on a thread that is not one of our workers would exceed
+        // the concurrency limit.
+        if(!threadIsProcessingItems)
+            return false;
+
+        // A queued task must be removed first so it cannot run twice.
+        if(taskWasPreviouslyQueued)
+            return TryDequeue(task) && TryExecuteTask(task);
+
+        return TryExecuteTask(task);
+    }
+
+    protected sealed override bool TryDequeue(Task task) {
+        lock(tasks) {
+            return tasks.Remove(task);
+        }
+    }
+
+    protected sealed override IEnumerable<Task> GetScheduledTasks() {
+        bool lockTaken = false;
+        try {
+            // Do not block here: throw if the lock is currently held.
+            Monitor.TryEnter(tasks, ref lockTaken);
+            if(!lockTaken)
+                throw new NotSupportedException();
+            // Return a snapshot so the caller never enumerates the live
+            // list after the lock has been released.
+            return new List<Task>(tasks);
+        } finally {
+            if(lockTaken)
+                Monitor.Exit(tasks);
+        }
+    }
}
diff --git a/appsettings.Development.json b/appsettings.Development.json
index 770d3e9..6860045 100644
--- a/appsettings.Development.json
+++ b/appsettings.Development.json
@@ -3,7 +3,8 @@
"Logging": {
"LogLevel": {
"Default": "Information",
- "Microsoft.AspNetCore": "Warning"
+ "Microsoft.AspNetCore": "Warning",
+ "HyperBooru.Services.OcrService": "Debug"
}
}
}
diff --git a/tessdata/eng.traineddata b/tessdata/eng.traineddata
new file mode 100644
index 0000000..176dc32
--- /dev/null
+++ b/tessdata/eng.traineddata
Binary files differ
diff --git a/wwwroot/styles/global.css b/wwwroot/styles/global.css
index c0dbe3f..b694fe4 100644
--- a/wwwroot/styles/global.css
+++ b/wwwroot/styles/global.css
@@ -59,6 +59,17 @@ a.nondecorated:hover {
color: #999;
}
+/* Block used to display preformatted text such as OCR output. */
+code {
+    background: #222;
+    border-radius: 10px;
+    box-sizing: border-box;
+    /* max-height and overflow-y have no effect on an inline element */
+    display: block;
+    /* generic fallback for systems without Lucida Console */
+    font-family: 'Lucida Console', monospace;
+    font-size: 8pt;
+    overflow-y: auto;
+    padding: 20px;
+    white-space: pre-line;
+}
+
button, input[type=submit] {
color: white;
background: var(--col-button-pri);