detect/detect.gui/Services/OCRService.cs

295 lines
13 KiB
C#
Raw Normal View History

2025-02-07 12:16:01 +08:00
using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text.Encodings.Web;
using System.Text.Json;
2025-04-21 13:08:40 +08:00
using System.Text.RegularExpressions;
2025-02-07 12:16:01 +08:00
using System.Text.Unicode;
using System.Threading.Tasks;
using detect.gui.Models;
using PaddleOCRSharp;
namespace detect.gui.Services;
public class OCRService
{
// private static OCRService? _instance;
//
// public static OCRService Instance()
// {
// return _instance ??= new OCRService();
// }
/// <summary>OCR engine used by <c>ImportByOCR</c>; assigned once in the constructor.</summary>
private readonly PaddleOCREngine? _engine;

/// <summary>
/// Creates the service together with its PaddleOCR engine.
/// </summary>
public OCRService()
{
    // A null model configuration is handed to the engine as-is.
    // NOTE(review): presumably this makes PaddleOCRSharp fall back to its bundled models - confirm.
    OCRModelConfig? modelConfig = null;
    _engine = new PaddleOCREngine(modelConfig, new OCRParameter());
}
/// <summary>
/// Stub OCR import: performs no recognition and always reports success with an empty payload.
/// </summary>
/// <param name="base64ImageString">Image as a base64 string; currently unused.</param>
/// <returns>An <c>ApiResponse</c> built with code 0, message "success" and an empty string as data.</returns>
private ApiResponse<string?> ImportImageOCR(string base64ImageString)
{
    // An earlier Tesseract-based implementation (load image file, read page text, split it into
    // rows of code/type/x/y/center/w/h and serialize them to JSON) used to live here as
    // commented-out code. It was never executed; see version control history if it is needed again.
    return new ApiResponse<string?>(0, "success", "");
}
2025-04-23 15:55:51 +08:00
/// <summary>Matches the upper-case ASCII letters that are stripped from a type cell before its size is parsed.</summary>
private static readonly Regex UpperCaseLetters = new("[A-Z]", RegexOptions.Compiled);

/// <summary>Serializer settings for the returned JSON: indented, with all Unicode ranges left unescaped.</summary>
private static readonly JsonSerializerOptions OutputJsonOptions = new()
{
    WriteIndented = true,
    Encoder = JavaScriptEncoder.Create(UnicodeRanges.All)
};

/// <summary>
/// Runs OCR over a base64 data-URI image on a thread-pool thread and extracts table rows from the
/// recognised text cells.
/// </summary>
/// <remarks>
/// A cell is treated as the "code" cell of a row when its text contains <c>P</c> followed by a letter,
/// <c>VB</c> or <c>BS</c> (case-insensitive). The row is then read positionally from the OCR result list:
/// the cell before it is <c>sn</c>, and the five cells after it are <c>type</c>, <c>x</c>, <c>y</c>,
/// <c>center</c> and <c>angle</c>. Rows whose neighbours are missing, whose type cell is blank, or whose
/// type cell carries no size information are skipped.
/// </remarks>
/// <param name="base64ImageString">Image as a <c>data:image/...;base64,</c> URI, decoded by <c>GetImage</c>.</param>
/// <returns>
/// A task yielding an indented JSON array with one object per row (<c>sn</c>, <c>code</c>, <c>type</c>,
/// <c>x</c>, <c>y</c>, <c>center</c>, <c>w</c>, <c>h</c>, <c>angle</c>), or an empty string when the image
/// cannot be decoded, the engine returns nothing, or no row is recognised.
/// </returns>
public Task<string> ImportByOCR(string base64ImageString)
{
    return Task.Run(() =>
    {
        // The decoded image is only needed for the OCR call; release its GDI+ handle when done.
        using var image = GetImage(base64ImageString);
        if (image is null) return "";

        var result = _engine?.DetectText(image);
        if (result == null) return "";

        var resultList = JsonHelper.DeserializeObject<List<OCRResultEntity>>(result.JsonText);
        if (resultList is null || resultList.Count == 0) return "";

        // Remember each cell's position so neighbouring cells can be addressed relative to it.
        for (var i = 0; i < resultList.Count; i++)
        {
            resultList[i].Index = i;
        }

        var data = new List<object>();
        foreach (var item in resultList)
        {
            if (!IsCodeCell(item.Text)) continue;

            // A row needs one cell before the code cell and five cells after it.
            if (item.Index < 1 || item.Index + 5 >= resultList.Count) continue;

            try
            {
                var type = ReplaceChars(resultList[item.Index + 1].Text);
                if (string.IsNullOrWhiteSpace(type)) continue;
                if (!TryParseSize(type, out var w, out var h)) continue;

                data.Add(new
                {
                    sn = ReplaceChars(resultList[item.Index - 1].Text),
                    code = ReplaceChars(resultList[item.Index].Text),
                    type,
                    x = ReplaceChars(resultList[item.Index + 2].Text),
                    y = ReplaceChars(resultList[item.Index + 3].Text),
                    center = ReplaceChars(resultList[item.Index + 4].Text),
                    w,
                    h,
                    angle = ReplaceChars(resultList[item.Index + 5].Text),
                });
            }
            catch (Exception)
            {
                // Deliberate best effort: a row whose cells cannot be cleaned up is skipped
                // instead of failing the whole import.
            }
        }

        return data.Count > 0 ? JsonSerializer.Serialize(data, OutputJsonOptions) : "";
    });
}

/// <summary>
/// Tells whether an OCR cell looks like a code cell: it contains <c>P</c> followed by any letter A-Z,
/// or <c>VB</c>, or <c>BS</c>, ignoring case.
/// </summary>
private static bool IsCodeCell(string? text)
{
    if (string.IsNullOrEmpty(text)) return false;

    // Invariant upper-casing: these are identifiers, not linguistic text.
    var upper = text.ToUpperInvariant();
    if (upper.Contains("VB", StringComparison.Ordinal) || upper.Contains("BS", StringComparison.Ordinal))
    {
        return true;
    }

    // Covers PA..PZ, including PQ, which the previous hand-written list omitted (it tested PO twice).
    for (var i = 0; i + 1 < upper.Length; i++)
    {
        if (upper[i] == 'P' && upper[i + 1] is >= 'A' and <= 'Z') return true;
    }

    return false;
}

/// <summary>
/// Extracts width and height from a cleaned type cell such as <c>PL100x200-5</c>.
/// Upper-case letters are removed, anything from the first '-' on is dropped, and the remainder is
/// split on a lower-case 'x'. With a '-' but no 'x' the single value is used for both dimensions.
/// </summary>
/// <returns><c>false</c> when the cell contains neither 'x' nor '-' after the letters are removed.</returns>
private static bool TryParseSize(string type, out string w, out string h)
{
    w = "0";
    h = "0";

    // NOTE(review): this also removes an upper-case 'X', so "100X200" carries no separator afterwards
    // and the row is skipped - confirm that is intended.
    var size = UpperCaseLetters.Replace(type, string.Empty);

    var hasDash = size.Contains('-');
    if (hasDash) size = size.Split('-')[0];

    var parts = size.Split('x');
    if (parts.Length >= 2)
    {
        w = parts[0];
        h = parts[1];
        return true;
    }

    if (hasDash)
    {
        w = size;
        h = size;
        return true;
    }

    return false;
}
/// <summary>
/// Normalises one OCR text cell: the multiplication sign (U+00D7) becomes a lower-case 'x', and
/// underscores, commas, colons, semicolons and spaces are removed.
/// </summary>
/// <param name="text">Raw cell text; may be <c>null</c>.</param>
/// <returns>The cleaned text, or an empty string when <paramref name="text"/> is null or empty.</returns>
private static string ReplaceChars(string? text)
{
    if (string.IsNullOrEmpty(text)) return "";

    // The chain previously ended with Replace("", ""), which throws ArgumentException because the
    // search string is empty, so every non-empty cell failed. That call is removed.
    // NOTE(review): if it was meant to strip an invisible character that got lost (e.g. a zero-width
    // space), add it back explicitly as an escape such as "\u200b".
    return text
        .Replace("\u00d7", "x")
        .Replace("_", "")
        .Replace(",", "")
        .Replace(":", "")
        .Replace(";", "")
        .Replace(" ", "");
}
2025-04-23 15:55:51 +08:00
/// <summary>
/// Decodes a base64 data URI holding a JPEG, PNG or BMP picture into an in-memory image.
/// </summary>
/// <param name="base64ImageString">
/// Text containing a <c>data:image/jpeg</c>, <c>data:image/png</c> or <c>data:image/bmp</c> header
/// followed by the base64 payload.
/// </param>
/// <returns>
/// A bitmap that the caller owns and should dispose, or <c>null</c> when the input is null or empty,
/// carries none of the supported headers (raw base64 without a header included), or cannot be decoded.
/// </returns>
private static Image? GetImage(string base64ImageString)
{
    if (string.IsNullOrEmpty(base64ImageString)) return null;

    string[] supportedSubtypes = { "jpeg", "png", "bmp" };
    var recognised = false;
    foreach (var subtype in supportedSubtypes)
    {
        if (base64ImageString.IndexOf("data:image/" + subtype, StringComparison.Ordinal) < 0) continue;

        recognised = true;
        // Drop the header so only the base64 payload is left.
        base64ImageString = base64ImageString.Replace("data:image/" + subtype + ";base64,", "");
    }

    if (!recognised) return null;

    try
    {
        var imageBytes = Convert.FromBase64String(base64ImageString);
        using var ms = new MemoryStream(imageBytes);
        using var decoded = System.Drawing.Image.FromStream(ms);

        // An Image created from a stream needs that stream for its whole lifetime. Copy the pixels
        // into a standalone bitmap so the stream and the stream-bound image can be disposed here.
        return new System.Drawing.Bitmap(decoded);
    }
    catch (Exception)
    {
        // Invalid base64 or undecodable image data: report "no image" instead of throwing,
        // as callers treat null as "nothing to process".
        return null;
    }
}
/// <summary>
/// One recognised text cell. ImportByOCR deserializes the OCR engine's JSON text into a list of these,
/// so the property names and types are part of that JSON contract.
/// </summary>
public class OCRResultEntity
{
/// <summary>Position of this cell in the deserialized list; assigned by ImportByOCR, not read from the JSON.</summary>
public int Index { get; set; }
// NOTE(review): presumably the corner points of the detected text box - confirm against PaddleOCRSharp.
public List<BoxPoint>? BoxPoints { get; set; }
// NOTE(review): presumably the recognition confidence - not read anywhere in this file.
public double Score { get; set; }
/// <summary>Recognised text; ImportByOCR matches on it and reads neighbouring cells' text as row values.</summary>
public string? Text { get; set; }
// NOTE(review): cls_label / cls_score look like classifier output fields named after the JSON keys - confirm.
public int cls_label { get; set; }
public double cls_score { get; set; }
}
/// <summary>
/// Integer X/Y pair used as the element type of OCRResultEntity.BoxPoints.
/// </summary>
// NOTE(review): presumably pixel coordinates of one corner of a text box - confirm against the OCR engine output.
public class BoxPoint
{
public int X { get; set; }
public int Y { get; set; }
}
}