From 050fca14a0154284cd82615d0939e2fa702f9b74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=81=8D=E5=85=AE=E6=83=9A=E5=85=AE?= <1173718158@qq.com> Date: Tue, 5 Nov 2024 00:28:12 +0800 Subject: [PATCH] direction --- src/LunaTranslator/ocrengines/local.py | 15 +++--- src/plugins/CMakeLists.txt | 2 +- src/plugins/LunaOCR/OCR.cpp | 69 +++++++++++++++++++------- 3 files changed, 57 insertions(+), 29 deletions(-) diff --git a/src/LunaTranslator/ocrengines/local.py b/src/LunaTranslator/ocrengines/local.py index 68e14b22..f559003e 100644 --- a/src/LunaTranslator/ocrengines/local.py +++ b/src/LunaTranslator/ocrengines/local.py @@ -8,12 +8,9 @@ from ctypes import ( c_size_t, c_void_p, c_int32, - POINTER, Structure, - pointer, c_char_p, c_wchar_p, - c_bool, CFUNCTYPE, ) import os @@ -50,7 +47,7 @@ class ocrwrapper: _OcrInit.argtypes = c_wchar_p, c_wchar_p, c_wchar_p, c_int32 self.pOcrObj = _OcrInit(szDetModel, szRecModel, szKeyPath, nThreads) - def __OcrDetect(self, data: bytes, rotate: bool): + def __OcrDetect(self, data: bytes, mode: int): texts = [] pss = [] @@ -64,21 +61,21 @@ class ocrwrapper: c_void_p, c_void_p, c_size_t, - c_bool, + c_int32, c_void_p, ) _OcrDetect( self.pOcrObj, data, len(data), - rotate, + mode, CFUNCTYPE(None, ocrpoints, c_char_p)(cb), ) return pss, texts - def ocr(self, data, rotate=False): + def ocr(self, data, mode): try: - return self.__OcrDetect(data, rotate) + return self.__OcrDetect(data, mode) except: print_exc() return [], [] @@ -202,6 +199,6 @@ class OCR(baseocr): pss, texts = self._ocr.ocr( imagebinary, - globalconfig["verticalocr"] == 1, + globalconfig["verticalocr"], ) return {"box": pss, "text": texts} diff --git a/src/plugins/CMakeLists.txt b/src/plugins/CMakeLists.txt index 28a0bbe3..a92d8758 100644 --- a/src/plugins/CMakeLists.txt +++ b/src/plugins/CMakeLists.txt @@ -29,7 +29,7 @@ include(generate_product_version) set(VERSION_MAJOR 5) set(VERSION_MINOR 55) -set(VERSION_PATCH 3) +set(VERSION_PATCH 4) add_library(pch pch.cpp) target_precompile_headers(pch PUBLIC pch.h) diff --git a/src/plugins/LunaOCR/OCR.cpp b/src/plugins/LunaOCR/OCR.cpp index 031b5e56..e559f5c7 100644 --- a/src/plugins/LunaOCR/OCR.cpp +++ b/src/plugins/LunaOCR/OCR.cpp @@ -6,6 +6,12 @@ typedef std::vector TextBox; typedef std::string TextLine; typedef std::pair TextBlock; +enum class Directional +{ + H, + V, + Auto +}; struct ScaleParam { @@ -115,12 +121,11 @@ private: class DbNet : public CommonOnnxModel { public: - DbNet(const std::wstring &pathStr, int numOfThread): CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread) -{ -} + DbNet(const std::wstring &pathStr, int numOfThread) : CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread) + { + } std::vector getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, float boxThresh, float unClipRatio); - }; // onnxruntime init windows @@ -568,7 +573,7 @@ public: std::vector detect(const void *binptr, size_t size, int padding, int maxSideLen, - float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate); + float boxScoreThresh, float boxThresh, float unClipRatio, Directional); private: DbNet dbNet; @@ -578,7 +583,8 @@ private: std::vector detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, float boxScoreThresh = 0.6f, float boxThresh = 0.3f, - float unClipRatio = 2.0f, bool rotate = true); + float unClipRatio = 2.0f, Directional mode = Directional::H); + bool guess_V(const std::vector &); }; cv::Mat makePadding(cv::Mat &src, const int padding) @@ -593,7 +599,7 @@ cv::Mat makePadding(cv::Mat &src, const int padding) std::vector OcrLite::detect(const void *binptr, size_t size, const int padding, const int maxSideLen, - float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate) + float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode) { std::vector bytes{(uchar *)binptr, (uchar *)binptr + size}; cv::Mat originSrc = imdecode(bytes, cv::IMREAD_COLOR); // default : BGR @@ -612,7 +618,7 @@ std::vector OcrLite::detect(const void *binptr, size_t size, cv::Mat paddingSrc = makePadding(originSrc, padding); ScaleParam scale = getScaleParam(paddingSrc, resize); return detect_internal(paddingSrc, paddingRect, scale, - boxScoreThresh, boxThresh, unClipRatio, rotate); + boxScoreThresh, boxThresh, unClipRatio, mode); } std::vector OcrLite::getPartImages(cv::Mat &src, std::vector &textBoxes) @@ -626,32 +632,57 @@ std::vector OcrLite::getPartImages(cv::Mat &src, std::vector & return partImages; } -cv::Mat matRotateClockWise180(cv::Mat src) +void matRotateClockWise180(cv::Mat& src) { flip(src, src, 0); flip(src, src, 1); - return src; } -cv::Mat matRotateClockWise90(cv::Mat src) +void matRotateClockWise90(cv::Mat& src) { transpose(src, src); flip(src, src, 1); - return src; } - +bool OcrLite::guess_V(const std::vector &textBoxes) +{ + auto whs = 1.0f; + for (auto &box : textBoxes) + { + int minX = std::numeric_limits::max(); + int minY = std::numeric_limits::max(); + int maxX = std::numeric_limits::min(); + int maxY = std::numeric_limits::min(); + for (auto &point : box) + { + if (point.x < minX) + minX = point.x; + if (point.y < minY) + minY = point.y; + if (point.x > maxX) + maxX = point.x; + if (point.y > maxY) + maxY = point.y; + } + auto w = maxX - minX; + auto h = maxY - minY; + if (h == 0 || w == 0) + continue; + whs *= w / h; + } + return whs < 1; +} std::vector OcrLite::detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, - float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate) + float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode) { std::vector textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio); std::vector partImages = getPartImages(src, textBoxes); for (size_t i = 0; i < partImages.size(); ++i) { - if (rotate) + if (mode == Directional::V || (mode == Directional::Auto && guess_V(textBoxes))) { - partImages.at(i) = matRotateClockWise180(partImages[i]); - partImages.at(i) = matRotateClockWise90(partImages[i]); + matRotateClockWise180(partImages[i]); + matRotateClockWise90(partImages[i]); } } @@ -699,14 +730,14 @@ DECLARE_API OcrLite *OcrInit(const wchar_t *szDetModel, const wchar_t *szRecMode } } -DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, bool rotate, void (*cb)(ocrpoints, const char *)) +DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, Directional mode, void (*cb)(ocrpoints, const char *)) { if (!pOcrObj) return; try { - auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1, 0.1, 2.0, rotate); + auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1, 0.1, 2.0, mode); for (auto item : result) {