direction

This commit is contained in:
恍兮惚兮 2024-11-05 00:28:12 +08:00
parent b493017148
commit 050fca14a0
3 changed files with 57 additions and 29 deletions

View File

@ -8,12 +8,9 @@ from ctypes import (
c_size_t, c_size_t,
c_void_p, c_void_p,
c_int32, c_int32,
POINTER,
Structure, Structure,
pointer,
c_char_p, c_char_p,
c_wchar_p, c_wchar_p,
c_bool,
CFUNCTYPE, CFUNCTYPE,
) )
import os import os
@ -50,7 +47,7 @@ class ocrwrapper:
_OcrInit.argtypes = c_wchar_p, c_wchar_p, c_wchar_p, c_int32 _OcrInit.argtypes = c_wchar_p, c_wchar_p, c_wchar_p, c_int32
self.pOcrObj = _OcrInit(szDetModel, szRecModel, szKeyPath, nThreads) self.pOcrObj = _OcrInit(szDetModel, szRecModel, szKeyPath, nThreads)
def __OcrDetect(self, data: bytes, rotate: bool): def __OcrDetect(self, data: bytes, mode: int):
texts = [] texts = []
pss = [] pss = []
@ -64,21 +61,21 @@ class ocrwrapper:
c_void_p, c_void_p,
c_void_p, c_void_p,
c_size_t, c_size_t,
c_bool, c_int32,
c_void_p, c_void_p,
) )
_OcrDetect( _OcrDetect(
self.pOcrObj, self.pOcrObj,
data, data,
len(data), len(data),
rotate, mode,
CFUNCTYPE(None, ocrpoints, c_char_p)(cb), CFUNCTYPE(None, ocrpoints, c_char_p)(cb),
) )
return pss, texts return pss, texts
def ocr(self, data, rotate=False): def ocr(self, data, mode):
try: try:
return self.__OcrDetect(data, rotate) return self.__OcrDetect(data, mode)
except: except:
print_exc() print_exc()
return [], [] return [], []
@ -202,6 +199,6 @@ class OCR(baseocr):
pss, texts = self._ocr.ocr( pss, texts = self._ocr.ocr(
imagebinary, imagebinary,
globalconfig["verticalocr"] == 1, globalconfig["verticalocr"],
) )
return {"box": pss, "text": texts} return {"box": pss, "text": texts}

View File

@ -29,7 +29,7 @@ include(generate_product_version)
set(VERSION_MAJOR 5) set(VERSION_MAJOR 5)
set(VERSION_MINOR 55) set(VERSION_MINOR 55)
set(VERSION_PATCH 3) set(VERSION_PATCH 4)
add_library(pch pch.cpp) add_library(pch pch.cpp)
target_precompile_headers(pch PUBLIC pch.h) target_precompile_headers(pch PUBLIC pch.h)

View File

@ -6,6 +6,12 @@
typedef std::vector<cv::Point> TextBox; typedef std::vector<cv::Point> TextBox;
typedef std::string TextLine; typedef std::string TextLine;
typedef std::pair<TextBox, TextLine> TextBlock; typedef std::pair<TextBox, TextLine> TextBlock;
// Text layout direction requested by the caller of the OCR pipeline.
//
// The numeric values are written out explicitly because the mode crosses the
// exported C API as a plain integer: OcrDetect takes a Directional parameter
// and the Python wrapper declares that argument as c_int32.
enum class Directional
{
    H = 0,   // horizontal: cropped part images are not rotated
    V = 1,   // vertical: cropped part images are rotated before recognition
    Auto = 2 // rotate only when OcrLite::guess_V reports a vertical layout
};
struct ScaleParam struct ScaleParam
{ {
@ -115,12 +121,11 @@ private:
class DbNet : public CommonOnnxModel class DbNet : public CommonOnnxModel
{ {
public: public:
DbNet(const std::wstring &pathStr, int numOfThread): CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread) DbNet(const std::wstring &pathStr, int numOfThread) : CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread)
{ {
} }
std::vector<TextBox> getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, std::vector<TextBox> getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh,
float boxThresh, float unClipRatio); float boxThresh, float unClipRatio);
}; };
// onnxruntime init windows // onnxruntime init windows
@ -568,7 +573,7 @@ public:
std::vector<TextBlock> detect(const void *binptr, size_t size, std::vector<TextBlock> detect(const void *binptr, size_t size,
int padding, int maxSideLen, int padding, int maxSideLen,
float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate); float boxScoreThresh, float boxThresh, float unClipRatio, Directional);
private: private:
DbNet dbNet; DbNet dbNet;
@ -578,7 +583,8 @@ private:
std::vector<TextBlock> detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, std::vector<TextBlock> detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
float boxScoreThresh = 0.6f, float boxThresh = 0.3f, float boxScoreThresh = 0.6f, float boxThresh = 0.3f,
float unClipRatio = 2.0f, bool rotate = true); float unClipRatio = 2.0f, Directional mode = Directional::H);
bool guess_V(const std::vector<TextBox> &);
}; };
cv::Mat makePadding(cv::Mat &src, const int padding) cv::Mat makePadding(cv::Mat &src, const int padding)
@ -593,7 +599,7 @@ cv::Mat makePadding(cv::Mat &src, const int padding)
std::vector<TextBlock> OcrLite::detect(const void *binptr, size_t size, std::vector<TextBlock> OcrLite::detect(const void *binptr, size_t size,
const int padding, const int maxSideLen, const int padding, const int maxSideLen,
float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate) float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
{ {
std::vector<uchar> bytes{(uchar *)binptr, (uchar *)binptr + size}; std::vector<uchar> bytes{(uchar *)binptr, (uchar *)binptr + size};
cv::Mat originSrc = imdecode(bytes, cv::IMREAD_COLOR); // default : BGR cv::Mat originSrc = imdecode(bytes, cv::IMREAD_COLOR); // default : BGR
@ -612,7 +618,7 @@ std::vector<TextBlock> OcrLite::detect(const void *binptr, size_t size,
cv::Mat paddingSrc = makePadding(originSrc, padding); cv::Mat paddingSrc = makePadding(originSrc, padding);
ScaleParam scale = getScaleParam(paddingSrc, resize); ScaleParam scale = getScaleParam(paddingSrc, resize);
return detect_internal(paddingSrc, paddingRect, scale, return detect_internal(paddingSrc, paddingRect, scale,
boxScoreThresh, boxThresh, unClipRatio, rotate); boxScoreThresh, boxThresh, unClipRatio, mode);
} }
std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes) std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes)
@ -626,32 +632,57 @@ std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &
return partImages; return partImages;
} }
cv::Mat matRotateClockWise180(cv::Mat src) void matRotateClockWise180(cv::Mat& src)
{ {
flip(src, src, 0); flip(src, src, 0);
flip(src, src, 1); flip(src, src, 1);
return src;
} }
cv::Mat matRotateClockWise90(cv::Mat src) void matRotateClockWise90(cv::Mat& src)
{ {
transpose(src, src); transpose(src, src);
flip(src, src, 1); flip(src, src, 1);
return src;
} }
// Heuristically decides whether the detected text is laid out vertically.
//
// For every text box the axis-aligned extent of its points is computed, and
// the width/height ratios of all boxes are multiplied together. A product
// below 1 means the boxes are, on (geometric) average, taller than they are
// wide, which is reported as vertical text.
//
// @param textBoxes  detected boxes, each a list of corner points
// @return true when the combined aspect ratio is below 1; false otherwise,
//         including when no box contributes a ratio (empty input, or only
//         degenerate boxes).
bool OcrLite::guess_V(const std::vector<TextBox> &textBoxes)
{
    // Accumulate in floating point. With integer operands `w / h` truncates
    // to 0 for every box that is taller than wide, which would zero the
    // product permanently and let a single tall box decide the result.
    double ratioProduct = 1.0;
    for (const auto &box : textBoxes)
    {
        // A box without points has no extent; skipping it also keeps the
        // min/max sentinels below from being subtracted from each other.
        if (box.empty())
            continue;

        int minX = std::numeric_limits<int>::max();
        int minY = std::numeric_limits<int>::max();
        int maxX = std::numeric_limits<int>::min();
        int maxY = std::numeric_limits<int>::min();
        for (const auto &point : box)
        {
            if (point.x < minX)
                minX = point.x;
            if (point.y < minY)
                minY = point.y;
            if (point.x > maxX)
                maxX = point.x;
            if (point.y > maxY)
                maxY = point.y;
        }

        // Widen before subtracting so extreme coordinates cannot overflow int.
        const long long w = static_cast<long long>(maxX) - minX;
        const long long h = static_cast<long long>(maxY) - minY;
        // Degenerate boxes (a line or a single point) carry no orientation.
        if (h == 0 || w == 0)
            continue;

        ratioProduct *= static_cast<double>(w) / static_cast<double>(h);
    }
    return ratioProduct < 1.0;
}
std::vector<TextBlock> OcrLite::detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale, std::vector<TextBlock> OcrLite::detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
float boxScoreThresh, float boxThresh, float unClipRatio, bool rotate) float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
{ {
std::vector<TextBox> textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio); std::vector<TextBox> textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio);
std::vector<cv::Mat> partImages = getPartImages(src, textBoxes); std::vector<cv::Mat> partImages = getPartImages(src, textBoxes);
for (size_t i = 0; i < partImages.size(); ++i) for (size_t i = 0; i < partImages.size(); ++i)
{ {
if (rotate) if (mode == Directional::V || (mode == Directional::Auto && guess_V(textBoxes)))
{ {
partImages.at(i) = matRotateClockWise180(partImages[i]); matRotateClockWise180(partImages[i]);
partImages.at(i) = matRotateClockWise90(partImages[i]); matRotateClockWise90(partImages[i]);
} }
} }
@ -699,14 +730,14 @@ DECLARE_API OcrLite *OcrInit(const wchar_t *szDetModel, const wchar_t *szRecMode
} }
} }
DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, bool rotate, void (*cb)(ocrpoints, const char *)) DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, Directional mode, void (*cb)(ocrpoints, const char *))
{ {
if (!pOcrObj) if (!pOcrObj)
return; return;
try try
{ {
auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1, 0.1, 2.0, rotate); auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1, 0.1, 2.0, mode);
for (auto item : result) for (auto item : result)
{ {