758 lines
25 KiB
C++
Raw Permalink Normal View History

2024-11-04 23:10:41 +08:00
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <clipper2/clipper.h>
typedef std::vector<cv::Point> TextBox;
typedef std::string TextLine;
typedef std::pair<TextBox, TextLine> TextBlock;
2024-11-15 04:03:18 +08:00
enum class Directional
{
H,
V,
Auto
};
2024-11-05 00:03:53 +08:00
struct ScaleParam
{
2024-11-04 23:10:41 +08:00
int srcWidth;
int srcHeight;
int dstWidth;
int dstHeight;
float ratioWidth;
float ratioHeight;
};
class CommonOnnxModel
{
std::vector<Ort::AllocatedStringPtr> inputNamesPtr;
std::vector<Ort::AllocatedStringPtr> outputNamesPtr;
std::unique_ptr<Ort::Session> session;
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR);
Ort::SessionOptions sessionOptions = Ort::SessionOptions();
const std::array<float, 3> meanValues;
const std::array<float, 3> normValues;
std::vector<float> substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals)
{
auto inputTensorSize = src.cols * src.rows * src.channels();
std::vector<float> inputTensorValues(inputTensorSize);
size_t numChannels = src.channels();
size_t imageSize = src.cols * src.rows;
for (size_t pid = 0; pid < imageSize; pid++)
{
for (size_t ch = 0; ch < numChannels; ++ch)
{
float data = (float)(src.data[pid * numChannels + ch] * normVals[ch] - meanVals[ch] * normVals[ch]);
inputTensorValues[ch * imageSize + pid] = data;
}
}
return inputTensorValues;
}
void setNumThread(int numOfThread)
{
sessionOptions.SetInterOpNumThreads(numOfThread);
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
}
2024-11-05 00:03:53 +08:00
template <typename T, typename Func, typename Func2>
void getinputoutputNames(T &vec, Func func, Func2 func2)
{
Ort::AllocatorWithDefaultOptions allocator;
const size_t numInputNodes = ((*session.get()).*func)();
vec.reserve(numInputNodes);
std::vector<int64_t> input_node_dims;
for (size_t i = 0; i < numInputNodes; i++)
{
auto inputName = ((*session.get()).*func2)(i, allocator);
vec.push_back(std::move(inputName));
}
}
2024-11-04 23:10:41 +08:00
public:
std::pair<std::vector<float>, std::vector<int64_t>> RunSession(cv::Mat src)
{
auto inputTensorValues = substractMeanNormalize(src, meanValues.data(), normValues.data());
std::array<int64_t, 4> inputShape{1, src.channels(), src.rows, src.cols};
auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(),
inputTensorValues.size(), inputShape.data(),
inputShape.size());
assert(inputTensor.IsTensor());
std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};
auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor,
inputNames.size(), outputNames.data(), outputNames.size());
assert(outputTensor.size() == 1 && outputTensor.front().IsTensor());
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
auto outputCount = outputTensor.front().GetTensorTypeAndShapeInfo().GetElementCount();
float *floatArray = outputTensor.front().GetTensorMutableData<float>();
std::vector<float> outputData(floatArray, floatArray + outputCount);
return {outputData, outputShape};
}
CommonOnnxModel(const std::wstring &path, const std::array<float, 3> &_meanValues, const std::array<float, 3> &_normValues, int numOfThread = 4) : meanValues(_meanValues), normValues(_normValues)
{
setNumThread(numOfThread);
session = std::make_unique<Ort::Session>(env, path.c_str(), sessionOptions);
2024-11-05 00:03:53 +08:00
getinputoutputNames(inputNamesPtr, &Ort::Session::GetInputCount, &Ort::Session::GetInputNameAllocated);
getinputoutputNames(outputNamesPtr, &Ort::Session::GetOutputCount, &Ort::Session::GetOutputNameAllocated);
2024-11-04 23:10:41 +08:00
}
};
2024-11-05 00:03:53 +08:00
class CrnnNet : public CommonOnnxModel
{
2024-11-04 23:10:41 +08:00
public:
2024-11-05 00:03:53 +08:00
CrnnNet(const std::wstring &pathStr, const std::wstring &keysPath, int numOfThread);
2024-11-04 23:10:41 +08:00
std::vector<TextLine> getTextLines(std::vector<cv::Mat> &partImg);
2024-11-05 00:03:53 +08:00
private:
2024-11-04 23:10:41 +08:00
const int dstHeight = 48;
std::vector<std::string> keys;
TextLine scoreToTextLine(const std::vector<float> &outputData, size_t h, size_t w);
TextLine getTextLine(const cv::Mat &src);
};
2024-11-05 00:03:53 +08:00
class DbNet : public CommonOnnxModel
{
public:
2024-11-05 00:28:12 +08:00
DbNet(const std::wstring &pathStr, int numOfThread) : CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread)
{
}
2024-11-04 23:10:41 +08:00
std::vector<TextBox> getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh,
float boxThresh, float unClipRatio);
};
2024-11-05 00:03:53 +08:00
// onnxruntime init windows
ScaleParam getScaleParam(cv::Mat &src, const float scale)
{
2024-11-04 23:10:41 +08:00
int srcWidth = src.cols;
int srcHeight = src.rows;
2024-11-05 00:03:53 +08:00
int dstWidth = int((float)srcWidth * scale);
int dstHeight = int((float)srcHeight * scale);
if (dstWidth % 32 != 0)
{
2024-11-04 23:10:41 +08:00
dstWidth = (dstWidth / 32 - 1) * 32;
dstWidth = (std::max)(dstWidth, 32);
}
2024-11-05 00:03:53 +08:00
if (dstHeight % 32 != 0)
{
2024-11-04 23:10:41 +08:00
dstHeight = (dstHeight / 32 - 1) * 32;
dstHeight = (std::max)(dstHeight, 32);
}
2024-11-05 00:03:53 +08:00
float scaleWidth = (float)dstWidth / (float)srcWidth;
float scaleHeight = (float)dstHeight / (float)srcHeight;
2024-11-04 23:10:41 +08:00
return {srcWidth, srcHeight, dstWidth, dstHeight, scaleWidth, scaleHeight};
}
2024-11-05 00:03:53 +08:00
ScaleParam getScaleParam(cv::Mat &src, const int targetSize)
{
2024-11-04 23:10:41 +08:00
int srcWidth, srcHeight, dstWidth, dstHeight;
srcWidth = dstWidth = src.cols;
srcHeight = dstHeight = src.rows;
float ratio = 1.f;
2024-11-05 00:03:53 +08:00
if (srcWidth > srcHeight)
{
2024-11-04 23:10:41 +08:00
ratio = float(targetSize) / float(srcWidth);
2024-11-05 00:03:53 +08:00
}
else
{
2024-11-04 23:10:41 +08:00
ratio = float(targetSize) / float(srcHeight);
}
dstWidth = int(float(srcWidth) * ratio);
dstHeight = int(float(srcHeight) * ratio);
2024-11-05 00:03:53 +08:00
if (dstWidth % 32 != 0)
{
2024-11-04 23:10:41 +08:00
dstWidth = (dstWidth / 32) * 32;
dstWidth = (std::max)(dstWidth, 32);
}
2024-11-05 00:03:53 +08:00
if (dstHeight % 32 != 0)
{
2024-11-04 23:10:41 +08:00
dstHeight = (dstHeight / 32) * 32;
dstHeight = (std::max)(dstHeight, 32);
}
2024-11-05 00:03:53 +08:00
float ratioWidth = (float)dstWidth / (float)srcWidth;
float ratioHeight = (float)dstHeight / (float)srcHeight;
2024-11-04 23:10:41 +08:00
return {srcWidth, srcHeight, dstWidth, dstHeight, ratioWidth, ratioHeight};
}
2024-11-05 00:03:53 +08:00
std::vector<cv::Point2f> getBox(const cv::RotatedRect &rect)
{
2024-11-04 23:10:41 +08:00
cv::Point2f vertices[4];
rect.points(vertices);
2024-11-05 00:03:53 +08:00
// std::vector<cv::Point2f> ret(4);
2024-11-04 23:10:41 +08:00
std::vector<cv::Point2f> ret2(vertices, vertices + sizeof(vertices) / sizeof(vertices[0]));
2024-11-05 00:03:53 +08:00
// memcpy(vertices, &ret[0], ret.size() * sizeof(ret[0]));
2024-11-04 23:10:41 +08:00
return ret2;
}
2024-11-05 00:03:53 +08:00
cv::Mat getRotateCropImage(const cv::Mat &src, std::vector<cv::Point> box)
{
2024-11-04 23:10:41 +08:00
cv::Mat image;
src.copyTo(image);
std::vector<cv::Point> points = box;
int collectX[4] = {box[0].x, box[1].x, box[2].x, box[3].x};
int collectY[4] = {box[0].y, box[1].y, box[2].y, box[3].y};
int left = int(*std::min_element(collectX, collectX + 4));
int right = int(*std::max_element(collectX, collectX + 4));
int top = int(*std::min_element(collectY, collectY + 4));
int bottom = int(*std::max_element(collectY, collectY + 4));
cv::Mat imgCrop;
image(cv::Rect(left, top, right - left, bottom - top)).copyTo(imgCrop);
2024-11-05 00:03:53 +08:00
for (auto &point : points)
{
2024-11-04 23:10:41 +08:00
point.x -= left;
point.y -= top;
}
int imgCropWidth = int(sqrt(pow(points[0].x - points[1].x, 2) +
pow(points[0].y - points[1].y, 2)));
int imgCropHeight = int(sqrt(pow(points[0].x - points[3].x, 2) +
pow(points[0].y - points[3].y, 2)));
cv::Point2f ptsDst[4];
ptsDst[0] = cv::Point2f(0., 0.);
ptsDst[1] = cv::Point2f(imgCropWidth, 0.);
ptsDst[2] = cv::Point2f(imgCropWidth, imgCropHeight);
ptsDst[3] = cv::Point2f(0.f, imgCropHeight);
cv::Point2f ptsSrc[4];
ptsSrc[0] = cv::Point2f(points[0].x, points[0].y);
ptsSrc[1] = cv::Point2f(points[1].x, points[1].y);
ptsSrc[2] = cv::Point2f(points[2].x, points[2].y);
ptsSrc[3] = cv::Point2f(points[3].x, points[3].y);
cv::Mat M = cv::getPerspectiveTransform(ptsSrc, ptsDst);
cv::Mat partImg;
cv::warpPerspective(imgCrop, partImg, M,
cv::Size(imgCropWidth, imgCropHeight),
cv::BORDER_REPLICATE);
// if (float(partImg.rows) >= float(partImg.cols) * 1.5) {
// cv::Mat srcCopy = cv::Mat(partImg.rows, partImg.cols, partImg.depth());
// cv::transpose(partImg, srcCopy);
// cv::flip(srcCopy, srcCopy, 0);
// return srcCopy;
// } else {
// return partImg;
// }
return partImg;
}
2024-11-05 00:03:53 +08:00
bool cvPointCompare(const cv::Point &a, const cv::Point &b)
{
2024-11-04 23:10:41 +08:00
return a.x < b.x;
}
2024-11-05 00:03:53 +08:00
std::vector<cv::Point2f> getMinBoxes(const cv::RotatedRect &boxRect, float &maxSideLen)
{
2024-11-04 23:10:41 +08:00
maxSideLen = std::max(boxRect.size.width, boxRect.size.height);
std::vector<cv::Point2f> boxPoint = getBox(boxRect);
std::sort(boxPoint.begin(), boxPoint.end(), cvPointCompare);
int index1, index2, index3, index4;
2024-11-05 00:03:53 +08:00
if (boxPoint[1].y > boxPoint[0].y)
{
2024-11-04 23:10:41 +08:00
index1 = 0;
index4 = 1;
2024-11-05 00:03:53 +08:00
}
else
{
2024-11-04 23:10:41 +08:00
index1 = 1;
index4 = 0;
}
2024-11-05 00:03:53 +08:00
if (boxPoint[3].y > boxPoint[2].y)
{
2024-11-04 23:10:41 +08:00
index2 = 2;
index3 = 3;
2024-11-05 00:03:53 +08:00
}
else
{
2024-11-04 23:10:41 +08:00
index2 = 3;
index3 = 2;
}
std::vector<cv::Point2f> minBox(4);
minBox[0] = boxPoint[index1];
minBox[1] = boxPoint[index2];
minBox[2] = boxPoint[index3];
minBox[3] = boxPoint[index4];
return minBox;
}
template <class T>
inline T clamp(T x, T min, T max)
{
if (x > max)
return max;
if (x < min)
return min;
return x;
}
2024-11-05 00:03:53 +08:00
float boxScoreFast(const std::vector<cv::Point2f> &boxes, const cv::Mat &pred)
{
2024-11-04 23:10:41 +08:00
int width = pred.cols;
int height = pred.rows;
float arrayX[4] = {boxes[0].x, boxes[1].x, boxes[2].x, boxes[3].x};
float arrayY[4] = {boxes[0].y, boxes[1].y, boxes[2].y, boxes[3].y};
int minX = clamp(int(std::floor(*(std::min_element(arrayX, arrayX + 4)))), 0, width - 1);
int maxX = clamp(int(std::ceil(*(std::max_element(arrayX, arrayX + 4)))), 0, width - 1);
int minY = clamp(int(std::floor(*(std::min_element(arrayY, arrayY + 4)))), 0, height - 1);
int maxY = clamp(int(std::ceil(*(std::max_element(arrayY, arrayY + 4)))), 0, height - 1);
cv::Mat mask = cv::Mat::zeros(maxY - minY + 1, maxX - minX + 1, CV_8UC1);
cv::Point box[4];
box[0] = cv::Point(int(boxes[0].x) - minX, int(boxes[0].y) - minY);
box[1] = cv::Point(int(boxes[1].x) - minX, int(boxes[1].y) - minY);
box[2] = cv::Point(int(boxes[2].x) - minX, int(boxes[2].y) - minY);
box[3] = cv::Point(int(boxes[3].x) - minX, int(boxes[3].y) - minY);
const cv::Point *pts[1] = {box};
int npts[] = {4};
cv::fillPoly(mask, pts, npts, 1, cv::Scalar(1));
cv::Mat croppedImg;
pred(cv::Rect(minX, minY, maxX - minX + 1, maxY - minY + 1))
2024-11-05 00:03:53 +08:00
.copyTo(croppedImg);
2024-11-04 23:10:41 +08:00
2024-11-05 00:03:53 +08:00
auto score = (float)cv::mean(croppedImg, mask)[0];
2024-11-04 23:10:41 +08:00
return score;
}
2024-11-05 00:03:53 +08:00
float getContourArea(const std::vector<cv::Point2f> &box, float unClipRatio)
{
2024-11-04 23:10:41 +08:00
size_t size = box.size();
float area = 0.0f;
float dist = 0.0f;
2024-11-05 00:03:53 +08:00
for (size_t i = 0; i < size; i++)
{
2024-11-04 23:10:41 +08:00
area += box[i].x * box[(i + 1) % size].y -
box[i].y * box[(i + 1) % size].x;
dist += sqrtf((box[i].x - box[(i + 1) % size].x) *
2024-11-05 00:03:53 +08:00
(box[i].x - box[(i + 1) % size].x) +
2024-11-04 23:10:41 +08:00
(box[i].y - box[(i + 1) % size].y) *
2024-11-05 00:03:53 +08:00
(box[i].y - box[(i + 1) % size].y));
2024-11-04 23:10:41 +08:00
}
area = fabs(float(area / 2.0));
return area * unClipRatio / dist;
}
2024-11-05 00:03:53 +08:00
cv::RotatedRect unClip(std::vector<cv::Point2f> box, float unClipRatio)
{
2024-11-04 23:10:41 +08:00
float distance = getContourArea(box, unClipRatio);
Clipper2Lib::ClipperOffset offset;
Clipper2Lib::Path64 p;
p.push_back(Clipper2Lib::Point64(int(box[0].x), int(box[0].y)));
p.push_back(Clipper2Lib::Point64(int(box[1].x), int(box[1].y)));
p.push_back(Clipper2Lib::Point64(int(box[2].x), int(box[2].y)));
p.push_back(Clipper2Lib::Point64(int(box[3].x), int(box[3].y)));
offset.AddPath(p, Clipper2Lib::JoinType::Round, Clipper2Lib::EndType::Polygon);
Clipper2Lib::Paths64 soln;
offset.Execute(distance, soln);
std::vector<cv::Point2f> points;
2024-11-05 00:03:53 +08:00
for (size_t j = 0; j < soln.size(); j++)
{
for (size_t i = 0; i < soln[soln.size() - 1].size(); i++)
{
2024-11-04 23:10:41 +08:00
points.emplace_back(cv::Point2f{float(soln[j][i].x), float(soln[j][i].y)});
}
}
cv::RotatedRect res;
2024-11-05 00:03:53 +08:00
if (points.empty())
{
2024-11-04 23:10:41 +08:00
res = cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
2024-11-05 00:03:53 +08:00
}
else
{
2024-11-04 23:10:41 +08:00
res = cv::minAreaRect(points);
}
return res;
}
CrnnNet::CrnnNet(const std::wstring &pathStr, const std::wstring &keysPath, int numOfThread) : CommonOnnxModel(pathStr, {127.5, 127.5, 127.5}, {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}, numOfThread)
{
// load keys
std::ifstream in(keysPath.c_str());
std::string line;
if (in)
{
while (getline(in, line))
{ // line中不包括每行的换行符
keys.push_back(line);
}
}
else
{
return;
}
keys.insert(keys.begin(), "#");
keys.emplace_back(" ");
}
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last)
{
return std::distance(first, std::max_element(first, last));
}
TextLine CrnnNet::scoreToTextLine(const std::vector<float> &outputData, size_t h, size_t w)
{
auto keySize = keys.size();
auto dataSize = outputData.size();
std::string strRes;
std::vector<float> scores;
size_t lastIndex = 0;
size_t maxIndex;
float maxValue;
for (size_t i = 0; i < h; i++)
{
size_t start = i * w;
size_t stop = (i + 1) * w;
if (stop > dataSize - 1)
{
stop = (i + 1) * w - 1;
}
maxIndex = int(argmax(&outputData[start], &outputData[stop]));
maxValue = float(*std::max_element(&outputData[start], &outputData[stop]));
if (maxIndex > 0 && maxIndex < keySize && (!(i > 0 && maxIndex == lastIndex)))
{
scores.emplace_back(maxValue);
strRes.append(keys[maxIndex]);
}
lastIndex = maxIndex;
}
return strRes;
}
TextLine CrnnNet::getTextLine(const cv::Mat &src)
{
float scale = (float)dstHeight / (float)src.rows;
int dstWidth = int((float)src.cols * scale);
cv::Mat srcResize;
resize(src, srcResize, cv::Size(dstWidth, dstHeight));
auto &&[outputData, outputShape] = RunSession(srcResize);
return scoreToTextLine(outputData, outputShape[1], outputShape[2]);
}
std::vector<TextLine> CrnnNet::getTextLines(std::vector<cv::Mat> &partImg)
{
int size = partImg.size();
std::vector<TextLine> textLines(size);
for (int i = 0; i < size; ++i)
{
TextLine textLine = getTextLine(partImg[i]);
textLines[i] = textLine;
}
return textLines;
}
std::vector<TextBox> findRsBoxes(const cv::Mat &predMat, const cv::Mat &dilateMat, ScaleParam &s,
const float boxScoreThresh, const float unClipRatio)
{
const int longSideThresh = 3; // minBox 长边门限
const int maxCandidates = 1000;
std::vector<std::vector<cv::Point>> contours;
std::vector<cv::Vec4i> hierarchy;
cv::findContours(dilateMat, contours, hierarchy, cv::RETR_LIST,
cv::CHAIN_APPROX_SIMPLE);
size_t numContours = contours.size() >= maxCandidates ? maxCandidates : contours.size();
std::vector<TextBox> rsBoxes;
for (size_t i = 0; i < numContours; i++)
{
if (contours[i].size() <= 2)
{
continue;
}
cv::RotatedRect minAreaRect = cv::minAreaRect(contours[i]);
float longSide;
std::vector<cv::Point2f> minBoxes = getMinBoxes(minAreaRect, longSide);
if (longSide < longSideThresh)
{
continue;
}
float boxScore = boxScoreFast(minBoxes, predMat);
if (boxScore < boxScoreThresh)
continue;
//-----unClip-----
cv::RotatedRect clipRect = unClip(minBoxes, unClipRatio);
if (clipRect.size.height < 1.001 && clipRect.size.width < 1.001)
{
continue;
}
//-----unClip-----
std::vector<cv::Point2f> clipMinBoxes = getMinBoxes(clipRect, longSide);
if (longSide < longSideThresh + 2)
continue;
std::vector<cv::Point> intClipMinBoxes;
for (auto &clipMinBox : clipMinBoxes)
{
float x = clipMinBox.x / s.ratioWidth;
float y = clipMinBox.y / s.ratioHeight;
int ptX = (std::min)((std::max)(int(x), 0), s.srcWidth - 1);
int ptY = (std::min)((std::max)(int(y), 0), s.srcHeight - 1);
cv::Point point{ptX, ptY};
intClipMinBoxes.push_back(point);
}
rsBoxes.push_back(intClipMinBoxes);
}
reverse(rsBoxes.begin(), rsBoxes.end());
return rsBoxes;
}
std::vector<TextBox> DbNet::getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, float boxThresh, float unClipRatio)
{
cv::Mat srcResize;
resize(src, srcResize, cv::Size(s.dstWidth, s.dstHeight));
auto &&[outputData, outputShape] = RunSession(srcResize);
//-----Data preparation-----
int outHeight = (int)outputShape[2];
int outWidth = (int)outputShape[3];
size_t area = outHeight * outWidth;
std::vector<float> predData(area, 0.0);
std::vector<unsigned char> cbufData(area, ' ');
for (int i = 0; i < area; i++)
{
predData[i] = float(outputData[i]);
cbufData[i] = (unsigned char)((outputData[i]) * 255);
}
cv::Mat predMat(outHeight, outWidth, CV_32F, (float *)predData.data());
cv::Mat cBufMat(outHeight, outWidth, CV_8UC1, (unsigned char *)cbufData.data());
//-----boxThresh-----
const double maxValue = 255;
const double threshold = boxThresh * 255;
cv::Mat thresholdMat;
cv::threshold(cBufMat, thresholdMat, threshold, maxValue, cv::THRESH_BINARY);
//-----dilate-----
cv::Mat dilateMat;
cv::Mat dilateElement = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
cv::dilate(thresholdMat, dilateMat, dilateElement);
return findRsBoxes(predMat, dilateMat, s, boxScoreThresh, unClipRatio);
}
class OcrLite
{
public:
OcrLite(const std::wstring &detPath,
const std::wstring &recPath, const std::wstring &keysPath, int numOfThread) : crnnNet(recPath, keysPath, numOfThread), dbNet(detPath, numOfThread)
{
}
std::vector<TextBlock> detect(const void *binptr, size_t size,
int padding, int maxSideLen,
2024-11-05 00:28:12 +08:00
float boxScoreThresh, float boxThresh, float unClipRatio, Directional);
2024-11-04 23:10:41 +08:00
private:
DbNet dbNet;
CrnnNet crnnNet;
std::vector<cv::Mat> getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes);
std::vector<TextBlock> detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
float boxScoreThresh = 0.6f, float boxThresh = 0.3f,
2024-11-05 00:28:12 +08:00
float unClipRatio = 2.0f, Directional mode = Directional::H);
bool guess_V(const std::vector<TextBox> &);
2024-11-04 23:10:41 +08:00
};
cv::Mat makePadding(cv::Mat &src, const int padding)
{
if (padding <= 0)
return src;
cv::Scalar paddingScalar = {255, 255, 255};
cv::Mat paddingSrc;
cv::copyMakeBorder(src, paddingSrc, padding, padding, padding, padding, cv::BORDER_ISOLATED, paddingScalar);
return paddingSrc;
}
std::vector<TextBlock> OcrLite::detect(const void *binptr, size_t size,
const int padding, const int maxSideLen,
2024-11-05 00:28:12 +08:00
float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
2024-11-04 23:10:41 +08:00
{
std::vector<uchar> bytes{(uchar *)binptr, (uchar *)binptr + size};
cv::Mat originSrc = imdecode(bytes, cv::IMREAD_COLOR); // default : BGR
int originMaxSide = (std::max)(originSrc.cols, originSrc.rows);
int resize;
if (maxSideLen <= 0 || maxSideLen > originMaxSide)
{
resize = originMaxSide;
}
else
{
resize = maxSideLen;
}
resize += 2 * padding;
cv::Rect paddingRect(padding, padding, originSrc.cols, originSrc.rows);
cv::Mat paddingSrc = makePadding(originSrc, padding);
ScaleParam scale = getScaleParam(paddingSrc, resize);
return detect_internal(paddingSrc, paddingRect, scale,
2024-11-05 00:28:12 +08:00
boxScoreThresh, boxThresh, unClipRatio, mode);
2024-11-04 23:10:41 +08:00
}
std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes)
{
std::vector<cv::Mat> partImages;
for (size_t i = 0; i < textBoxes.size(); ++i)
{
cv::Mat partImg = getRotateCropImage(src, textBoxes[i]);
partImages.emplace_back(partImg);
}
return partImages;
}
2024-11-15 03:44:19 +08:00
void matRotateClockWise180(cv::Mat &src)
2024-11-05 00:03:53 +08:00
{
2024-11-04 23:10:41 +08:00
flip(src, src, 0);
flip(src, src, 1);
}
2024-11-15 03:44:19 +08:00
void matRotateClockWise90(cv::Mat &src)
2024-11-05 00:03:53 +08:00
{
2024-11-04 23:10:41 +08:00
transpose(src, src);
flip(src, src, 1);
}
2024-11-05 00:28:12 +08:00
bool OcrLite::guess_V(const std::vector<TextBox> &textBoxes)
{
auto whs = 1.0f;
for (auto &box : textBoxes)
{
int minX = std::numeric_limits<int>::max();
int minY = std::numeric_limits<int>::max();
int maxX = std::numeric_limits<int>::min();
int maxY = std::numeric_limits<int>::min();
for (auto &point : box)
{
if (point.x < minX)
minX = point.x;
if (point.y < minY)
minY = point.y;
if (point.x > maxX)
maxX = point.x;
if (point.y > maxY)
maxY = point.y;
}
auto w = maxX - minX;
auto h = maxY - minY;
if (h == 0 || w == 0)
continue;
whs *= w / h;
}
return whs < 1;
}
2024-11-04 23:10:41 +08:00
std::vector<TextBlock> OcrLite::detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
2024-11-05 00:28:12 +08:00
float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
2024-11-04 23:10:41 +08:00
{
std::vector<TextBox> textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio);
std::vector<cv::Mat> partImages = getPartImages(src, textBoxes);
for (size_t i = 0; i < partImages.size(); ++i)
{
2024-11-05 00:28:12 +08:00
if (mode == Directional::V || (mode == Directional::Auto && guess_V(textBoxes)))
2024-11-04 23:10:41 +08:00
{
2024-11-05 00:28:12 +08:00
matRotateClockWise180(partImages[i]);
matRotateClockWise90(partImages[i]);
2024-11-04 23:10:41 +08:00
}
}
std::vector<TextLine> textLines = crnnNet.getTextLines(partImages);
std::vector<TextBlock> textBlocks;
for (size_t i = 0; i < textLines.size(); ++i)
{
std::vector<cv::Point> boxPoint = std::vector<cv::Point>(4);
int padding = originRect.x; // padding conversion
boxPoint[0] = cv::Point(textBoxes[i][0].x - padding, textBoxes[i][0].y - padding);
boxPoint[1] = cv::Point(textBoxes[i][1].x - padding, textBoxes[i][1].y - padding);
boxPoint[2] = cv::Point(textBoxes[i][2].x - padding, textBoxes[i][2].y - padding);
boxPoint[3] = cv::Point(textBoxes[i][3].x - padding, textBoxes[i][3].y - padding);
TextBlock textBlock{boxPoint, textLines[i]};
textBlocks.emplace_back(textBlock);
}
return textBlocks;
}
2024-11-15 04:03:18 +08:00
2024-11-04 23:10:41 +08:00
struct ocrpoints
{
int x1, y1, x2, y2, x3, y3, x4, y4;
};
DECLARE_API OcrLite *OcrInit(const wchar_t *szDetModel, const wchar_t *szRecModel, const wchar_t *szKeyPath, int nThreads)
{
OcrLite *pOcrObj = nullptr;
try
{
pOcrObj = new OcrLite(szDetModel, szRecModel, szKeyPath, nThreads);
}
catch (...)
{
}
if (pOcrObj)
{
return pOcrObj;
}
else
{
return nullptr;
}
}
2024-11-05 00:28:12 +08:00
DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, Directional mode, void (*cb)(ocrpoints, const char *))
2024-11-04 23:10:41 +08:00
{
if (!pOcrObj)
return;
try
{
2024-11-05 00:28:12 +08:00
auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1, 0.1, 2.0, mode);
2024-11-04 23:10:41 +08:00
for (auto item : result)
{
cb({item.first[0].x, item.first[0].y,
item.first[1].x, item.first[1].y,
item.first[2].x, item.first[2].y,
item.first[3].x, item.first[3].y},
item.second.c_str());
}
}
catch (...)
{
}
}
DECLARE_API void OcrDestroy(OcrLite *pOcrObj)
{
if (pOcrObj)
delete pOcrObj;
}