// Mirror of https://github.com/HIllya51/LunaTranslator.git
// (synced 2024-12-26 15:14:13 +08:00; 758 lines, 25 KiB, C++)
#include <algorithm>
#include <cmath>
#include <utility>

#include <onnxruntime/core/session/onnxruntime_cxx_api.h>
#include <opencv2/opencv.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/imgcodecs.hpp>
#include <clipper2/clipper.h>
|
|
typedef std::vector<cv::Point> TextBox;
|
|
typedef std::string TextLine;
|
|
typedef std::pair<TextBox, TextLine> TextBlock;
|
|
// Reading direction requested by the caller.
// The numeric values are part of the exported API and must stay stable.
enum class Directional
{
    H = 0,    // crops are recognized as-is
    V = 1,    // crops are rotated before recognition
    Auto = 2  // rotation is decided from the shape of the detected boxes
};
|
|
|
|
// Describes how a source image is resized before detection and how to map
// coordinates found in the resized image back to the source.
// Field order matters: instances are built with positional aggregate
// initialization.
struct ScaleParam
{
    // Size of the image handed to the detector, in pixels.
    int srcWidth;
    int srcHeight;

    // Size the image is resized to before inference, in pixels.
    int dstWidth;
    int dstHeight;

    // dstWidth / srcWidth and dstHeight / srcHeight.
    float ratioWidth;
    float ratioHeight;
};
|
|
|
|
class CommonOnnxModel
|
|
{
|
|
std::vector<Ort::AllocatedStringPtr> inputNamesPtr;
|
|
std::vector<Ort::AllocatedStringPtr> outputNamesPtr;
|
|
std::unique_ptr<Ort::Session> session;
|
|
Ort::Env env = Ort::Env(ORT_LOGGING_LEVEL_ERROR);
|
|
Ort::SessionOptions sessionOptions = Ort::SessionOptions();
|
|
const std::array<float, 3> meanValues;
|
|
const std::array<float, 3> normValues;
|
|
|
|
std::vector<float> substractMeanNormalize(cv::Mat &src, const float *meanVals, const float *normVals)
|
|
{
|
|
auto inputTensorSize = src.cols * src.rows * src.channels();
|
|
std::vector<float> inputTensorValues(inputTensorSize);
|
|
size_t numChannels = src.channels();
|
|
size_t imageSize = src.cols * src.rows;
|
|
|
|
for (size_t pid = 0; pid < imageSize; pid++)
|
|
{
|
|
for (size_t ch = 0; ch < numChannels; ++ch)
|
|
{
|
|
float data = (float)(src.data[pid * numChannels + ch] * normVals[ch] - meanVals[ch] * normVals[ch]);
|
|
inputTensorValues[ch * imageSize + pid] = data;
|
|
}
|
|
}
|
|
return inputTensorValues;
|
|
}
|
|
|
|
void setNumThread(int numOfThread)
|
|
{
|
|
sessionOptions.SetInterOpNumThreads(numOfThread);
|
|
sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
|
|
}
|
|
|
|
template <typename T, typename Func, typename Func2>
|
|
void getinputoutputNames(T &vec, Func func, Func2 func2)
|
|
{
|
|
Ort::AllocatorWithDefaultOptions allocator;
|
|
const size_t numInputNodes = ((*session.get()).*func)();
|
|
vec.reserve(numInputNodes);
|
|
std::vector<int64_t> input_node_dims;
|
|
|
|
for (size_t i = 0; i < numInputNodes; i++)
|
|
{
|
|
auto inputName = ((*session.get()).*func2)(i, allocator);
|
|
vec.push_back(std::move(inputName));
|
|
}
|
|
}
|
|
|
|
public:
|
|
std::pair<std::vector<float>, std::vector<int64_t>> RunSession(cv::Mat src)
|
|
{
|
|
auto inputTensorValues = substractMeanNormalize(src, meanValues.data(), normValues.data());
|
|
std::array<int64_t, 4> inputShape{1, src.channels(), src.rows, src.cols};
|
|
auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
|
|
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(),
|
|
inputTensorValues.size(), inputShape.data(),
|
|
inputShape.size());
|
|
assert(inputTensor.IsTensor());
|
|
std::vector<const char *> inputNames = {inputNamesPtr.data()->get()};
|
|
std::vector<const char *> outputNames = {outputNamesPtr.data()->get()};
|
|
auto outputTensor = session->Run(Ort::RunOptions{nullptr}, inputNames.data(), &inputTensor,
|
|
inputNames.size(), outputNames.data(), outputNames.size());
|
|
assert(outputTensor.size() == 1 && outputTensor.front().IsTensor());
|
|
std::vector<int64_t> outputShape = outputTensor[0].GetTensorTypeAndShapeInfo().GetShape();
|
|
auto outputCount = outputTensor.front().GetTensorTypeAndShapeInfo().GetElementCount();
|
|
float *floatArray = outputTensor.front().GetTensorMutableData<float>();
|
|
std::vector<float> outputData(floatArray, floatArray + outputCount);
|
|
return {outputData, outputShape};
|
|
}
|
|
CommonOnnxModel(const std::wstring &path, const std::array<float, 3> &_meanValues, const std::array<float, 3> &_normValues, int numOfThread = 4) : meanValues(_meanValues), normValues(_normValues)
|
|
{
|
|
setNumThread(numOfThread);
|
|
session = std::make_unique<Ort::Session>(env, path.c_str(), sessionOptions);
|
|
getinputoutputNames(inputNamesPtr, &Ort::Session::GetInputCount, &Ort::Session::GetInputNameAllocated);
|
|
getinputoutputNames(outputNamesPtr, &Ort::Session::GetOutputCount, &Ort::Session::GetOutputNameAllocated);
|
|
}
|
|
};
|
|
|
|
class CrnnNet : public CommonOnnxModel
|
|
{
|
|
public:
|
|
CrnnNet(const std::wstring &pathStr, const std::wstring &keysPath, int numOfThread);
|
|
std::vector<TextLine> getTextLines(std::vector<cv::Mat> &partImg);
|
|
|
|
private:
|
|
const int dstHeight = 48;
|
|
|
|
std::vector<std::string> keys;
|
|
|
|
TextLine scoreToTextLine(const std::vector<float> &outputData, size_t h, size_t w);
|
|
|
|
TextLine getTextLine(const cv::Mat &src);
|
|
};
|
|
|
|
class DbNet : public CommonOnnxModel
|
|
{
|
|
public:
|
|
DbNet(const std::wstring &pathStr, int numOfThread) : CommonOnnxModel(pathStr, {0.485 * 255, 0.456 * 255, 0.406 * 255}, {1.0 / 0.229 / 255.0, 1.0 / 0.224 / 255.0, 1.0 / 0.225 / 255.0}, numOfThread)
|
|
{
|
|
}
|
|
std::vector<TextBox> getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh,
|
|
float boxThresh, float unClipRatio);
|
|
};
|
|
|
|
// onnxruntime init windows
|
|
// Computes the detector input size for src scaled by a fixed factor.
// Both target dimensions are multiples of 32 and never smaller than 32; the
// returned ratios are target / source per axis.
ScaleParam getScaleParam(cv::Mat &src, const float scale)
{
    // NOTE(review): a non-multiple is rounded down and then reduced by one
    // further step of 32, unlike the targetSize overload which only rounds
    // down. Kept as-is; confirm whether the extra step is intended.
    const auto snapTo32 = [](int len)
    {
        if (len % 32 != 0)
        {
            len = (len / 32 - 1) * 32;
        }
        // Applied unconditionally: 0 is a multiple of 32 and previously slipped
        // through, yielding an empty resize target.
        return (std::max)(len, 32);
    };

    const int srcWidth = src.cols;
    const int srcHeight = src.rows;
    const int dstWidth = snapTo32(int((float)srcWidth * scale));
    const int dstHeight = snapTo32(int((float)srcHeight * scale));

    const float scaleWidth = (float)dstWidth / (float)srcWidth;
    const float scaleHeight = (float)dstHeight / (float)srcHeight;
    return {srcWidth, srcHeight, dstWidth, dstHeight, scaleWidth, scaleHeight};
}
|
|
|
|
// Computes the detector input size so that the longer side of src becomes
// targetSize, keeping the aspect ratio before alignment.
// Both target dimensions are rounded down to a multiple of 32 and never go
// below 32; the returned ratios are target / source per axis.
// src must not be empty (its dimensions are used as divisors).
ScaleParam getScaleParam(cv::Mat &src, const int targetSize)
{
    // len - len % 32 equals (len / 32) * 32. The minimum is applied
    // unconditionally because 0 is a multiple of 32 and previously slipped
    // through, yielding an empty resize target for extremely elongated images.
    const auto alignDownTo32 = [](int len)
    {
        return (std::max)(len - len % 32, 32);
    };

    const int srcWidth = src.cols;
    const int srcHeight = src.rows;

    const int longSide = srcWidth > srcHeight ? srcWidth : srcHeight;
    const float ratio = float(targetSize) / float(longSide);

    const int dstWidth = alignDownTo32(int(float(srcWidth) * ratio));
    const int dstHeight = alignDownTo32(int(float(srcHeight) * ratio));

    const float ratioWidth = (float)dstWidth / (float)srcWidth;
    const float ratioHeight = (float)dstHeight / (float)srcHeight;
    return {srcWidth, srcHeight, dstWidth, dstHeight, ratioWidth, ratioHeight};
}
|
|
|
|
// Returns the four corner points of rect, in the order cv::RotatedRect::points
// produces them.
std::vector<cv::Point2f> getBox(const cv::RotatedRect &rect)
{
    std::vector<cv::Point2f> corners(4);
    rect.points(corners.data());
    return corners;
}
|
|
|
|
// Cuts the quadrilateral described by box out of src and straightens it into
// an upright rectangle.
// box must hold at least four points; only the first four are used, with
// box[0]->box[1] taken as the width edge and box[0]->box[3] as the height edge.
// The crop is not rotated here, whatever its aspect ratio.
cv::Mat getRotateCropImage(const cv::Mat &src, std::vector<cv::Point> box)
{
    // Axis-aligned bounds of the quadrilateral.
    int left = box[0].x;
    int right = box[0].x;
    int top = box[0].y;
    int bottom = box[0].y;
    for (int k = 1; k < 4; ++k)
    {
        left = (std::min)(left, box[k].x);
        right = (std::max)(right, box[k].x);
        top = (std::min)(top, box[k].y);
        bottom = (std::max)(bottom, box[k].y);
    }

    // Copy only the bounding rectangle; the full source image is not duplicated.
    cv::Mat imgCrop;
    src(cv::Rect(left, top, right - left, bottom - top)).copyTo(imgCrop);

    // Express the corners relative to the crop.
    for (auto &point : box)
    {
        point.x -= left;
        point.y -= top;
    }

    // Euclidean edge length, truncated to whole pixels.
    const auto edgeLength = [](const cv::Point &a, const cv::Point &b)
    {
        const double dx = a.x - b.x;
        const double dy = a.y - b.y;
        return int(std::sqrt(dx * dx + dy * dy));
    };
    const int imgCropWidth = edgeLength(box[0], box[1]);
    const int imgCropHeight = edgeLength(box[0], box[3]);

    cv::Point2f ptsDst[4];
    ptsDst[0] = cv::Point2f(0., 0.);
    ptsDst[1] = cv::Point2f(imgCropWidth, 0.);
    ptsDst[2] = cv::Point2f(imgCropWidth, imgCropHeight);
    ptsDst[3] = cv::Point2f(0.f, imgCropHeight);

    cv::Point2f ptsSrc[4];
    for (int k = 0; k < 4; ++k)
    {
        ptsSrc[k] = cv::Point2f(box[k].x, box[k].y);
    }

    const cv::Mat M = cv::getPerspectiveTransform(ptsSrc, ptsDst);

    // The fifth parameter of warpPerspective is the interpolation flag and the
    // sixth the border mode. The border mode used to be passed in the flag
    // slot, where it happened to equal INTER_LINEAR and left the border mode at
    // its default; both are now spelled out.
    cv::Mat partImg;
    cv::warpPerspective(imgCrop, partImg, M,
                        cv::Size(imgCropWidth, imgCropHeight),
                        cv::INTER_LINEAR, cv::BORDER_REPLICATE);
    return partImg;
}
|
|
|
|
// Strict ordering of points by x coordinate: true when a lies left of b.
bool cvPointCompare(const cv::Point &a, const cv::Point &b)
{
    return b.x > a.x;
}
|
|
|
|
// Returns the corners of boxRect ordered as: left pair's upper point, right
// pair's upper point, right pair's lower point, left pair's lower point
// ("upper" meaning smaller y). maxSideLen receives the longer side of boxRect.
std::vector<cv::Point2f> getMinBoxes(const cv::RotatedRect &boxRect, float &maxSideLen)
{
    maxSideLen = (std::max)(boxRect.size.width, boxRect.size.height);

    std::vector<cv::Point2f> corners = getBox(boxRect);

    // Compare the float coordinates directly. Sorting through an integer-point
    // comparator converts each corner to whole pixels first, so corners whose
    // x values differ by a fraction of a pixel end up in unspecified order.
    std::sort(corners.begin(), corners.end(),
              [](const cv::Point2f &a, const cv::Point2f &b)
              { return a.x < b.x; });

    // corners[0..1] are the two leftmost points, corners[2..3] the rightmost.
    const int leftUpper = corners[1].y > corners[0].y ? 0 : 1;
    const int leftLower = 1 - leftUpper;
    const int rightUpper = corners[3].y > corners[2].y ? 2 : 3;
    const int rightLower = 5 - rightUpper;

    return {corners[leftUpper], corners[rightUpper], corners[rightLower], corners[leftLower]};
}
|
|
|
|
// Limits x to the range [min, max]. The upper bound is checked first, so max
// wins whenever x exceeds it, even if the bounds are passed in reverse.
template <class T>
inline T clamp(T x, T min, T max)
{
    return (x > max) ? max : ((x < min) ? min : x);
}
|
|
// Mean value of pred inside the quadrilateral given by the first four points
// of boxes. The bounding rectangle is clamped to the bounds of pred.
float boxScoreFast(const std::vector<cv::Point2f> &boxes, const cv::Mat &pred)
{
    const int lastCol = pred.cols - 1;
    const int lastRow = pred.rows - 1;

    // Extremes of the four corners.
    float lowX = boxes[0].x;
    float highX = boxes[0].x;
    float lowY = boxes[0].y;
    float highY = boxes[0].y;
    for (int k = 1; k < 4; ++k)
    {
        if (boxes[k].x < lowX)
            lowX = boxes[k].x;
        if (highX < boxes[k].x)
            highX = boxes[k].x;
        if (boxes[k].y < lowY)
            lowY = boxes[k].y;
        if (highY < boxes[k].y)
            highY = boxes[k].y;
    }

    const int minX = clamp(int(std::floor(lowX)), 0, lastCol);
    const int maxX = clamp(int(std::ceil(highX)), 0, lastCol);
    const int minY = clamp(int(std::floor(lowY)), 0, lastRow);
    const int maxY = clamp(int(std::ceil(highY)), 0, lastRow);

    const int regionWidth = maxX - minX + 1;
    const int regionHeight = maxY - minY + 1;

    // Rasterize the quadrilateral into a mask local to the bounding rectangle.
    cv::Mat mask = cv::Mat::zeros(regionHeight, regionWidth, CV_8UC1);
    cv::Point box[4];
    for (int k = 0; k < 4; ++k)
    {
        box[k] = cv::Point(int(boxes[k].x) - minX, int(boxes[k].y) - minY);
    }
    const cv::Point *pts[1] = {box};
    int npts[] = {4};
    cv::fillPoly(mask, pts, npts, 1, cv::Scalar(1));

    cv::Mat croppedImg;
    pred(cv::Rect(minX, minY, regionWidth, regionHeight)).copyTo(croppedImg);

    return (float)cv::mean(croppedImg, mask)[0];
}
|
|
|
|
// Returns area(box) * unClipRatio / perimeter(box) for the closed polygon box.
// Despite the name this is not an area: the caller uses it as the offset
// distance for expanding the polygon.
// Returns 0 when the polygon is empty or has zero perimeter, instead of
// dividing by zero.
float getContourArea(const std::vector<cv::Point2f> &box, float unClipRatio)
{
    const size_t size = box.size();
    float area = 0.0f;
    float dist = 0.0f;
    for (size_t i = 0; i < size; i++)
    {
        const cv::Point2f &cur = box[i];
        const cv::Point2f &next = box[(i + 1) % size];

        // Shoelace term for the signed area.
        area += cur.x * next.y - cur.y * next.x;

        // Edge length for the perimeter.
        const float dx = cur.x - next.x;
        const float dy = cur.y - next.y;
        dist += sqrtf(dx * dx + dy * dy);
    }

    if (!(dist > 0.0f))
    {
        return 0.0f;
    }

    area = fabs(float(area / 2.0));
    return area * unClipRatio / dist;
}
|
|
|
|
// Expands the quadrilateral formed by the first four points of box outwards
// and returns the minimum-area rectangle around the expanded outline.
// The offset distance comes from getContourArea(box, unClipRatio).
// Returns a 1x1 rectangle at the origin when the offset produces no points.
cv::RotatedRect unClip(std::vector<cv::Point2f> box, float unClipRatio)
{
    const float distance = getContourArea(box, unClipRatio);

    // Clipper works on integer coordinates, so the corners are truncated.
    Clipper2Lib::Path64 path;
    for (size_t k = 0; k < 4; ++k)
    {
        path.push_back(Clipper2Lib::Point64(int(box[k].x), int(box[k].y)));
    }

    Clipper2Lib::ClipperOffset offset;
    offset.AddPath(path, Clipper2Lib::JoinType::Round, Clipper2Lib::EndType::Polygon);
    Clipper2Lib::Paths64 solution;
    offset.Execute(distance, solution);

    // Gather the points of every resulting path. Each path is walked with its
    // own length; bounding all of them by the last path's length would read
    // past the end of shorter paths and truncate longer ones.
    std::vector<cv::Point2f> points;
    for (const auto &solvedPath : solution)
    {
        for (const auto &pt : solvedPath)
        {
            points.emplace_back(float(pt.x), float(pt.y));
        }
    }

    if (points.empty())
    {
        return cv::RotatedRect(cv::Point2f(0, 0), cv::Size2f(1, 1), 0);
    }
    return cv::minAreaRect(points);
}
|
|
// Loads the recognition model and the key table.
// The key table is read one entry per line; "#" is then placed in front of
// the first entry and " " after the last one. If the key file cannot be
// opened the table is left empty.
CrnnNet::CrnnNet(const std::wstring &pathStr, const std::wstring &keysPath, int numOfThread) : CommonOnnxModel(pathStr, {127.5, 127.5, 127.5}, {1.0 / 127.5, 1.0 / 127.5, 1.0 / 127.5}, numOfThread)
{
    std::ifstream in(keysPath.c_str());
    if (!in)
    {
        return;
    }

    // getline strips the line terminator, so entries carry no newline.
    for (std::string line; getline(in, line);)
    {
        keys.push_back(line);
    }

    keys.insert(keys.begin(), "#");
    keys.emplace_back(" ");
}
|
|
|
|
// Offset, from first, of the first largest element in [first, last).
// Yields 0 for an empty range.
template <class ForwardIterator>
inline static size_t argmax(ForwardIterator first, ForwardIterator last)
{
    const ForwardIterator largest = std::max_element(first, last);
    return std::distance(first, largest);
}
|
|
|
|
// Decodes a score table of h rows by w columns (row-major in outputData) into
// text. For each row the column with the highest score is taken; column 0 and
// columns beyond the key table are skipped, and a column equal to the previous
// row's choice is emitted only once.
// Rows that would start beyond the end of outputData are ignored.
TextLine CrnnNet::scoreToTextLine(const std::vector<float> &outputData, size_t h, size_t w)
{
    const size_t keySize = keys.size();
    const size_t dataSize = outputData.size();
    std::string strRes;
    size_t lastIndex = 0;

    for (size_t i = 0; i < h; i++)
    {
        const size_t start = i * w;
        if (start >= dataSize)
        {
            break;
        }
        // Half-open row range, clipped to the data. The final row includes its
        // last column; it used to be cut one element short.
        const size_t stop = (std::min)(start + w, dataSize);

        const auto rowBegin = outputData.begin() + start;
        const auto rowEnd = outputData.begin() + stop;
        const size_t maxIndex = static_cast<size_t>(std::distance(rowBegin, std::max_element(rowBegin, rowEnd)));

        const bool repeatsPrevious = i > 0 && maxIndex == lastIndex;
        if (maxIndex > 0 && maxIndex < keySize && !repeatsPrevious)
        {
            strRes.append(keys[maxIndex]);
        }
        lastIndex = maxIndex;
    }
    return strRes;
}
|
|
|
|
// Recognizes the text in one cropped image.
// The crop is resized to dstHeight rows, keeping the aspect ratio (at least
// one column wide), and run through the model. Returns an empty line for an
// empty image or when the model output has fewer than three dimensions.
TextLine CrnnNet::getTextLine(const cv::Mat &src)
{
    if (src.empty())
    {
        return TextLine();
    }

    const float scale = (float)dstHeight / (float)src.rows;
    // A very narrow crop would otherwise round down to width 0 and make
    // cv::resize throw, discarding the results for every other crop as well.
    const int dstWidth = (std::max)(int((float)src.cols * scale), 1);

    cv::Mat srcResize;
    cv::resize(src, srcResize, cv::Size(dstWidth, dstHeight));

    auto &&[outputData, outputShape] = RunSession(srcResize);
    if (outputShape.size() < 3)
    {
        return TextLine();
    }
    return scoreToTextLine(outputData, outputShape[1], outputShape[2]);
}
|
|
|
|
// Recognizes every crop in partImg, in order; element i of the result is the
// text of partImg[i].
std::vector<TextLine> CrnnNet::getTextLines(std::vector<cv::Mat> &partImg)
{
    std::vector<TextLine> textLines;
    textLines.reserve(partImg.size());
    for (const cv::Mat &crop : partImg)
    {
        textLines.push_back(getTextLine(crop));
    }
    return textLines;
}
|
|
|
|
// Extracts text boxes from the detector output.
// predMat is the score map, dilateMat the binarized and dilated map whose
// contours are the box candidates. Each surviving candidate is expanded with
// unClip, mapped back to source coordinates through s, and clamped to the
// source image. The boxes are returned in reverse contour order.
std::vector<TextBox> findRsBoxes(const cv::Mat &predMat, const cv::Mat &dilateMat, ScaleParam &s,
                                 const float boxScoreThresh, const float unClipRatio)
{
    const int longSideThresh = 3; // minimum long side of a candidate's min box
    const int maxCandidates = 1000;

    std::vector<std::vector<cv::Point>> contours;
    std::vector<cv::Vec4i> hierarchy;
    cv::findContours(dilateMat, contours, hierarchy, cv::RETR_LIST,
                     cv::CHAIN_APPROX_SIMPLE);

    // Only the first maxCandidates contours are examined.
    const size_t numContours = (std::min)(contours.size(), static_cast<size_t>(maxCandidates));

    std::vector<TextBox> rsBoxes;
    for (size_t i = 0; i < numContours; i++)
    {
        const std::vector<cv::Point> &contour = contours[i];
        if (contour.size() <= 2)
        {
            continue;
        }

        float longSide;
        const std::vector<cv::Point2f> minBoxes = getMinBoxes(cv::minAreaRect(contour), longSide);
        if (longSide < longSideThresh)
        {
            continue;
        }

        if (boxScoreFast(minBoxes, predMat) < boxScoreThresh)
        {
            continue;
        }

        // Expand the box; a rectangle of about 1x1 marks a failed expansion.
        const cv::RotatedRect clipRect = unClip(minBoxes, unClipRatio);
        if (clipRect.size.height < 1.001 && clipRect.size.width < 1.001)
        {
            continue;
        }

        const std::vector<cv::Point2f> clipMinBoxes = getMinBoxes(clipRect, longSide);
        if (longSide < longSideThresh + 2)
        {
            continue;
        }

        // Map back to source coordinates and keep the points inside the image.
        TextBox sourceBox;
        for (const cv::Point2f &corner : clipMinBoxes)
        {
            const float x = corner.x / s.ratioWidth;
            const float y = corner.y / s.ratioHeight;
            const int ptX = (std::min)((std::max)(int(x), 0), s.srcWidth - 1);
            const int ptY = (std::min)((std::max)(int(y), 0), s.srcHeight - 1);
            sourceBox.push_back(cv::Point{ptX, ptY});
        }
        rsBoxes.push_back(sourceBox);
    }

    std::reverse(rsBoxes.begin(), rsBoxes.end());
    return rsBoxes;
}
|
|
|
|
// Runs detection on src and returns the text boxes in source coordinates.
// src is resized to s.dstWidth x s.dstHeight before inference. The score map
// is binarized at boxThresh, dilated, and handed to findRsBoxes together with
// boxScoreThresh and unClipRatio.
// Returns no boxes when the model output is not a 4-D tensor holding at least
// height * width values.
std::vector<TextBox> DbNet::getTextBoxes(cv::Mat &src, ScaleParam &s, float boxScoreThresh, float boxThresh, float unClipRatio)
{
    cv::Mat srcResize;
    cv::resize(src, srcResize, cv::Size(s.dstWidth, s.dstHeight));
    auto &&[outputData, outputShape] = RunSession(srcResize);

    //-----Data preparation-----
    if (outputShape.size() < 4)
    {
        return {};
    }
    const int outHeight = (int)outputShape[2];
    const int outWidth = (int)outputShape[3];
    if (outHeight <= 0 || outWidth <= 0)
    {
        return {};
    }
    const size_t area = static_cast<size_t>(outHeight) * static_cast<size_t>(outWidth);
    if (outputData.size() < area)
    {
        return {};
    }

    // 8-bit copy of the score map, scaled to 0..255, used for thresholding.
    std::vector<unsigned char> cbufData(area);
    for (size_t i = 0; i < area; i++)
    {
        cbufData[i] = (unsigned char)((outputData[i]) * 255);
    }

    // Both matrices are views over the vectors above and must not outlive them.
    // The float map wraps the model output directly, without an extra copy.
    cv::Mat predMat(outHeight, outWidth, CV_32F, (float *)outputData.data());
    cv::Mat cBufMat(outHeight, outWidth, CV_8UC1, (unsigned char *)cbufData.data());

    //-----boxThresh-----
    const double maxValue = 255;
    const double threshold = boxThresh * 255;
    cv::Mat thresholdMat;
    cv::threshold(cBufMat, thresholdMat, threshold, maxValue, cv::THRESH_BINARY);

    //-----dilate-----
    cv::Mat dilateMat;
    cv::Mat dilateElement = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(2, 2));
    cv::dilate(thresholdMat, dilateMat, dilateElement);

    return findRsBoxes(predMat, dilateMat, s, boxScoreThresh, unClipRatio);
}
|
|
|
|
class OcrLite
|
|
{
|
|
public:
|
|
OcrLite(const std::wstring &detPath,
|
|
const std::wstring &recPath, const std::wstring &keysPath, int numOfThread) : crnnNet(recPath, keysPath, numOfThread), dbNet(detPath, numOfThread)
|
|
{
|
|
}
|
|
|
|
std::vector<TextBlock> detect(const void *binptr, size_t size,
|
|
int padding, int maxSideLen,
|
|
float boxScoreThresh, float boxThresh, float unClipRatio, Directional);
|
|
|
|
private:
|
|
DbNet dbNet;
|
|
CrnnNet crnnNet;
|
|
|
|
std::vector<cv::Mat> getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes);
|
|
|
|
std::vector<TextBlock> detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
|
|
float boxScoreThresh = 0.6f, float boxThresh = 0.3f,
|
|
float unClipRatio = 2.0f, Directional mode = Directional::H);
|
|
bool guess_V(const std::vector<TextBox> &);
|
|
};
|
|
|
|
// Surrounds src with a border of `padding` pixels on every side, filled with
// the value (255, 255, 255). Returns src itself when padding is not positive.
cv::Mat makePadding(cv::Mat &src, const int padding)
{
    if (padding > 0)
    {
        cv::Mat padded;
        // NOTE(review): BORDER_ISOLATED is passed as the border type here;
        // confirm that a constant-colour border is what results.
        cv::copyMakeBorder(src, padded, padding, padding, padding, padding,
                           cv::BORDER_ISOLATED, cv::Scalar(255, 255, 255));
        return padded;
    }
    return src;
}
|
|
|
|
std::vector<TextBlock> OcrLite::detect(const void *binptr, size_t size,
|
|
const int padding, const int maxSideLen,
|
|
float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
|
|
{
|
|
std::vector<uchar> bytes{(uchar *)binptr, (uchar *)binptr + size};
|
|
cv::Mat originSrc = imdecode(bytes, cv::IMREAD_COLOR); // default : BGR
|
|
int originMaxSide = (std::max)(originSrc.cols, originSrc.rows);
|
|
int resize;
|
|
if (maxSideLen <= 0 || maxSideLen > originMaxSide)
|
|
{
|
|
resize = originMaxSide;
|
|
}
|
|
else
|
|
{
|
|
resize = maxSideLen;
|
|
}
|
|
resize += 2 * padding;
|
|
cv::Rect paddingRect(padding, padding, originSrc.cols, originSrc.rows);
|
|
cv::Mat paddingSrc = makePadding(originSrc, padding);
|
|
ScaleParam scale = getScaleParam(paddingSrc, resize);
|
|
return detect_internal(paddingSrc, paddingRect, scale,
|
|
boxScoreThresh, boxThresh, unClipRatio, mode);
|
|
}
|
|
|
|
// Produces one straightened crop of src per text box, in box order.
std::vector<cv::Mat> OcrLite::getPartImages(cv::Mat &src, std::vector<TextBox> &textBoxes)
{
    std::vector<cv::Mat> partImages;
    for (const TextBox &textBox : textBoxes)
    {
        partImages.emplace_back(getRotateCropImage(src, textBox));
    }
    return partImages;
}
|
|
|
|
// Rotates src by 180 degrees in place.
void matRotateClockWise180(cv::Mat &src)
{
    // A negative flip code mirrors around both axes in one call, which is the
    // same as flipping around the x axis and then around the y axis.
    cv::flip(src, src, -1);
}
|
|
|
|
// Rotates src by 90 degrees clockwise in place: transpose, then mirror around
// the vertical axis.
void matRotateClockWise90(cv::Mat &src)
{
    cv::transpose(src, src);
    cv::flip(src, src, 1);
}
|
|
bool OcrLite::guess_V(const std::vector<TextBox> &textBoxes)
|
|
{
|
|
auto whs = 1.0f;
|
|
for (auto &box : textBoxes)
|
|
{
|
|
int minX = std::numeric_limits<int>::max();
|
|
int minY = std::numeric_limits<int>::max();
|
|
int maxX = std::numeric_limits<int>::min();
|
|
int maxY = std::numeric_limits<int>::min();
|
|
for (auto &point : box)
|
|
{
|
|
if (point.x < minX)
|
|
minX = point.x;
|
|
if (point.y < minY)
|
|
minY = point.y;
|
|
if (point.x > maxX)
|
|
maxX = point.x;
|
|
if (point.y > maxY)
|
|
maxY = point.y;
|
|
}
|
|
auto w = maxX - minX;
|
|
auto h = maxY - minY;
|
|
if (h == 0 || w == 0)
|
|
continue;
|
|
whs *= w / h;
|
|
}
|
|
return whs < 1;
|
|
}
|
|
// Detects text boxes in src, recognizes each of them and returns the results.
// For Directional::V, or Directional::Auto when guess_V says so, every crop is
// rotated by 180 and then 90 degrees clockwise before recognition.
// originRect.x is taken as the padding around the original image and is
// subtracted from both coordinates of every reported point.
std::vector<TextBlock> OcrLite::detect_internal(cv::Mat &src, cv::Rect &originRect, ScaleParam &scale,
                                                float boxScoreThresh, float boxThresh, float unClipRatio, Directional mode)
{
    std::vector<TextBox> textBoxes = dbNet.getTextBoxes(src, scale, boxScoreThresh, boxThresh, unClipRatio);
    std::vector<cv::Mat> partImages = getPartImages(src, textBoxes);

    // The orientation decision depends only on the full set of boxes, so it is
    // made once instead of once per crop.
    const bool rotateCrops = mode == Directional::V || (mode == Directional::Auto && guess_V(textBoxes));
    if (rotateCrops)
    {
        for (cv::Mat &crop : partImages)
        {
            matRotateClockWise180(crop);
            matRotateClockWise90(crop);
        }
    }

    std::vector<TextLine> textLines = crnnNet.getTextLines(partImages);

    const int padding = originRect.x; // padding conversion

    std::vector<TextBlock> textBlocks;
    textBlocks.reserve(textLines.size());
    for (size_t i = 0; i < textLines.size(); ++i)
    {
        const TextBox &padded = textBoxes[i];

        TextBox boxPoint;
        boxPoint.reserve(4);
        for (size_t k = 0; k < 4; ++k)
        {
            boxPoint.emplace_back(padded[k].x - padding, padded[k].y - padding);
        }
        textBlocks.emplace_back(std::move(boxPoint), std::move(textLines[i]));
    }

    return textBlocks;
}
|
|
|
|
// Four corner points of a text box, flattened for the callback interface.
// Field order is x1, y1, x2, y2, x3, y3, x4, y4 and must not change: the
// struct is filled positionally and passed by value across the API boundary.
struct ocrpoints
{
    int x1, y1; // first corner
    int x2, y2; // second corner
    int x3, y3; // third corner
    int x4, y4; // fourth corner
};
|
|
// Creates an OCR engine from the detection model, recognition model and key
// table at the given paths.
// Returns nullptr when any path is null or when construction fails for any
// reason. A non-null result must be released with OcrDestroy.
DECLARE_API OcrLite *OcrInit(const wchar_t *szDetModel, const wchar_t *szRecModel, const wchar_t *szKeyPath, int nThreads)
{
    // Building a std::wstring from a null pointer is undefined behaviour rather
    // than an exception, so it has to be rejected before the try block.
    if (!szDetModel || !szRecModel || !szKeyPath)
    {
        return nullptr;
    }

    try
    {
        // Ownership passes to the caller of this C-style API.
        return new OcrLite(szDetModel, szRecModel, szKeyPath, nThreads);
    }
    catch (...)
    {
        // Exceptions must not cross the API boundary; failure is reported as
        // a null handle.
        return nullptr;
    }
}
|
|
|
|
// Runs OCR on the encoded image in [binptr, binptr + size) and invokes cb once
// per recognized text block with its four corner points and its text.
// The text pointer passed to cb is only valid for the duration of that call.
// Does nothing when pOcrObj or cb is null. Any exception raised during
// processing ends the run silently; blocks already reported stay reported.
DECLARE_API void OcrDetect(OcrLite *pOcrObj, const void *binptr, size_t size, Directional mode, void (*cb)(ocrpoints, const char *))
{
    if (!pOcrObj || !cb)
        return;

    try
    {
        // Fixed settings: padding 50, max side 1024, thresholds 0.1 / 0.1,
        // unclip ratio 2.0.
        const auto result = pOcrObj->detect(binptr, size, 50, 1024, 0.1f, 0.1f, 2.0f, mode);

        for (const auto &[box, text] : result)
        {
            if (box.size() < 4)
            {
                continue;
            }
            cb({box[0].x, box[0].y,
                box[1].x, box[1].y,
                box[2].x, box[2].y,
                box[3].x, box[3].y},
               text.c_str());
        }
    }
    catch (...)
    {
        // Exceptions must not cross the API boundary.
    }
}
|
|
|
|
// Releases an engine created by OcrInit. A null handle is accepted and
// ignored.
DECLARE_API void OcrDestroy(OcrLite *pOcrObj)
{
    // Deleting a null pointer is a no-op, so no explicit check is required.
    delete pOcrObj;
}