97 lines
2.6 KiB
C++
Raw Normal View History

2024-01-08 23:37:00 +08:00
#pragma execution_character_set("utf-8")
2024-04-02 15:36:52 +08:00
#include <windows.h>
2024-01-08 23:37:00 +08:00
#include <iostream>
#include <fstream>
2024-04-02 15:36:52 +08:00
#include <vector>
#include "define.h"
#include "cinterface.h"
struct mecab_path_t; // only ever used through pointers in this file

/**
 * Local declaration of the node record returned by the mecab library's
 * mecab_sparse_tonode entry point.
 *
 * Only a few members are read in this file: mecab_parse follows `next`,
 * copies `length` bytes starting at `surface`, copies `feature` as a C string
 * and stops when `stat` equals 3.
 *
 * NOTE(review): the member order and types presumably have to match the
 * mecab_node_t definition in the library's mecab.h exactly, because instances
 * are produced by the DLL - confirm against that header before editing.
 */
struct mecab_node_t
{
    // Links to neighbouring nodes and paths.
    mecab_node_t *prev;
    mecab_node_t *next;
    mecab_node_t *enext;
    mecab_node_t *bnext;
    mecab_path_t *rpath;
    mecab_path_t *lpath;

    // Token text; mecab_parse treats it as `length` bytes, not as a
    // NUL-terminated token.
    const char *surface;
    // Read by mecab_parse as a NUL-terminated string.
    const char *feature;

    unsigned int id;
    unsigned short length;
    unsigned short rlength;
    unsigned short rcAttr;
    unsigned short lcAttr;
    unsigned short posid;
    unsigned char char_type;
    unsigned char stat; // mecab_parse stops iterating at the value 3
    unsigned char isbest;
    float alpha;
    float beta;
    float prob;
    short wcost;
    long cost;
};
2024-04-02 15:36:52 +08:00
typedef struct mecab_t mecab_t;
typedef mecab_t *(*mecab_new)(int argc, char **argv);
typedef mecab_node_t *(*mecab_sparse_tonode)(mecab_t *mecab, const char *);
typedef void (*mecab_destroy)(mecab_t *mecab);
2024-01-08 23:37:00 +08:00
HMODULE mecablib;
2024-04-02 15:36:52 +08:00
void *mecab_init(char *utf8path, wchar_t *mepath)
{
2024-01-08 23:37:00 +08:00
mecablib = LoadLibraryW(mepath);
2024-04-02 15:36:52 +08:00
if (mecablib == 0)
return 0;
auto _mecab_new = (mecab_new)GetProcAddress(mecablib, "mecab_new");
if (_mecab_new == 0)
return 0;
std::vector<std::string> vargv = {"fugashi", "-C", "-r", "nul", "-d", utf8path, "-Owakati"};
2024-01-08 23:37:00 +08:00
auto argv = vecstr2c(vargv);
auto trigger = _mecab_new(vargv.size(), argv);
freestringlist(argv, vargv.size());
return trigger;
}
2024-04-02 15:36:52 +08:00
void mecab_end(void *trigger)
{
if (trigger == 0)
return;
if (mecablib == 0)
return;
2024-01-08 23:37:00 +08:00
auto _mecab_destroy = (mecab_destroy)GetProcAddress(mecablib, "mecab_destroy");
2024-04-02 15:36:52 +08:00
if (_mecab_destroy == 0)
return;
mecab_destroy((mecab_t *)trigger);
2024-01-08 23:37:00 +08:00
}
2024-04-02 15:36:52 +08:00
bool mecab_parse(void *trigger, char *utf8string, char ***surface, char ***features, int *num)
{
if (trigger == 0)
return false;
if (mecablib == 0)
return false;
2024-01-08 23:37:00 +08:00
auto _mecab_sparse_tonode = (mecab_sparse_tonode)GetProcAddress(mecablib, "mecab_sparse_tonode");
2024-04-02 15:36:52 +08:00
if (_mecab_sparse_tonode == 0)
return false;
2024-01-08 23:37:00 +08:00
std::string cstr = utf8string;
2024-04-02 15:36:52 +08:00
auto node = _mecab_sparse_tonode((mecab_t *)trigger, cstr.c_str());
2024-01-08 23:37:00 +08:00
2024-04-02 15:36:52 +08:00
std::vector<std::string> surfs;
std::vector<std::string> featuresv;
while (node->next)
{
2024-01-08 23:37:00 +08:00
node = node->next;
2024-04-02 15:36:52 +08:00
if (node->stat == 3)
{
2024-01-08 23:37:00 +08:00
break;
}
std::string surf = node->surface;
surf = surf.substr(0, node->length);
2024-04-02 15:36:52 +08:00
surfs.emplace_back(surf);
2024-01-08 23:37:00 +08:00
featuresv.emplace_back(node->feature);
}
*surface = vecstr2c(surfs);
*features = vecstr2c(featuresv);
*num = surfs.size();
return true;
}