2024-01-08 23:37:00 +08:00
|
|
|
|
2024-11-05 15:46:45 +08:00
|
|
|
|
2024-04-02 15:36:52 +08:00
|
|
|
// Local mirror of the node record returned by the dynamically loaded
// library's `mecab_sparse_tonode` entry point. Nodes are only ever read
// through this declaration, so the order and types of the members must
// stay exactly as they are.
// NOTE(review): the layout presumably tracks upstream mecab.h — confirm
// whenever the bundled DLL is upgraded.
struct mecab_node_t
{
    // --- links to other nodes / paths (mecab_parse follows `next`) ---
    mecab_node_t *prev;
    mecab_node_t *next;
    mecab_node_t *enext;
    mecab_node_t *bnext;
    struct mecab_path_t *rpath;
    struct mecab_path_t *lpath;

    // --- token text ---
    const char *surface; // mecab_parse copies exactly `length` bytes from here
    const char *feature; // forwarded to the parse callback as a C string

    // --- identifiers, lengths and attributes ---
    unsigned int id;
    unsigned short length; // byte count used together with `surface`
    unsigned short rlength;
    unsigned short rcAttr;
    unsigned short lcAttr;
    unsigned short posid;
    unsigned char char_type;
    unsigned char stat; // mecab_parse stops iterating when this equals 3
    unsigned char isbest;

    // --- scoring ---
    float alpha;
    float beta;
    float prob;
    short wcost;
    long cost;
};
|
|
|
|
|
2024-04-02 15:36:52 +08:00
|
|
|
typedef struct mecab_t mecab_t;
|
2024-11-05 15:46:45 +08:00
|
|
|
typedef mecab_t *(*mecab_new)(int argc, const char **argv);
|
2024-04-02 15:36:52 +08:00
|
|
|
typedef mecab_node_t *(*mecab_sparse_tonode)(mecab_t *mecab, const char *);
|
|
|
|
typedef void (*mecab_destroy)(mecab_t *mecab);
|
2024-01-08 23:37:00 +08:00
|
|
|
HMODULE mecablib;
|
2024-11-05 15:46:45 +08:00
|
|
|
/// Loads the MeCab library at `mepath` and creates a tagger that uses the
/// dictionary directory `utf8path`.
///
/// @param utf8path Dictionary directory, passed to the library after "-d".
/// @param mepath   Wide-character path of the library to load.
/// @return Opaque tagger handle for mecab_parse / mecab_end, or null when an
///         argument is null, the library cannot be loaded, the "mecab_new"
///         symbol is missing, or tagger creation fails.
///
/// The global `mecablib` is only replaced once a tagger has actually been
/// created, so a failed call neither leaks the freshly loaded module nor
/// invalidates a handle stored by an earlier successful call.
DECLARE_API void *mecab_init(char *utf8path, wchar_t *mepath)
{
    if (utf8path == nullptr || mepath == nullptr)
        return nullptr;

    HMODULE library = LoadLibraryW(mepath);
    if (library == nullptr)
        return nullptr;

    const auto _mecab_new = reinterpret_cast<mecab_new>(GetProcAddress(library, "mecab_new"));
    if (_mecab_new == nullptr)
    {
        // Balance the LoadLibraryW above; nothing else references this load.
        FreeLibrary(library);
        return nullptr;
    }

    // Command-line style arguments handed to the library's constructor.
    const char *argv[] = {"fugashi", "-C", "-r", "nul", "-d", utf8path, "-Owakati"};
    mecab_t *trigger = _mecab_new(static_cast<int>(ARRAYSIZE(argv)), argv);
    if (trigger == nullptr)
    {
        FreeLibrary(library);
        return nullptr;
    }

    mecablib = library;
    return trigger;
}
|
2024-11-05 15:46:45 +08:00
|
|
|
/// Destroys a tagger previously returned by mecab_init.
///
/// @param trigger Opaque tagger handle; null is ignored.
///
/// Does nothing when no library is loaded or the "mecab_destroy" symbol
/// cannot be resolved. The library module itself is left loaded.
DECLARE_API void mecab_end(void *trigger)
{
    if (trigger == nullptr || mecablib == nullptr)
        return;

    const auto _mecab_destroy = reinterpret_cast<mecab_destroy>(GetProcAddress(mecablib, "mecab_destroy"));
    if (_mecab_destroy == nullptr)
        return;

    // Call through the resolved pointer. The previous code wrote
    // `mecab_destroy(...)`, which is a cast to the function-pointer *type*
    // and therefore never released the tagger.
    _mecab_destroy(static_cast<mecab_t *>(trigger));
}
|
2024-07-14 15:09:37 +08:00
|
|
|
|
2024-11-05 15:46:45 +08:00
|
|
|
/// Tokenizes `utf8string` with the given tagger and reports each token.
///
/// @param trigger    Opaque tagger handle returned by mecab_init.
/// @param utf8string NUL-terminated input text.
/// @param callback   Invoked once per token with (surface, feature); the
///                   surface pointer is only valid during the call.
/// @return true when the text was parsed and the node list walked; false when
///         an argument is null, no library is loaded, the
///         "mecab_sparse_tonode" symbol is missing, or the library returns no
///         node list.
DECLARE_API bool mecab_parse(void *trigger, char *utf8string, void (*callback)(const char *, const char *))
{
    // NOTE(review): 3 presumably corresponds to MECAB_EOS_NODE in upstream
    // mecab.h — confirm against the bundled library version.
    constexpr unsigned char kEndOfSentenceStat = 3;

    if (trigger == nullptr || mecablib == nullptr)
        return false;
    if (utf8string == nullptr || callback == nullptr)
        return false;

    const auto _mecab_sparse_tonode =
        reinterpret_cast<mecab_sparse_tonode>(GetProcAddress(mecablib, "mecab_sparse_tonode"));
    if (_mecab_sparse_tonode == nullptr)
        return false;

    // Parse a private copy of the input, as the original code did.
    // NOTE(review): presumably so node->surface never aliases the caller's
    // mutable buffer while callbacks run — confirm.
    const std::string text = utf8string;
    const mecab_node_t *head = _mecab_sparse_tonode(static_cast<mecab_t *>(trigger), text.c_str());
    if (head == nullptr)
        return false; // the library reported a failure; nothing to walk

    // The first node is skipped; iteration ends at the end of the chain or at
    // the first node whose stat marks end-of-sentence.
    for (const mecab_node_t *node = head->next;
         node != nullptr && node->stat != kEndOfSentenceStat;
         node = node->next)
    {
        // Build a NUL-terminated copy of exactly `length` bytes of surface.
        const std::string surface(node->surface, node->length);
        callback(surface.c_str(), node->feature);
    }
    return true;
}
|