2024-01-08 23:37:00 +08:00
|
|
|
{
|
2024-08-07 00:18:10 +08:00
|
|
|
"stringreplace": {
|
|
|
|
"use": false,
|
|
|
|
"name": "字符串替换",
|
|
|
|
"args": {
|
2024-09-30 22:12:54 +08:00
|
|
|
"internal": []
|
2024-08-07 00:18:10 +08:00
|
|
|
}
|
|
|
|
},
|
2024-05-01 11:33:04 +08:00
|
|
|
"_remove_non_shiftjis_char": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤文本中的非日语字符集字符"
|
|
|
|
},
|
|
|
|
"_remove_control": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤控制字符"
|
|
|
|
},
|
2024-07-21 23:59:35 +08:00
|
|
|
"_remove_symbo": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤英文标点"
|
|
|
|
},
|
2024-05-01 11:33:04 +08:00
|
|
|
"_remove_chaos": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤其他乱码"
|
|
|
|
},
|
|
|
|
"_remove_not_in_ja_bracket": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤「」以外的字符"
|
2024-01-08 23:37:00 +08:00
|
|
|
},
|
|
|
|
"_1": {
|
|
|
|
"use": false,
|
|
|
|
"name": "去除花括号{}"
|
|
|
|
},
|
2024-06-15 00:56:16 +08:00
|
|
|
"length_threshold": {
|
|
|
|
"use": false,
|
2024-09-30 22:12:54 +08:00
|
|
|
"name": "按字数过滤或截断",
|
2024-06-15 00:56:16 +08:00
|
|
|
"args": {
|
|
|
|
"minzishu": 1,
|
|
|
|
"maxzishu": 99999,
|
2024-09-30 22:12:54 +08:00
|
|
|
"cut": false,
|
|
|
|
"cut_reverse": false
|
2024-06-15 00:56:16 +08:00
|
|
|
},
|
|
|
|
"argstype": {
|
|
|
|
"minzishu": {
|
|
|
|
"name": "最小字数",
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 99999999,
|
|
|
|
"step": 1
|
|
|
|
},
|
|
|
|
"maxzishu": {
|
|
|
|
"name": "最大字数",
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 99999999,
|
|
|
|
"step": 1
|
|
|
|
},
|
|
|
|
"cut": {
|
2024-09-30 22:12:54 +08:00
|
|
|
"name": "超过最大字数时截断而非过滤",
|
|
|
|
"type": "switch"
|
|
|
|
},
|
|
|
|
"cut_reverse": {
|
|
|
|
"name": "截断时反向截断",
|
2024-06-15 00:56:16 +08:00
|
|
|
"type": "switch"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"lines_threshold": {
|
|
|
|
"use": false,
|
2024-09-30 22:12:54 +08:00
|
|
|
"name": "按行数过滤或截断",
|
2024-06-15 00:56:16 +08:00
|
|
|
"args": {
|
|
|
|
"minzishu": 1,
|
|
|
|
"maxzishu": 99999,
|
2024-09-30 22:12:54 +08:00
|
|
|
"cut": false,
|
|
|
|
"cut_reverse": false
|
2024-06-15 00:56:16 +08:00
|
|
|
},
|
|
|
|
"argstype": {
|
|
|
|
"minzishu": {
|
|
|
|
"name": "最小行数",
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 99999999,
|
|
|
|
"step": 1
|
|
|
|
},
|
|
|
|
"maxzishu": {
|
|
|
|
"name": "最大行数",
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 99999999,
|
|
|
|
"step": 1
|
|
|
|
},
|
|
|
|
"cut": {
|
2024-09-30 22:12:54 +08:00
|
|
|
"name": "超过最大行数时截断而非过滤",
|
|
|
|
"type": "switch"
|
|
|
|
},
|
|
|
|
"cut_reverse": {
|
|
|
|
"name": "截断时反向截断",
|
2024-06-15 00:56:16 +08:00
|
|
|
"type": "switch"
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
2024-01-08 23:37:00 +08:00
|
|
|
"_2": {
|
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复字符_AAAABBBBCCCC->ABC",
|
2024-01-08 23:37:00 +08:00
|
|
|
"args": {
|
2024-02-20 23:59:51 +08:00
|
|
|
"重复次数(若为1则自动分析去重)": 1,
|
2024-05-01 11:33:04 +08:00
|
|
|
"保持非重复字符": true
|
2024-02-20 23:59:51 +08:00
|
|
|
},
|
2024-05-01 11:33:04 +08:00
|
|
|
"argstype": {
|
2024-02-20 23:59:51 +08:00
|
|
|
"保持非重复字符": {
|
|
|
|
"type": "switch"
|
|
|
|
},
|
|
|
|
"重复次数(若为1则自动分析去重)": {
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 10000,
|
|
|
|
"step": 1
|
|
|
|
}
|
2024-01-08 23:37:00 +08:00
|
|
|
}
|
|
|
|
},
|
2024-06-01 19:38:29 +08:00
|
|
|
"dedump": {
|
2024-06-01 19:44:14 +08:00
|
|
|
"use": false,
|
2024-06-01 19:38:29 +08:00
|
|
|
"name": "过滤历史重复_LRU",
|
|
|
|
"args": {
|
|
|
|
"cachesize": 3
|
|
|
|
},
|
|
|
|
"argstype": {
|
|
|
|
"cachesize": {
|
|
|
|
"type": "intspin",
|
|
|
|
"name": "缓存条数",
|
|
|
|
"min": -1,
|
|
|
|
"max": 10000,
|
|
|
|
"step": 1
|
|
|
|
}
|
|
|
|
}
|
|
|
|
},
|
2024-01-08 23:37:00 +08:00
|
|
|
"_3": {
|
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复行_ABCDABCDABCD->ABCD",
|
2024-01-08 23:37:00 +08:00
|
|
|
"args": {
|
|
|
|
"重复次数(若为1则自动分析去重)": 1
|
2024-02-20 23:59:51 +08:00
|
|
|
},
|
2024-05-01 11:33:04 +08:00
|
|
|
"argstype": {
|
2024-02-20 23:59:51 +08:00
|
|
|
"重复次数(若为1则自动分析去重)": {
|
|
|
|
"type": "intspin",
|
|
|
|
"min": 0,
|
|
|
|
"max": 10000,
|
|
|
|
"step": 1
|
|
|
|
}
|
2024-01-08 23:37:00 +08:00
|
|
|
}
|
2024-05-01 11:33:04 +08:00
|
|
|
},
|
|
|
|
"_3_2": {
|
2024-01-08 23:37:00 +08:00
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复行_S1S1S1S2S2S2->S1S2"
|
2024-01-08 23:37:00 +08:00
|
|
|
},
|
|
|
|
"_4": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤尖括号<>"
|
|
|
|
},
|
2024-01-28 13:59:47 +08:00
|
|
|
"_6EX": {
|
|
|
|
"use": false,
|
2024-08-14 16:44:43 +08:00
|
|
|
"name": "过滤换行符"
|
2024-01-28 13:59:47 +08:00
|
|
|
},
|
2024-01-08 23:37:00 +08:00
|
|
|
"_91": {
|
|
|
|
"use": false,
|
|
|
|
"name": "过滤数字"
|
2024-05-01 11:33:04 +08:00
|
|
|
},
|
|
|
|
"_92": {
|
2024-01-08 23:37:00 +08:00
|
|
|
"use": false,
|
|
|
|
"name": "过滤英文字母"
|
|
|
|
},
|
|
|
|
"_10": {
|
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复行_ABCDBCDCDD->ABCD"
|
2024-01-08 23:37:00 +08:00
|
|
|
},
|
|
|
|
"_13": {
|
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复行_AABABCABCD->ABCD"
|
2024-01-08 23:37:00 +08:00
|
|
|
},
|
2024-01-28 13:59:47 +08:00
|
|
|
"_13EX": {
|
|
|
|
"use": false,
|
2024-01-28 14:09:32 +08:00
|
|
|
"name": "去除重复行_AABABCABCDEEFEFG->ABCDEFG"
|
2024-01-28 13:59:47 +08:00
|
|
|
},
|
2024-01-08 23:37:00 +08:00
|
|
|
"_11": {
|
|
|
|
"use": false,
|
|
|
|
"name": "自定义python处理"
|
|
|
|
}
|
|
|
|
}
|