File size: 1,152 Bytes
37ced70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import re

kaomoji_regex = re.compile(
    r"[oヽwΣ┗╰O︿Ψ凸]?[(|≡*(].{0,4}[Д✿_▽→≧﹏`∩⊙∇☆≡๑〃′エ≦▔@﹁εヘ•́ω益‿≖ฺ皿•̀艹 ̄△|゚].{0,5}[|≡*))][┛ブ凸cdd︴oOΨ︿w╯ノ]?"
)
chinese_regex = re.compile(r"[\u4e00-\u9fa5]")
digit_regex = re.compile(r"(\\d+)(\\.\\d+)?", re.UNICODE)

chinese_char_regex = re.compile(r"^[\u4e00-\u9fa5]$", re.UNICODE)
eng_and_digit_char_regex = re.compile(r"^[0-9.,A-Za-z]+$", re.UNICODE)
upper_eng_and_digit_regex = re.compile(r"^[ 0-9A-Z\"'.,:?!\-]+$", re.UNICODE)
valid_char_regex = re.compile(
    r"[\t\r\n ]|"
    r"[\u4e00-\u9fa5]|"
    r"\u0080|[\u20a0-\u20bf]|\u00a2|\u00a3|\u00a5|\uffe0|\uffe1|\uffe5|\uffe6|"
    r"\u3000|\u3002|\u00b7|\u2014|\u2019|\u2026|\uff01|\uff1f|\uff0e|\uff1a|\uff1b|\uff0b|\uff0c|\uff0d|\uff0f|[\ufe10-\ufe16]|[\ufe50-\ufe51]|[\ufe55-\ufe57]|\ufe6a|"
    r"[\u0030-\u0039]|"
    r"[\u0391-\u03c9]|"
    r"[\u00b0-\u00b3]|[\u2015-\u2018]|[\u3000-\u303f]|"
    r"[\u0022-\u002f\u003a-\u003e\u0040\u005b-\u0060\u007b-\u007e]|"
    r"[\uff21-\uff3a]|[\uff41-\uff5a]|[\u0041-\u005a]|[\u0061-\u007a]",
    re.UNICODE,
)