hhguo's picture
update
37ced70
import re
kaomoji_regex = re.compile(
r"[oヽwΣ┗╰O︿Ψ凸]?[(|≡*(].{0,4}[Д✿_▽→≧﹏`∩⊙∇☆≡๑〃′エ≦▔@﹁εヘ•́ω益‿≖ฺ皿•̀艹 ̄△|゚].{0,5}[|≡*))][┛ブ凸cdd︴oOΨ︿w╯ノ]?"
)
chinese_regex = re.compile(r"[\u4e00-\u9fa5]")
digit_regex = re.compile(r"(\\d+)(\\.\\d+)?", re.UNICODE)
chinese_char_regex = re.compile(r"^[\u4e00-\u9fa5]$", re.UNICODE)
eng_and_digit_char_regex = re.compile(r"^[0-9.,A-Za-z]+$", re.UNICODE)
upper_eng_and_digit_regex = re.compile(r"^[ 0-9A-Z\"'.,:?!\-]+$", re.UNICODE)
valid_char_regex = re.compile(
r"[\t\r\n ]|"
r"[\u4e00-\u9fa5]|"
r"\u0080|[\u20a0-\u20bf]|\u00a2|\u00a3|\u00a5|\uffe0|\uffe1|\uffe5|\uffe6|"
r"\u3000|\u3002|\u00b7|\u2014|\u2019|\u2026|\uff01|\uff1f|\uff0e|\uff1a|\uff1b|\uff0b|\uff0c|\uff0d|\uff0f|[\ufe10-\ufe16]|[\ufe50-\ufe51]|[\ufe55-\ufe57]|\ufe6a|"
r"[\u0030-\u0039]|"
r"[\u0391-\u03c9]|"
r"[\u00b0-\u00b3]|[\u2015-\u2018]|[\u3000-\u303f]|"
r"[\u0022-\u002f\u003a-\u003e\u0040\u005b-\u0060\u007b-\u007e]|"
r"[\uff21-\uff3a]|[\uff41-\uff5a]|[\u0041-\u005a]|[\u0061-\u007a]",
re.UNICODE,
)