# Spaces: Running on Zero
import re

# Emoticon ("kaomoji") matcher. Shape of a match, left to right:
#   optional leading decoration character,
#   an opening bracket-like character,
#   up to 4 arbitrary characters,
#   one character from the "face feature" class,
#   up to 5 arbitrary characters,
#   a closing bracket-like character,
#   optional trailing decoration character.
kaomoji_regex = re.compile(
    r"[oヽwΣ┗╰O︿Ψ凸]?[(|≡*(].{0,4}[Д✿_▽→≧﹏`∩⊙∇☆≡๑〃′エ≦▔@﹁εヘ•́ω益‿≖ฺ皿•̀艹 ̄△|゚].{0,5}[|≡*))][┛ブ凸cdd︴oOΨ︿w╯ノ]?"
)

# Any single character in the range U+4E00..U+9FA5 (CJK ideographs).
chinese_regex = re.compile(r"[\u4e00-\u9fa5]")

# A run of digits with an optional fractional part.
# Group 1 is the integer part; group 2 is ".<digits>" or None.
# NOTE: inside a raw string the escapes must be single backslashes; the
# doubled form (r"\\d") would match a literal backslash followed by "d".
digit_regex = re.compile(r"(\d+)(\.\d+)?", re.UNICODE)

# A string that is exactly one character in U+4E00..U+9FA5.
chinese_char_regex = re.compile(r"^[\u4e00-\u9fa5]$", re.UNICODE)

# A non-empty string made only of ASCII digits, ASCII letters, "." and ",".
eng_and_digit_char_regex = re.compile(r"^[0-9.,A-Za-z]+$", re.UNICODE)

# A non-empty string made only of spaces, ASCII digits, upper-case ASCII
# letters and the punctuation characters " ' . , : ? ! -
upper_eng_and_digit_regex = re.compile(r"^[ 0-9A-Z\"'.,:?!\-]+$", re.UNICODE)

# Whitelist of single characters; each alternative below matches one character.
valid_char_regex = re.compile(
    # Tab, carriage return, newline, space.
    r"[\t\r\n ]|"
    # U+4E00..U+9FA5 (CJK ideographs).
    r"[\u4e00-\u9fa5]|"
    # U+0080, the currency-symbols range U+20A0..U+20BF, cent/pound/yen signs
    # and the code points U+FFE0, U+FFE1, U+FFE5, U+FFE6.
    # NOTE(review): U+0080 is presumably a mis-decoded euro sign - confirm.
    r"\u0080|[\u20a0-\u20bf]|\u00a2|\u00a3|\u00a5|\uffe0|\uffe1|\uffe5|\uffe6|"
    # Assorted punctuation: U+3000, U+3002, U+00B7, U+2014, U+2019, U+2026,
    # several U+FF0x/U+FF1x code points, and parts of U+FE10..U+FE6A.
    r"\u3000|\u3002|\u00b7|\u2014|\u2019|\u2026|\uff01|\uff1f|\uff0e|\uff1a|\uff1b|\uff0b|\uff0c|\uff0d|\uff0f|[\ufe10-\ufe16]|[\ufe50-\ufe51]|[\ufe55-\ufe57]|\ufe6a|"
    # ASCII digits 0-9.
    r"[\u0030-\u0039]|"
    # U+0391..U+03C9 (Greek letters).
    r"[\u0391-\u03c9]|"
    # U+00B0..U+00B3, U+2015..U+2018 and U+3000..U+303F.
    r"[\u00b0-\u00b3]|[\u2015-\u2018]|[\u3000-\u303f]|"
    # ASCII punctuation; U+0021 ("!") and U+003F ("?") are outside these ranges.
    r"[\u0022-\u002f\u003a-\u003e\u0040\u005b-\u0060\u007b-\u007e]|"
    # U+FF21..U+FF3A, U+FF41..U+FF5A, then ASCII A-Z and a-z.
    r"[\uff21-\uff3a]|[\uff41-\uff5a]|[\u0041-\u005a]|[\u0061-\u007a]",
    re.UNICODE,
)