Spaces:
Sleeping
Sleeping
italoribeiro
committed on
Commit
·
3bc7eb3
1
Parent(s):
5c10777
Add application
Browse files- Dockerfile +11 -0
- app/__init__.py +0 -0
- app/controller/__init__.py +0 -0
- app/controller/classify.py +7 -0
- app/main.py +72 -0
- credentials/firebase-adminsdk.json +13 -0
- model/__init__.py +0 -0
- model/argq.py +39 -0
- model_aemi_cpu.sav +3 -0
- model_aemp_cpu.sav +3 -0
- model_cla_cpu.sav +3 -0
- model_cpu.sav +3 -0
- model_cre_cpu.sav +3 -0
- model_org_cpu.sav +3 -0
- requirements.txt +0 -0
- test/test_main.py +63 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.9
|
2 |
+
|
3 |
+
WORKDIR /code
|
4 |
+
|
5 |
+
COPY ./requirements.txt /code/requirements.txt
|
6 |
+
|
7 |
+
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
|
8 |
+
|
9 |
+
COPY . .
|
10 |
+
|
11 |
+
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
app/__init__.py
ADDED
File without changes
|
app/controller/__init__.py
ADDED
File without changes
|
app/controller/classify.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from model.argq import ArgqClassifier, get_model
|
2 |
+
from fastapi import Depends
|
3 |
+
|
4 |
+
class ClassifyController:
    """Controller stub for text-classification requests."""

    async def get_text_classification(self, text: str, model=Depends(get_model)):
        """Print the attribute names of the injected model and return 0.

        Placeholder implementation: ``text`` is accepted but not used, and
        no classification is performed.
        """
        # Debug aid only: lists what the injected dependency exposes.
        attribute_names = dir(model)
        print(attribute_names)
        return 0
|
app/main.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from pydantic import BaseModel, Field
|
3 |
+
from fastapi.middleware.cors import CORSMiddleware
|
4 |
+
import logging
|
5 |
+
from model.argq import ArgqClassifier
|
6 |
+
from datetime import datetime
|
7 |
+
import firebase_admin
|
8 |
+
from firebase_admin import credentials, firestore
|
9 |
+
import uvicorn
|
10 |
+
from os import getenv, path
|
11 |
+
|
12 |
+
|
13 |
+
# Configure root logging before any of the module-level start-up work logs.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

app = FastAPI(title="ArgQ Backend", version="0.0.1")

# A wildcard origin must not be combined with allow_credentials=True: the
# FastAPI CORS documentation requires explicit origins when credentials are
# allowed. The routes in this module take plain JSON bodies and use no
# cookies or auth headers, so credentials are switched off.
# NOTE(review): if a frontend needs credentialed requests, list its origins
# explicitly in allow_origins and re-enable allow_credentials.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

logging.info("Starting application")
# The service-account file is resolved relative to this module, not the
# current working directory.
# NOTE(review): this credentials file is committed to the repository with
# its private key; rotate the key and load it from a secret store instead.
cred_file_path = path.join(path.dirname(__file__), "../credentials/firebase-adminsdk.json")
cred = credentials.Certificate(cred_file_path)
firebase_admin.initialize_app(cred)

# Firestore client shared by the request handlers.
db = firestore.client()

# The classifier is built once at import time and reused by every request.
logging.info("Loading model..")
model = ArgqClassifier()
logging.info("Model loaded")
|
35 |
+
|
36 |
+
class Tweet(BaseModel):
    """Request payload holding the raw text to classify."""

    text: str
|
38 |
+
|
39 |
+
class TextWithAspects(BaseModel):
    """Request payload pairing a tweet with the aspects to classify it by."""

    tweet: Tweet
    # When the client omits "aspects", all six aspect names are used.
    aspects: list = [
        "quality",
        "clarity",
        "organization",
        "credibility",
        "emotional_polarity",
        "emotional_intensity",
    ]
|
42 |
+
|
43 |
+
class FeedbackItem(BaseModel):
    """Request payload for a piece of user feedback."""

    text: str
    # Filled with datetime.utcnow() at validation time when not supplied.
    timestamp: datetime = Field(default_factory=datetime.utcnow)
|
46 |
+
|
47 |
+
@app.post("/argq/classify")
async def get_text_classification(tweet: Tweet):
    """Classify the tweet text and wrap the result under "classification".

    The value is whatever ``model.classify_text`` returns for ``tweet.text``.
    """
    label = await model.classify_text(tweet.text)
    return {"classification": label}
|
53 |
+
|
54 |
+
@app.post("/argq/classify/aspects")
async def get_text_classification_by_aspects(request: TextWithAspects):
    """Classify the tweet text once per requested aspect.

    Args:
        request: The tweet plus the list of aspect names to evaluate.

    Returns:
        ``{"classification": {aspect: label, ...}}`` with one entry per
        requested aspect.

    Raises:
        HTTPException: 422 when any requested aspect is not a string or has
            no entry in ``model.models``.
    """
    # Local import keeps this handler self-contained; fastapi is already a
    # dependency of this module.
    from fastapi import HTTPException

    # "aspects" is an untyped list, so entries can be arbitrary JSON values.
    # Reject bad entries up front instead of letting the model lookup fail
    # with an unhandled KeyError/TypeError (HTTP 500).
    unknown = [
        aspect
        for aspect in request.aspects
        if not isinstance(aspect, str) or aspect not in model.models
    ]
    if unknown:
        raise HTTPException(
            status_code=422,
            detail=f"Unknown aspects: {unknown}. Valid aspects: {sorted(model.models)}",
        )

    classification = {}
    for aspect in request.aspects:
        classification[aspect] = await model.classify_text_by_aspect(request.tweet.text, aspect)
    return {"classification": classification}
|
62 |
+
|
63 |
+
@app.post("/argq/feedback")
async def post_feedback(item: FeedbackItem):
    """Store the feedback item in the 'feedback' collection and echo it back.

    The timestamp is converted to its ISO-8601 string form before it is
    written and returned.
    """
    payload = item.dict()
    payload['timestamp'] = payload['timestamp'].isoformat()
    # document() with no argument creates a reference to a new document.
    db.collection('feedback').document().set(payload)
    return {"status": "success", "feedback_received": payload}
|
70 |
+
|
71 |
+
if __name__ == "__main__":
    # uvicorn only honours reload=True when the application is given as an
    # import string; passing the app object makes it warn and exit. The
    # string matches the one used by the Dockerfile CMD.
    uvicorn.run(
        "app.main:app",
        host="0.0.0.0",
        port=int(getenv("PORT", "8000")),
        reload=True,
    )
|
credentials/firebase-adminsdk.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"type": "service_account",
|
3 |
+
"project_id": "argq-feedback",
|
4 |
+
"private_key_id": "677e4d4261a9a96bd6f3330c6dcc47a0ecfd2061",
|
5 |
+
"private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQCsFvkzZdPae+Qc\n5o4xTCgp+YATrE2005hjcHIKkCdA/QdqPGzw9V84wqO9R2lidUqNtDJU80dItdF9\nCmQY96KPrGhTyvGOXxSJArb+7o6JvAUpDmxAdLKAp20Kef5Zsm1TKmqRW2AJKBbc\nw3sYbTRBaGYfJYBdoIxVoQh7CneJbV0+y9BOUfQVBCLHdYmxDxzqNCngcLUhdh9r\n5yTp20xgTQJHDbBprgsFWMqfwfVwUy18xW1bFeyV8lQNLzb5rd0niNZyDz2jkdiT\ndYsw0d5TpWoe/WlY6lHKun0QwqABIcJCjTkleHTOMNPYeDIUp8YbW7mb19HjwMzB\nTubDxprfAgMBAAECggEARBMub0OXw7UAruIRW7T5qmkVXjLp5l9Rx/xiUIVn0dCG\nT2Mp4UPfcC+m4ChQqu8lF9sxkeNOzpeGEEW9BRdoyhb0ijkcxyobkHtvcndiEWA8\nVlprPjArsLMKcnuBv2/SyXRdBbU0z0p5iMkXd7kzU7B9vVJzbhYljXYPScUBNm5S\nu6AxHUIbMaHi60TWCPpvTw8v3LpBGS3AQjFsxjhhR/usiEZ8ZQaPVVPtqE0l1Ehw\n3UlxAmvKB0hu7jcsEWi+oxtYow8iZlSlkO9wClHcqyiXzleweVQijpt20AwHByB5\n/CQh16mmP48jLPJWU46ZBseaHuNsPQE27nxydcHC4QKBgQDqvwwgPQBPXMWfiQPm\nmCmn7+llKg/G01wgcL4D/9W97KB/zRcaD6lkMbvTzx+o9Lzoq0T4aeW01klUDioc\nwmT70zwHy5S8gIWdlotj1Pj/BCoDhbg2UE1jswOTUZDbST5Gwn6sGREyl0xgQunz\nV+wa4bx2oIxSha0FWnzlCuqYrwKBgQC7q6wCmvWWbH67XCYzpN/igx66cZIta2Qm\ndeA7++m8EbAynGBMb1aEzLpZ55q0MRwbs/vEs3G4wA96FG1YnjCtT6i7lpet8YQh\nGcxF+PkupyqOtcsSVqp/ufHZw7+JsjIiABIL+B2xh3czfLI0r5c3PmUuFQDLbXek\nSlaQfNRM0QKBgFZwKsLkM20FuC9agHk0poIUMVjy+AQ6Z736Rb2rQsVAkIj+t/b8\ntV8TgRopNns8QyNZjXf7Zn4EOdQdsxwL6KthyMUGDaqrmIfCm2kTTux4WBAb+Qzm\n3NhNXo+shS559diQXZx5Tn/WfmUjvonAYkwzuvXjgEgSuPczrrGYJ3I5AoGBALbS\nkosnMkAMCZm0N3LwFzquyWyP3vtoNvRQuNU2n4ibIq7rL9TGUd6lIpUaztbjUKKc\nP5Rry0lTsMAYzj0aPglYJOQ53CGTukgUva8c0ILmTssfxmhjDU3IcxbVXu5hLf15\nXBtU5nhfo3wA6gnxVLp4ilDOHSwPxBHEaXfwY1FBAoGBAJKSmH7XcwGxscoTZ1Ln\nM/iq0K3Vp15tm5dOrtjwD7f7kKV+i8eiOhNxqNaqD6Wor4Jk9iYfbUP+KiKxdNYU\nyeRzKXux5aZTLVwmEN3NalDY6W7O2+97GpxNg8YYyFPPKmtoBkqiSgp4Xg+rJww9\nJwLmjJJuRkIGd50UeDrD4k98\n-----END PRIVATE KEY-----\n",
|
6 |
+
"client_email": "[email protected]",
|
7 |
+
"client_id": "109125477852325451232",
|
8 |
+
"auth_uri": "https://accounts.google.com/o/oauth2/auth",
|
9 |
+
"token_uri": "https://oauth2.googleapis.com/token",
|
10 |
+
"auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
|
11 |
+
"client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-yiy7e%40argq-feedback.iam.gserviceaccount.com",
|
12 |
+
"universe_domain": "googleapis.com"
|
13 |
+
}
|
model/__init__.py
ADDED
File without changes
|
model/argq.py
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import torch
|
3 |
+
from transformers import AutoTokenizer
|
4 |
+
import logging
|
5 |
+
|
6 |
+
class ArgqClassifier:
    """Bundle of per-aspect text classifiers sharing one tokenizer.

    One pickled model is loaded per aspect name; each model is called with
    the tokenizer output and must return an object exposing ``logits``.
    """

    # Aspect name -> pickle file, resolved relative to the working directory.
    _MODEL_FILES = {
        'quality': 'model_cpu.sav',
        'clarity': 'model_cla_cpu.sav',
        'organization': 'model_org_cpu.sav',
        'credibility': 'model_cre_cpu.sav',
        'emotional_polarity': 'model_aemp_cpu.sav',
        'emotional_intensity': 'model_aemi_cpu.sav',
    }

    def __init__(self):
        """Load the tokenizer and every aspect model."""
        self.tokenizer = AutoTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased', do_lower_case=False)
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device)
        logging.info(f"Version: {torch.__version__}")
        logging.info(f"Device being used: {device}")
        # NOTE(review): inputs are moved to self.device but the unpickled
        # models are not; confirm they live on the same device when CUDA is
        # available.
        self.models = {
            aspect: self._load_pickle(filename)
            for aspect, filename in self._MODEL_FILES.items()
        }
        self.max_length = 180

    @staticmethod
    def _load_pickle(filename):
        """Unpickle one model file, closing the file handle afterwards."""
        # SECURITY: pickle executes arbitrary code on load; only ever point
        # this at model files shipped with the application.
        with open(filename, 'rb') as handle:
            return pickle.load(handle)

    async def classify_text(self, text):
        """Return the predicted class index for ``text`` from the 'quality' model."""
        return await self.classify_text_by_aspect(text, "quality")

    async def classify_text_by_aspect(self, text, aspect):
        """Return the predicted class index for ``text`` under ``aspect``.

        Args:
            text: The text to classify; it is truncated to ``self.max_length``
                tokens by the tokenizer call.
            aspect: A key of ``self.models``.

        Returns:
            The argmax over the model's logits for the single input, as a
            Python int.

        Raises:
            KeyError: If ``aspect`` has no loaded model.
        """
        model = self.models[aspect]
        inputs = self.tokenizer(
            text,
            return_tensors='pt',
            padding=True,
            truncation=True,
            max_length=self.max_length,
        ).to(self.device)
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            output = model(**inputs)
        # Batch of one: take the argmax along the class axis of row 0.
        pred_labels = torch.argmax(output.logits, 1)
        return pred_labels[0].item()
|
model_aemi_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dd8192ac2c157bcb6d4f097403c9623ba0684445fe9eecfcfce0483b79e7325
|
3 |
+
size 435809358
|
model_aemp_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8959d5dc345310146c272300d41c55163d3033ce2b72da403ffd9c01fd82d3e0
|
3 |
+
size 435809358
|
model_cla_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:736da3bd00aa6746deffa44edba23dab150d873e2110f8851bdbacb0e0b1c4db
|
3 |
+
size 435809358
|
model_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:40212e8cc49e8d0f42fa579f1b4d28af3e9f298b51240271ee02bb103cfac8ed
|
3 |
+
size 435809764
|
model_cre_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a4b37a4b17145fe67bf774725e5a6c2f88bfbc1c89f4dfc7b97fdbeb69b5798
|
3 |
+
size 435809358
|
model_org_cpu.sav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:320091e7abd4a91dc34fb4b6c043abf3b8038605d401cdf4639441ab80b457c6
|
3 |
+
size 435809358
|
requirements.txt
ADDED
Binary file (1.95 kB). View file
|
|
test/test_main.py
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi.testclient import TestClient
|
2 |
+
from app.main import app
|
3 |
+
|
4 |
+
client = TestClient(app)
|
5 |
+
|
6 |
+
TEST_TEXTS = [
|
7 |
+
"mano eu não entendo a cabeça da esquerda, vcs são doentes, projetos que vão ajudar a economia do Brasil, até mesmo pra ajudar pagar dividas que o próprio auxilio emergencial vai criar... vcs são doentes???",
|
8 |
+
"O mais difícil de entender é que especialistas dizem que a aprovação não era benéfica e ainda assim eles aprovam! Oq esses deputados entendem dessa questão? Tipo assim, não votem a favor pq é ruim para o povo, aí ligam o fodasse e fazem assim mesmo, que porra é essa?",
|
9 |
+
"Você votou? Provavelmente votou NÃO. Então a pergunta é: você está “tistinho” porque perdeu? Se a autonomia não fosse aprovada você estaria aqui se manifestando contra? Ou estaria exaltando os deputados que entenderam que o BC precisa ter um freio? Totalmente sem noção!",
|
10 |
+
"Rodrigo Maia, você hoje já falou que se arrepende do apoio a Bolsonaro no segundo turno. Parabéns por admitir isto. Agora... quando virá o arrependimento de não ter ao menos colocado para a frente algum dos pedidos de Impeachment?",
|
11 |
+
"Vc propôs essa emenda, esperando que passe ou apenas para constar? Com a postagem do seu presidente da câmara, que até já considerou que o Dep. Daniel Silveira contrapôs à democracia, mesmo não tendo sido julgado e condenado pelo STF, espera que essa sua proposta tenha sucesso? https://t.co/uJjvgcwqEt",
|
12 |
+
"Desculpe senhora deputada, cansei de vcs ! Ninguém faz nada, ninguém! Vcs brincam com o povo! Se hoje um governador maluco fizer um forno, como foi feito na Alemanha e começar a matar as pessoas,tudo bem , os caras que jamais devem ser citados, deram o direito !",
|
13 |
+
"Caro Deputado, não sei se irá ler meu posicionamento. Mas, calaram a voz de uma Deputado q foi eleito para PODER FALAR POR NÓS! Um PODER, calou a não a voz do Daniel, calou foi a NOSSA! Ontem foi deputado pondo mordaça da boca de outro deputado e traçando o fim do CONGRESSO.",
|
14 |
+
"Está na hora de exigir o respeito com seriedade, impeachment se faz mais que necessário, ele está tentando rebaixar a Câmara dos Deputados a seu serviço, uma ação judicial enérgica imediata. Ação do Arthur Lira agora, se deixar passar perderá a força",
|
15 |
+
]
|
16 |
+
|
17 |
+
TEST_TEXTS_EXPECTED_RESULTS = [0, 1, 0, 2, 2, 2, 2, 2]
|
18 |
+
|
19 |
+
TEST_TEXTS_EXPECTED_RESULTS_FOR_CLARITY = [2, 1, 2, 1, 2, 2, 2, 2]
|
20 |
+
|
21 |
+
class TestMain:
    """End-to-end checks of the classification routes via the test client."""

    def test_get_text_classification(self):
        """Each sample text gets its expected label from /argq/classify."""
        for text, expected in zip(TEST_TEXTS, TEST_TEXTS_EXPECTED_RESULTS):
            response = client.post("/argq/classify", json={"text": text})
            assert response.status_code == 200
            assert response.json() == {"classification": expected}

    def test_get_text_clarity_classification(self):
        """Requesting only "clarity" returns just that aspect's label."""
        for text, expected in zip(TEST_TEXTS, TEST_TEXTS_EXPECTED_RESULTS_FOR_CLARITY):
            payload = {"tweet": {"text": text}, "aspects": ["clarity"]}
            expected_body = {"classification": {"clarity": expected}}
            response = client.post("/argq/classify/aspects", json=payload)
            assert response.status_code == 200
            assert response.json() == expected_body

    def test_get_text_all_aspects_classification(self):
        """Omitting "aspects" classifies the first sample by every aspect."""
        payload = {"tweet": {"text": TEST_TEXTS[0]}}
        expected_body = {
            "classification": {
                "quality": 0,
                "clarity": 2,
                "organization": 1,
                "credibility": 0,
                "emotional_polarity": 0,
                "emotional_intensity": 1,
            }
        }
        response = client.post("/argq/classify/aspects", json=payload)
        assert response.status_code == 200
        assert response.json() == expected_body
|