from flask import Flask, render_template, request, jsonify import torch import transformers from transformers import AutoTokenizer, AutoModel , AutoModelForCausalLM from transformers import AutoModelForSeq2SeqLM import pickle import numpy as np import pandas as pd import seaborn as sns import matplotlib.pyplot as plt import nltk from nltk.tokenize import word_tokenize import re import string from nltk.corpus import stopwords from tashaphyne.stemming import ArabicLightStemmer import pyarabic.araby as araby from sklearn.feature_extraction.text import TfidfVectorizer from flask import Flask, render_template, request, redirect, url_for import os nltk.download('punkt') app = Flask(__name__) with open('tfidf_vectorizer.pkl', 'rb') as f: vectorizer = pickle.load(f) with open('svm_model.pkl', 'rb') as f: model_classify = pickle.load(f) model = AutoModelForSeq2SeqLM.from_pretrained("bushra1dajam/AraBART") tokenizer = AutoTokenizer.from_pretrained('bushra1dajam/AraBART') device = torch.device("cuda" if torch.cuda.is_available() else "cpu") model.to(device) def summarize_text(text): inputs = tokenizer("summarize: " + text, return_tensors="pt", max_length=512, truncation=True) inputs = {k: v.to(device) for k, v in inputs.items()} summary_ids = model.generate( inputs["input_ids"], max_length=512, num_beams=8, #no_repeat_ngram_size=4, # Prevents larger n-gram repetitions early_stopping=True) summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True) return summary def remove_numbers(text): cleaned_text = re.sub(r'\d+', '', text) return cleaned_text def Removing_non_arabic(text): text =re.sub(r'[^0-9\u0600-\u06ff\u0750-\u077f\ufb50-\ufbc1\ufbd3-\ufd3f\ufd50-\ufd8f\ufd50-\ufd8f\ufe70-\ufefc\uFDF0-\uFDFD.0-9٠-٩]+', ' ',text) return text nltk.download('stopwords') ara_punctuations = '''`÷×؛<>_()*&^%][ـ،/:"؟.,'{}~¦+|!”…“–ـ''' + string.punctuation stop_words = stopwords.words() def remove_punctuations(text): translator = str.maketrans('', '', ara_punctuations) text = text.translate(translator) return text def remove_tashkeel(text): text = text.strip() text = re.sub("[إأٱآا]", "ا", text) text = re.sub("ى", "ي", text) text = re.sub("ؤ", "ء", text) text = re.sub("ئ", "ء", text) text = re.sub("ة", "ه", text) noise = re.compile(""" ّ | # Tashdid َ | # Fatha ً | # Tanwin Fath ُ | # Damma ٌ | # Tanwin Damm ِ | # Kasra ٍ | # Tanwin Kasr ْ | # Sukun ـ # Tatwil/Kashida """, re.VERBOSE) text = re.sub(noise, '', text) text = re.sub(r'(.)\1+', r"\1\1", text) return araby.strip_tashkeel(text) arabic_stopwords = stopwords.words("arabic") def remove_stop_words(text): Text=[i for i in str(text).split() if i not in arabic_stopwords] return " ".join(Text) def tokenize_text(text): tokens = word_tokenize(text) return tokens def Arabic_Light_Stemmer(text): Arabic_Stemmer = ArabicLightStemmer() text=[Arabic_Stemmer.light_stem(y) for y in text] return " " .join(text) def preprocess_text(text): text = remove_numbers(text) text = Removing_non_arabic(text) text = remove_punctuations(text) text = remove_stop_words(text) text = remove_tashkeel(text) text = tokenize_text(text) text = Arabic_Light_Stemmer(text) return text class_mapping = { 0: "جنائية", 1: "احوال شخصية", 2: "عامة" } @app.route('/') def index(): return render_template('index.html') @app.route('/result', methods=['GET', 'POST']) def result(): if request.method == 'POST': input_text = request.form['text'] if input_text: prepro = preprocess_text(input_text) features = vectorizer.transform([prepro]) prediction = model_classify.predict(features) classifiy = prediction[0] classifiy_class = class_mapping.get(classifiy, "لم يتم التعرف") summarized_text = summarize_text(input_text) return render_template('result.html', classification=classifiy_class, summary=summarized_text, input_text=input_text) return render_template('result.html') @app.route('/profile') def profile(): return render_template('profile.html') @app.route('/login', methods=['GET', 'POST']) def login(): return render_template('login.html') @app.route('/create_account', methods=['GET', 'POST']) def create_account(): return render_template('create_account.html') if __name__ == '__main__': app.run(debug=True)