"""Instagram comment analyzer with a Gradio UI.

Parses pasted Instagram comment dumps of the form
``Фото профиля <user>\n ... <N> ч./нед. ...`` and reports per-comment and
aggregate statistics (likes, emoji usage, mentions, spam heuristics).

NOTE(review): the original file contained a second, partially-working draft
of this script appended after the ``__main__`` guard.  It shadowed these
definitions, re-created ``iface`` and called ``iface.launch()``
unconditionally at import time, and its parser could never match a comment
(``re.split`` with a capture group was iterated over the delimiter tokens).
That dead draft has been removed; this file keeps the complete
implementation that matches the 7-input interface below.
"""
import logging
import re
from collections import Counter
from functools import lru_cache
from typing import Optional, Tuple

import emoji
import gradio as gr

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Patterns used once per comment block -- compile once at import time.
_USERNAME_RE = re.compile(r'Фото профиля\s+(.+?)\n')
_TIME_RE = re.compile(r'(\d+)\s*(?:ч\.|нед\.)')
_LIKES_RE = re.compile(r'"Нравится":\s*(\d+)')
_MENTION_RE = re.compile(r'@(\w+)')
_TRAILING_TIME_RE = re.compile(r'\d+\s*(?:ч\.|нед\.)\s*$')
_TRAILING_LIKES_RE = re.compile(r'"Нравится":\s*\d+\s*Ответить\s*$')


def count_emojis(text: str) -> int:
    """Count emoji in *text*, including multi-codepoint sequences.

    ``emoji.emoji_count`` correctly handles ZWJ sequences and variation
    selectors (e.g. ❤️), which a per-character membership test against
    ``emoji.EMOJI_DATA`` would miscount.
    """
    return emoji.emoji_count(text)


def extract_mentions(text: str) -> list:
    """Return the @usernames mentioned in *text* (without the '@')."""
    return _MENTION_RE.findall(text)


def is_spam(text: str) -> bool:
    """Heuristically flag spam comments (emoji floods, self-promotion)."""
    spam_indicators = ['🔥' * 3, '❤️' * 3, 'follow me', 'check my']
    lowered = text.lower()  # lower once, not once per indicator
    return any(indicator in lowered for indicator in spam_indicators)


def extract_comment_data(comment_text: str) -> Tuple[Optional[str], Optional[str], int, int]:
    """Parse one comment block into ``(username, comment, likes, time)``.

    *comment_text* is a single block starting with ``Фото профиля <user>``.
    The comment body is assumed to be the line immediately after the first
    line containing a timestamp (``N ч.`` / ``N нед.``).

    Returns:
        ``(username, comment, likes, time)``; ``(None, None, 0, 0)`` when the
        block cannot be parsed.
    """
    try:
        username_match = _USERNAME_RE.search(comment_text)
        username = username_match.group(1).strip() if username_match else None
        if not username:
            return None, None, 0, 0

        # The comment text is on the line right after the timestamp line.
        comment = ""
        lines = comment_text.split('\n')
        for i, line in enumerate(lines):
            if _TIME_RE.search(line):
                if i + 1 < len(lines):
                    comment = lines[i + 1].strip()
                break

        # Strip trailing metadata that sometimes sticks to the comment line.
        comment = _TRAILING_TIME_RE.sub('', comment)
        comment = _TRAILING_LIKES_RE.sub('', comment)

        likes_match = _LIKES_RE.search(comment_text)
        likes = int(likes_match.group(1)) if likes_match else 0

        time_match = _TIME_RE.search(comment_text)
        time = int(time_match.group(1)) if time_match else 0

        return username, comment.strip(), likes, time
    except Exception as e:
        # Best-effort parser: a malformed block is skipped, not fatal.
        logger.error("Error extracting data: %s", e)
        return None, None, 0, 0


@lru_cache(maxsize=100)
def analyze_post(content_type: str, link: str, post_likes: int, post_date: str,
                 description: str, comment_count: int,
                 all_comments: str) -> Tuple[str, str, str, str, str]:
    """Analyze a post's pasted comment dump and build the report.

    All parameters arrive from the Gradio form; only *all_comments* is used
    for the analysis itself (the rest keep the UI contract).  Results are
    memoized on the (hashable) form inputs via ``lru_cache``.

    Returns:
        ``(analytics_summary, usernames, comments, likes_chronology,
        total_likes)`` -- all strings, matching the five output widgets.
    """
    try:
        if not all_comments or 'Фото профиля' not in all_comments:
            return "Ошибка: неверный формат данных", "", "", "", "0"

        # Lookahead split keeps the 'Фото профиля' marker inside each block.
        blocks = re.split(r'(?=Фото профиля)', all_comments)
        blocks = [b.strip() for b in blocks if b.strip()]

        comments_data = []
        total_emojis = 0
        mentions = []
        spam_count = 0

        for block in blocks:
            username, comment, likes, time = extract_comment_data(block)
            if username and comment:
                emoji_count = count_emojis(comment)
                comment_mentions = extract_mentions(comment)
                is_spam_comment = is_spam(comment)

                comments_data.append({
                    'username': username,
                    'comment': comment,
                    'likes': likes,
                    'time': time,
                    'emoji_count': emoji_count,
                    'mentions': comment_mentions,
                    'is_spam': is_spam_comment,
                })

                total_emojis += emoji_count
                mentions.extend(comment_mentions)
                if is_spam_comment:
                    spam_count += 1

        # Aggregate statistics.
        total_comments = len(comments_data)
        unique_users = len(set(item['username'] for item in comments_data))
        total_likes = sum(item['likes'] for item in comments_data)
        avg_likes = total_likes / total_comments if total_comments > 0 else 0

        # Top commenters (only those with more than one comment are shown).
        commenter_counts = Counter(item['username'] for item in comments_data)
        top_commenters = commenter_counts.most_common(5)

        analytics = f"""
📊 Подробный анализ комментариев:

Основные метрики:
• Всего комментариев: {total_comments}
• Уникальных пользователей: {unique_users}
• Общее количество лайков: {total_likes}
• Среднее количество лайков: {avg_likes:.1f}

Дополнительная информация:
• Использовано эмодзи: {total_emojis}
• Количество упоминаний: {len(mentions)}
• Выявлено спам-комментариев: {spam_count}

Топ комментаторы:
{chr(10).join(f'• {user}: {count} комментария' for user, count in top_commenters if count > 1)}
"""

        return (
            analytics,
            "\n".join(item['username'] for item in comments_data),
            "\n".join(item['comment'] for item in comments_data),
            "\n".join(str(item['likes']) for item in comments_data),
            str(total_likes),
        )
    except Exception as e:
        logger.error("Analysis error: %s", e)
        return str(e), "", "", "", "0"


# Gradio interface: 7 form inputs -> 5 text outputs (see analyze_post).
iface = gr.Interface(
    fn=analyze_post,
    inputs=[
        gr.Radio(
            choices=["Photo", "Video"],
            label="Content Type",
            value="Photo",
        ),
        gr.Textbox(
            label="Link to Post",
            placeholder="Вставьте ссылку на пост",
        ),
        gr.Number(
            label="Likes",
            value=0,
            minimum=0,
        ),
        gr.Textbox(
            label="Post Date",
            placeholder="YYYY-MM-DD",
        ),
        gr.Textbox(
            label="Description",
            lines=3,
            placeholder="Описание поста",
        ),
        gr.Number(
            label="Comment Count",
            value=0,
            minimum=0,
        ),
        gr.Textbox(
            label="Comments",
            lines=10,
            placeholder="Вставьте комментарии",
        ),
    ],
    outputs=[
        gr.Textbox(label="Analytics Summary", lines=15),
        gr.Textbox(label="Usernames"),
        gr.Textbox(label="Comments"),
        gr.Textbox(label="Likes Chronology"),
        gr.Textbox(label="Total Likes on Comments"),
    ],
    title="Enhanced Instagram Comment Analyzer",
    description="Анализатор комментариев Instagram с расширенной аналитикой",
    theme="default",
)

if __name__ == "__main__":
    try:
        iface.launch(
            share=True,       # public link
            debug=True,       # debug mode
            show_error=True,  # show error details in the UI
        )
    except Exception as e:
        logger.error(f"Error launching interface: {e}", exc_info=True)