"""Handlers for importing vocabulary words from free text or uploaded files.

Two flows share the same FSM:
  * /import → user sends raw text → AI extracts candidate words.
  * user sends a .txt/.md document → lines are parsed as "word - translation"
    (missing translations are filled in by the AI in one batch call).
Extracted words are then added one-by-one or all at once via inline buttons.
"""

import re

from aiogram import Router, F, Bot
from aiogram.filters import Command
from aiogram.types import Message, InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery
from aiogram.fsm.context import FSMContext
from aiogram.fsm.state import State, StatesGroup

from database.db import async_session_maker
from database.models import WordSource
from services.user_service import UserService
from services.vocabulary_service import VocabularyService
from services.ai_service import ai_service
from utils.i18n import t, get_user_lang, get_user_translation_lang
from utils.levels import get_user_level_for_language

router = Router()

# File extensions accepted by the file-import handler.
SUPPORTED_EXTENSIONS = {'.txt', '.md'}

# Separators between a word and its translation in "word - translation" lines.
# NOTE(review): '-' also splits hyphenated words ("well-known") — confirm this
# is acceptable for the supported learning languages.
WORD_SEPARATORS = re.compile(r'\s*[-–—:=\t]\s*')


class ImportStates(StatesGroup):
    """FSM states for the word-import conversation."""
    waiting_for_text = State()  # waiting for the user to send text (or a file)
    viewing_words = State()     # extracted words shown, awaiting button taps


@router.message(Command("import"))
async def cmd_import(message: Message, state: FSMContext):
    """Handle /import: prompt the user to send text to extract words from."""
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        if not user:
            # Unknown user — they must run /start first to be registered.
            await message.answer(t('ru', 'common.start_first'))
            return

    await state.set_state(ImportStates.waiting_for_text)
    lang = user.language_interface or 'ru'
    await message.answer(
        t(lang, 'import.title') + "\n\n" +
        t(lang, 'import.desc') + "\n\n" +
        t(lang, 'import.can_send') + "\n\n" +
        t(lang, 'import.cancel_hint')
    )


@router.message(Command("cancel"), ImportStates.waiting_for_text)
async def cancel_import(message: Message, state: FSMContext):
    """Abort the import flow on /cancel."""
    await state.clear()
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        lang = get_user_lang(user)
    await message.answer(t(lang, 'import_extra.cancelled'))


# BUGFIX: the F.text filter is required. Without it this handler consumed
# *any* message in waiting_for_text (including documents, which the /import
# prompt explicitly invites), crashing on `message.text.strip()` with
# text=None and shadowing handle_file_import below.
@router.message(ImportStates.waiting_for_text, F.text)
async def process_text(message: Message, state: FSMContext):
    """Process user-sent text: validate length, extract words via AI, show them."""
    text = (message.text or '').strip()

    # Single session for the whole flow (the original opened two back-to-back
    # sessions and fetched the user twice).
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        lang = get_user_lang(user)

        # Length bounds: too little text gives the AI no context; too much
        # is rejected to bound cost.
        if len(text) < 50:
            await message.answer(t(lang, 'import.too_short'))
            return
        if len(text) > 3000:
            await message.answer(t(lang, 'import.too_long'))
            return

        # Show a progress indicator while the AI call runs.
        processing_msg = await message.answer(t(lang, 'import.processing'))

        current_level = get_user_level_for_language(user)
        words = await ai_service.extract_words_from_text(
            text=text,
            level=current_level,
            max_words=15,
            learning_lang=user.learning_language,
            translation_lang=get_user_translation_lang(user),
        )
        await processing_msg.delete()

        if not words:
            await message.answer(t(lang, 'import.failed'))
            await state.clear()
            return

        # Stash extraction results for the button callbacks.
        await state.update_data(
            words=words,
            user_id=user.id,
            original_text=text,
            level=current_level
        )
        await state.set_state(ImportStates.viewing_words)

        await show_extracted_words(message, words)


async def show_extracted_words(message: Message, words: list):
    """Render the extracted words with one add-button per word plus bulk actions."""
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        lang = (user.language_interface if user else 'ru') or 'ru'

    text = t(lang, 'import.found_header', n=len(words)) + "\n\n"
    for idx, word_data in enumerate(words, 1):
        text += (
            f"{idx}. {word_data['word']} "
            f"[{word_data.get('transcription', '')}]\n"
            f"   {word_data['translation']}\n"
        )
        if word_data.get('context'):
            # Truncate overly long context snippets.
            context = word_data['context']
            if len(context) > 80:
                context = context[:77] + "..."
            text += f"   «{context}»\n"
        text += "\n"
    text += t(lang, 'words.choose')

    # One button per word, laid out two per row.
    keyboard = []
    for idx, word_data in enumerate(words):
        button = InlineKeyboardButton(
            text=f"➕ {word_data['word']}",
            callback_data=f"import_word_{idx}"
        )
        if len(keyboard) == 0 or len(keyboard[-1]) == 2:
            keyboard.append([button])
        else:
            keyboard[-1].append(button)

    # "Add all" action.
    keyboard.append([
        InlineKeyboardButton(text=t(lang, 'words.add_all_btn'), callback_data="import_all_words")
    ])
    # "Close" action.
    keyboard.append([
        InlineKeyboardButton(text=t(lang, 'words.close_btn'), callback_data="close_import")
    ])

    reply_markup = InlineKeyboardMarkup(inline_keyboard=keyboard)
    await message.answer(text, reply_markup=reply_markup)


@router.callback_query(F.data.startswith("import_word_"), ImportStates.viewing_words)
async def import_single_word(callback: CallbackQuery, state: FSMContext):
    """Add a single word from the import preview to the user's vocabulary."""
    # Acknowledge immediately — adding a word may take a while and a callback
    # query must be answered within Telegram's timeout.
    await callback.answer()

    word_index = int(callback.data.split("_")[2])
    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')

    if word_index >= len(words):
        # BUGFIX: the callback was already answered above; a second
        # callback.answer() is silently ignored by Telegram, so the user
        # never saw this error. Notify via a chat message instead.
        await callback.message.answer(t('ru', 'words.err_not_found'))
        return

    word_data = words[word_index]

    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)
        lang = get_user_lang(user)

        # Skip duplicates.
        existing = await VocabularyService.get_word_by_original(
            session, user_id, word_data['word'],
            source_lang=user.learning_language
        )
        if existing:
            # BUGFIX: same as above — show_alert via a second callback.answer()
            # never reached the user; send a message instead.
            await callback.message.answer(t(lang, 'words.already_exists', word=word_data['word']))
            return

        translation_lang = get_user_translation_lang(user)
        await VocabularyService.add_word(
            session=session,
            user_id=user_id,
            word_original=word_data['word'],
            word_translation=word_data['translation'],
            source_lang=user.learning_language if user else None,
            translation_lang=translation_lang,
            transcription=word_data.get('transcription'),
            difficulty_level=data.get('level'),
            source=WordSource.CONTEXT
        )

    lang = (user.language_interface if user else 'ru') or 'ru'
    await callback.message.answer(t(lang, 'import.added_single', word=word_data['word']))


@router.callback_query(F.data == "import_all_words", ImportStates.viewing_words)
async def import_all_words(callback: CallbackQuery, state: FSMContext):
    """Add every word from the text-import preview, skipping duplicates."""
    # Answer immediately: the loop below may take noticeable time.
    await callback.answer()

    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')

    added_count = 0
    skipped_count = 0

    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)

        for word_data in words:
            # Skip words that are already in the vocabulary.
            existing = await VocabularyService.get_word_by_original(
                session, user_id, word_data['word'],
                source_lang=user.learning_language
            )
            if existing:
                skipped_count += 1
                continue

            translation_lang = get_user_translation_lang(user)
            await VocabularyService.add_word(
                session=session,
                user_id=user_id,
                word_original=word_data['word'],
                word_translation=word_data['translation'],
                source_lang=user.learning_language if user else None,
                translation_lang=translation_lang,
                transcription=word_data.get('transcription'),
                difficulty_level=data.get('level'),
                source=WordSource.CONTEXT
            )
            added_count += 1

    lang = (user.language_interface if user else 'ru') or 'ru'
    result_text = t(lang, 'import.added_count', n=added_count)
    if skipped_count > 0:
        result_text += "\n" + t(lang, 'import.skipped_count', n=skipped_count)

    await callback.message.edit_reply_markup(reply_markup=None)
    await callback.message.answer(result_text)
    await state.clear()


@router.callback_query(F.data == "close_import", ImportStates.viewing_words)
async def close_import(callback: CallbackQuery, state: FSMContext):
    """Dismiss the import preview and reset the FSM."""
    await callback.message.delete()
    await state.clear()
    await callback.answer()


def parse_word_line(line: str) -> dict | None:
    """Parse one line of an import file.

    Accepted forms: "word - translation" (any separator from WORD_SEPARATORS)
    or a bare "word" (translation will be None).

    Returns a dict with 'word' and 'translation' (translation may be None),
    or None for blank lines and '#' comments.
    """
    line = line.strip()
    if not line or line.startswith('#'):
        return None

    parts = WORD_SEPARATORS.split(line, maxsplit=1)
    if len(parts) == 2:
        word = parts[0].strip()
        translation = parts[1].strip()
        if word:
            # BUGFIX: a trailing separator ("word -") previously fell through
            # and returned the separator as part of the word; treat an empty
            # translation as "no translation" instead.
            return {'word': word, 'translation': translation or None}
        # Separator with no word in front ("- translation") is junk.
        return None

    # No separator — a bare word without a translation.
    if line:
        return {'word': line, 'translation': None}

    return None


def parse_file_content(content: str) -> tuple[list[dict], bool]:
    """Parse file content into a deduplicated word list.

    Returns (words, needs_translation) where needs_translation is True when
    at least one word has no translation and the AI must fill it in.
    """
    words = []
    seen = set()  # lowercase words already collected, for deduplication
    needs_translation = False

    for line in content.split('\n'):
        parsed = parse_word_line(line)
        if parsed and parsed['word'].lower() not in seen:
            words.append(parsed)
            seen.add(parsed['word'].lower())
            if parsed['translation'] is None:
                needs_translation = True

    return words, needs_translation


@router.message(F.document)
async def handle_file_import(message: Message, state: FSMContext, bot: Bot):
    """Import words from an attached .txt/.md file."""
    document = message.document

    # Extract the lower-cased extension (with leading dot).
    file_name = document.file_name or ''
    file_ext = ''
    if '.' in file_name:
        file_ext = '.' + file_name.rsplit('.', 1)[-1].lower()

    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        if not user:
            await message.answer(t('ru', 'common.start_first'))
            return

        lang = get_user_lang(user)

        if file_ext not in SUPPORTED_EXTENSIONS:
            await message.answer(t(lang, 'import_file.unsupported_format'))
            return

        # Reject files over 1 MB before downloading anything.
        if document.file_size > 1024 * 1024:
            await message.answer(t(lang, 'import_file.too_large'))
            return

        # Download and decode the file.
        try:
            file = await bot.get_file(document.file_id)
            file_content = await bot.download_file(file.file_path)
            content = file_content.read().decode('utf-8')
        except UnicodeDecodeError:
            await message.answer(t(lang, 'import_file.encoding_error'))
            return
        except Exception:
            # Best-effort: any transport failure gets a generic message.
            await message.answer(t(lang, 'import_file.download_error'))
            return

        words, needs_translation = parse_file_content(content)

        if not words:
            await message.answer(t(lang, 'import_file.no_words_found'))
            return

        # Cap the batch: the AI-translation path is more expensive, so it
        # gets a smaller limit.
        max_words = 50 if needs_translation else 100
        if len(words) > max_words:
            words = words[:max_words]
            await message.answer(t(lang, 'import_file.truncated', n=max_words))

        # Fill in missing translations with one batch AI call.
        if needs_translation:
            processing_msg = await message.answer(t(lang, 'import_file.translating'))

            words_to_translate = [w['word'] for w in words]
            translations = await ai_service.translate_words_batch(
                words=words_to_translate,
                source_lang=user.learning_language,
                translation_lang=get_user_translation_lang(user)
            )
            await processing_msg.delete()

            if isinstance(translations, list):
                # Positional match: the AI reply is expected in input order.
                for i, word_data in enumerate(words):
                    if i < len(translations):
                        tr = translations[i]
                        word_data['translation'] = tr.get('translation', '')
                        word_data['transcription'] = tr.get('transcription', '')
                        if tr.get('reading'):  # furigana for Japanese
                            word_data['reading'] = tr.get('reading')
            else:
                # AI returned an unexpected shape — leave translations empty
                # rather than failing the whole import. (The original comment
                # claimed a by-word matching fallback that was never written.)
                for word_data in words:
                    word_data['translation'] = ''
                    word_data['transcription'] = ''

        # Stash results for the button callbacks and show the preview.
        await state.update_data(
            words=words,
            user_id=user.id,
            level=get_user_level_for_language(user)
        )
        await state.set_state(ImportStates.viewing_words)

        await show_file_words(message, words, lang)


async def show_file_words(message: Message, words: list, lang: str):
    """Render the parsed file words (first 20) with bulk-action buttons."""
    # Only the first 20 words are listed to keep the message short.
    display_words = words[:20]

    text = t(lang, 'import_file.found_header', n=len(words)) + "\n\n"
    for idx, word_data in enumerate(display_words, 1):
        word = word_data['word']
        translation = word_data.get('translation', '')
        transcription = word_data.get('transcription', '')

        line = f"{idx}. {word}"
        if transcription:
            line += f" [{transcription}]"
        if translation:
            line += f" — {translation}"
        text += line + "\n"

    if len(words) > 20:
        # NOTE(review): hardcoded Russian string — every other user-facing
        # string goes through t(); consider adding an i18n key for this.
        text += f"\n...и ещё {len(words) - 20} слов\n"

    text += "\n" + t(lang, 'import_file.choose_action')

    keyboard = InlineKeyboardMarkup(inline_keyboard=[
        [InlineKeyboardButton(
            text=t(lang, 'import_file.add_all_btn', n=len(words)),
            callback_data="import_file_all"
        )],
        [InlineKeyboardButton(
            text=t(lang, 'words.close_btn'),
            callback_data="close_import"
        )]
    ])

    await message.answer(text, reply_markup=keyboard)


@router.callback_query(F.data == "import_file_all", ImportStates.viewing_words)
async def import_file_all_words(callback: CallbackQuery, state: FSMContext):
    """Add every word parsed from the file, skipping duplicates."""
    await callback.answer()

    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')

    added_count = 0
    skipped_count = 0

    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)

        for word_data in words:
            # Skip words that are already in the vocabulary.
            existing = await VocabularyService.get_word_by_original(
                session, user_id, word_data['word'],
                source_lang=user.learning_language
            )
            if existing:
                skipped_count += 1
                continue

            await VocabularyService.add_word(
                session=session,
                user_id=user_id,
                word_original=word_data['word'],
                word_translation=word_data.get('translation', ''),
                source_lang=user.learning_language if user else None,
                translation_lang=get_user_translation_lang(user),
                transcription=word_data.get('transcription'),
                source=WordSource.IMPORT
            )
            added_count += 1

    lang = get_user_lang(user)
    result_text = t(lang, 'import.added_count', n=added_count)
    if skipped_count > 0:
        result_text += "\n" + t(lang, 'import.skipped_count', n=skipped_count)

    await callback.message.edit_reply_markup(reply_markup=None)
    await callback.message.answer(result_text)
    await state.clear()