import re
from aiogram import Router, F, Bot
from aiogram.filters import Command
from aiogram.types import Message, InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery
from aiogram.fsm.context import FSMContext
from aiogram.fsm.state import State, StatesGroup
from database.db import async_session_maker
from database.models import WordSource
from services.user_service import UserService
from services.vocabulary_service import VocabularyService
from services.ai_service import ai_service
from utils.i18n import t, get_user_lang, get_user_translation_lang
from utils.levels import get_user_level_for_language
router = Router()
# File extensions accepted for word-list import
SUPPORTED_EXTENSIONS = {'.txt', '.md'}
# Separators between a word and its translation ('-', en/em dash, ':', '=', tab)
# NOTE(review): `\s*` allows a bare '-' with no surrounding spaces, so hyphenated
# words like "well-known" are split into word/translation too — confirm intended.
WORD_SEPARATORS = re.compile(r'\s*[-–—:=\t]\s*')
class ImportStates(StatesGroup):
    """FSM states for importing words from free-form text or an uploaded file."""
    # Waiting for the user to send a text passage (or a file).
    waiting_for_text = State()
    # Extracted words are displayed with inline buttons; waiting for a choice.
    viewing_words = State()
@router.message(Command("import"))
async def cmd_import(message: Message, state: FSMContext):
    """Entry point for /import: prompt the user for a text to extract words from.

    Moves the FSM into ``waiting_for_text``; does nothing for unregistered users.
    """
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        if not user:
            # Unregistered user — no stored language, fall back to 'ru'.
            await message.answer(t('ru', 'common.start_first'))
            return
        await state.set_state(ImportStates.waiting_for_text)
        # Consistency fix: use the shared helper instead of reading
        # user.language_interface directly, like every other handler here.
        lang = get_user_lang(user)
        await message.answer(
            t(lang, 'import.title') + "\n\n" +
            t(lang, 'import.desc') + "\n\n" +
            t(lang, 'import.can_send') + "\n\n" +
            t(lang, 'import.cancel_hint')
        )
@router.message(Command("cancel"), ImportStates.waiting_for_text)
async def cancel_import(message: Message, state: FSMContext):
    """Abort an in-progress import: reset the FSM and confirm to the user."""
    await state.clear()
    async with async_session_maker() as session:
        db_user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        ui_lang = get_user_lang(db_user)
        await message.answer(t(ui_lang, 'import_extra.cancelled'))
@router.message(ImportStates.waiting_for_text)
async def process_text(message: Message, state: FSMContext):
    """Handle the text a user sends for AI word extraction.

    Validates length bounds (50..3000 chars), extracts up to 15 words via the
    AI service, stores them in FSM data and shows them with add-buttons.
    """
    # Fix: non-text content (photos, stickers, documents handled elsewhere)
    # has message.text == None — guard against AttributeError.
    text = (message.text or '').strip()
    # Fix: the original opened two separate sessions and fetched the same
    # user twice; one session and one fetch suffice.
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        lang = get_user_lang(user)
        if len(text) < 50:
            await message.answer(t(lang, 'import.too_short'))
            return
        if len(text) > 3000:
            await message.answer(t(lang, 'import.too_long'))
            return
        # Show a progress indicator while the AI call runs.
        processing_msg = await message.answer(t(lang, 'import.processing'))
        # Extract words via AI, scaled to the user's current level.
        current_level = get_user_level_for_language(user)
        words = await ai_service.extract_words_from_text(
            text=text,
            level=current_level,
            max_words=15,
            learning_lang=user.learning_language,
            translation_lang=get_user_translation_lang(user),
        )
        await processing_msg.delete()
        if not words:
            await message.answer(t(lang, 'import.failed'))
            await state.clear()
            return
        # Persist the extraction result for the button callbacks.
        await state.update_data(
            words=words,
            user_id=user.id,
            original_text=text,
            level=current_level
        )
        await state.set_state(ImportStates.viewing_words)
        # Display the extracted words with inline add-buttons.
        await show_extracted_words(message, words)
async def show_extracted_words(message: Message, words: list):
    """Render the AI-extracted words plus an inline keyboard to add them."""
    async with async_session_maker() as session:
        viewer = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        lang = (viewer.language_interface if viewer else 'ru') or 'ru'
        # Build the message body.
        parts = [t(lang, 'import.found_header', n=len(words)) + "\n\n"]
        for idx, word_data in enumerate(words, 1):
            parts.append(
                f"{idx}. {word_data['word']} "
                f"[{word_data.get('transcription', '')}]\n"
                f" {word_data['translation']}\n"
            )
            snippet = word_data.get('context')
            if snippet:
                # Trim overly long context snippets for readability.
                if len(snippet) > 80:
                    snippet = snippet[:77] + "..."
                parts.append(f" «{snippet}»\n")
            parts.append("\n")
        parts.append(t(lang, 'words.choose'))
        text = "".join(parts)
        # One button per word, packed two per row.
        rows = []
        pair = []
        for idx, word_data in enumerate(words):
            pair.append(InlineKeyboardButton(
                text=f"➕ {word_data['word']}",
                callback_data=f"import_word_{idx}"
            ))
            if len(pair) == 2:
                rows.append(pair)
                pair = []
        if pair:
            rows.append(pair)
        # "Add all" row.
        rows.append([
            InlineKeyboardButton(text=t(lang, 'words.add_all_btn'), callback_data="import_all_words")
        ])
        # "Close" row.
        rows.append([
            InlineKeyboardButton(text=t(lang, 'words.close_btn'), callback_data="close_import")
        ])
        await message.answer(text, reply_markup=InlineKeyboardMarkup(inline_keyboard=rows))
@router.callback_query(F.data.startswith("import_word_"), ImportStates.viewing_words)
async def import_single_word(callback: CallbackQuery, state: FSMContext):
    """Add a single extracted word to the user's vocabulary.

    Fix: Telegram accepts exactly one answerCallbackQuery per callback; the
    original answered eagerly up front and then tried to answer again for the
    "not found" / "already exists" (show_alert) cases, so those alerts never
    reached the user. Now each code path answers exactly once.
    """
    word_index = int(callback.data.split("_")[2])
    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')
    if word_index >= len(words):
        await callback.answer(t('ru', 'words.err_not_found'))
        return
    word_data = words[word_index]
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)
        lang = get_user_lang(user)
        # Duplicate check before inserting.
        existing = await VocabularyService.get_word_by_original(
            session, user_id, word_data['word']
        )
        if existing:
            await callback.answer(t(lang, 'words.already_exists', word=word_data['word']), show_alert=True)
            return
        # Acknowledge now — the insert below may take a moment.
        await callback.answer()
        learn = user.learning_language if user else 'en'
        translation_lang = get_user_translation_lang(user)
        ctx = word_data.get('context')
        # The context sentence becomes an example with an empty translation.
        examples = ([{learn: ctx, translation_lang: ''}] if ctx else [])
        await VocabularyService.add_word(
            session=session,
            user_id=user_id,
            word_original=word_data['word'],
            word_translation=word_data['translation'],
            source_lang=user.learning_language if user else None,
            translation_lang=translation_lang,
            transcription=word_data.get('transcription'),
            examples=examples,
            source=WordSource.CONTEXT,
            category='imported',
            difficulty_level=data.get('level')
        )
        await callback.message.answer(t(lang, 'import.added_single', word=word_data['word']))
@router.callback_query(F.data == "import_all_words", ImportStates.viewing_words)
async def import_all_words(callback: CallbackQuery, state: FSMContext):
    """Add every extracted word, skipping ones already in the vocabulary.

    Reports added/skipped counts, removes the keyboard and clears the FSM.
    """
    # Acknowledge immediately — bulk inserts can exceed the callback timeout.
    await callback.answer()
    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')
    added_count = 0
    skipped_count = 0
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)
        # Hoisted loop invariants: the user's languages and the level do not
        # change between words (the original recomputed them per iteration).
        learn = user.learning_language if user else 'en'
        translation_lang = get_user_translation_lang(user)
        level = data.get('level')
        for word_data in words:
            # Skip words that already exist in the vocabulary.
            existing = await VocabularyService.get_word_by_original(
                session, user_id, word_data['word']
            )
            if existing:
                skipped_count += 1
                continue
            ctx = word_data.get('context')
            # The context sentence becomes an example with an empty translation.
            examples = ([{learn: ctx, translation_lang: ''}] if ctx else [])
            await VocabularyService.add_word(
                session=session,
                user_id=user_id,
                word_original=word_data['word'],
                word_translation=word_data['translation'],
                source_lang=user.learning_language if user else None,
                translation_lang=translation_lang,
                transcription=word_data.get('transcription'),
                examples=examples,
                source=WordSource.CONTEXT,
                category='imported',
                difficulty_level=level
            )
            added_count += 1
        # Consistency: resolve the UI language via the shared helper.
        lang = get_user_lang(user)
        result_text = t(lang, 'import.added_count', n=added_count)
        if skipped_count > 0:
            result_text += "\n" + t(lang, 'import.skipped_count', n=skipped_count)
    await callback.message.edit_reply_markup(reply_markup=None)
    await callback.message.answer(result_text)
    await state.clear()
@router.callback_query(F.data == "close_import", ImportStates.viewing_words)
async def close_import(callback: CallbackQuery, state: FSMContext):
    """Dismiss the import view: delete the message and reset the FSM."""
    await callback.message.delete()
    await state.clear()
    # Acknowledge the callback so the client stops showing a spinner.
    await callback.answer()
def parse_word_line(line: str) -> dict | None:
    """Parse one line of an imported word list.

    Accepted formats: ``word - translation`` / ``word : translation`` (any
    separator matched by WORD_SEPARATORS) or a bare ``word`` without a
    translation.

    Returns ``{'word': ..., 'translation': ...}`` where ``translation`` may be
    None, or None for blank lines and ``#`` comments.
    """
    line = line.strip()
    if not line or line.startswith('#'):  # skip blanks and comment lines
        return None
    # Try to split into word / translation on the first separator.
    parts = WORD_SEPARATORS.split(line, maxsplit=1)
    if len(parts) == 2:
        word = parts[0].strip()
        translation = parts[1].strip()
        if word:
            # Fix: a trailing separator with no translation ("hello -") used
            # to fall through and keep the separator as part of the word; now
            # the bare word is returned with translation=None.
            return {'word': word, 'translation': translation or None}
    # No usable separator — treat the whole line as a word without translation.
    word = line.strip()
    if word:
        return {'word': word, 'translation': None}
    return None
def parse_file_content(content: str) -> tuple[list[dict], bool]:
    """Parse raw file text into a deduplicated word list.

    Returns ``(words, needs_translation)``: the flag is True when at least one
    entry came without a translation and must be translated via AI.
    """
    words: list[dict] = []
    seen_lower: set[str] = set()  # case-insensitive dedup keys
    needs_translation = False
    for raw_line in content.split('\n'):
        entry = parse_word_line(raw_line)
        if entry is None:
            continue
        key = entry['word'].lower()
        if key in seen_lower:
            continue
        seen_lower.add(key)
        words.append(entry)
        if entry['translation'] is None:
            needs_translation = True
    return words, needs_translation
@router.message(F.document)
async def handle_file_import(message: Message, state: FSMContext, bot: Bot):
    """Import words from an uploaded .txt/.md file.

    Validates extension and size, downloads and parses the file, optionally
    translates bare words via AI, then shows the result with add-buttons.
    """
    doc = message.document
    # Derive the lowercased extension, if the name has one.
    name = doc.file_name or ''
    ext = ('.' + name.rsplit('.', 1)[-1].lower()) if '.' in name else ''
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, message.from_user.id)
        if not user:
            await message.answer(t('ru', 'common.start_first'))
            return
        lang = get_user_lang(user)
        if ext not in SUPPORTED_EXTENSIONS:
            await message.answer(t(lang, 'import_file.unsupported_format'))
            return
        # Reject files above 1 MB.
        if doc.file_size > 1024 * 1024:
            await message.answer(t(lang, 'import_file.too_large'))
            return
        # Download and decode the file as UTF-8.
        try:
            tg_file = await bot.get_file(doc.file_id)
            payload = await bot.download_file(tg_file.file_path)
            content = payload.read().decode('utf-8')
        except UnicodeDecodeError:
            await message.answer(t(lang, 'import_file.encoding_error'))
            return
        except Exception:
            await message.answer(t(lang, 'import_file.download_error'))
            return
        # Parse the content into (word, translation) entries.
        words, needs_translation = parse_file_content(content)
        if not words:
            await message.answer(t(lang, 'import_file.no_words_found'))
            return
        # Cap the word count (lower cap when AI translation is needed).
        limit = 50 if needs_translation else 100
        if len(words) > limit:
            words = words[:limit]
            await message.answer(t(lang, 'import_file.truncated', n=limit))
        # Translate bare words through the AI service when required.
        if needs_translation:
            progress = await message.answer(t(lang, 'import_file.translating'))
            translations = await ai_service.translate_words_batch(
                words=[w['word'] for w in words],
                source_lang=user.learning_language,
                translation_lang=get_user_translation_lang(user)
            )
            await progress.delete()
            if isinstance(translations, list):
                # Positional match: the AI answer mirrors the request order.
                for i, word_data in enumerate(words):
                    if i < len(translations):
                        tr = translations[i]
                        word_data['translation'] = tr.get('translation', '')
                        word_data['transcription'] = tr.get('transcription', '')
                        if tr.get('reading'):  # furigana for Japanese
                            word_data['reading'] = tr.get('reading')
            else:
                # Unexpected AI payload shape — leave the words untranslated.
                for word_data in words:
                    word_data['translation'] = ''
                    word_data['transcription'] = ''
        # Persist the parsed words for the callbacks and show them.
        await state.update_data(
            words=words,
            user_id=user.id,
            level=get_user_level_for_language(user)
        )
        await state.set_state(ImportStates.viewing_words)
        await show_file_words(message, words, lang)
async def show_file_words(message: Message, words: list, lang: str):
    """Preview words parsed from a file and offer bulk-add buttons."""
    preview = words[:20]  # only the first 20 entries are listed in the message
    parts = [t(lang, 'import_file.found_header', n=len(words)) + "\n\n"]
    for idx, word_data in enumerate(preview, 1):
        entry = f"{idx}. {word_data['word']}"
        transcription = word_data.get('transcription', '')
        if transcription:
            entry += f" [{transcription}]"
        translation = word_data.get('translation', '')
        if translation:
            entry += f" — {translation}"
        parts.append(entry + "\n")
    if len(words) > 20:
        # NOTE(review): hardcoded Russian string bypasses i18n — consider
        # moving it to a translation key.
        parts.append(f"\n...и ещё {len(words) - 20} слов\n")
    parts.append("\n" + t(lang, 'import_file.choose_action'))
    # Action buttons: add everything, or close the view.
    markup = InlineKeyboardMarkup(inline_keyboard=[
        [InlineKeyboardButton(
            text=t(lang, 'import_file.add_all_btn', n=len(words)),
            callback_data="import_file_all"
        )],
        [InlineKeyboardButton(
            text=t(lang, 'words.close_btn'),
            callback_data="close_import"
        )]
    ])
    await message.answer("".join(parts), reply_markup=markup)
@router.callback_query(F.data == "import_file_all", ImportStates.viewing_words)
async def import_file_all_words(callback: CallbackQuery, state: FSMContext):
    """Add every word parsed from an uploaded file to the user's vocabulary.

    Reports added/skipped counts, removes the keyboard and clears the FSM.
    """
    # Acknowledge immediately — the bulk insert may take a while.
    await callback.answer()
    data = await state.get_data()
    words = data.get('words', [])
    user_id = data.get('user_id')
    added_count = 0
    skipped_count = 0
    async with async_session_maker() as session:
        user = await UserService.get_user_by_telegram_id(session, callback.from_user.id)
        # Hoisted loop invariants: the user's languages do not change between
        # words (the original recomputed them on every iteration).
        source_lang = user.learning_language if user else None
        translation_lang = get_user_translation_lang(user)
        for word_data in words:
            # Skip words that already exist in the vocabulary.
            existing = await VocabularyService.get_word_by_original(
                session, user_id, word_data['word']
            )
            if existing:
                skipped_count += 1
                continue
            await VocabularyService.add_word(
                session=session,
                user_id=user_id,
                word_original=word_data['word'],
                word_translation=word_data.get('translation', ''),
                source_lang=source_lang,
                translation_lang=translation_lang,
                transcription=word_data.get('transcription'),
                source=WordSource.IMPORT
            )
            added_count += 1
        lang = get_user_lang(user)
        result_text = t(lang, 'import.added_count', n=added_count)
        if skipped_count > 0:
            result_text += "\n" + t(lang, 'import.skipped_count', n=skipped_count)
    await callback.message.edit_reply_markup(reply_markup=None)
    await callback.message.answer(result_text)
    await state.clear()