# recommend/web_app.py

# -*- coding: utf-8 -*-
"""
网页端应用 - 个性化饮食推荐助手 + 背诵排序功能
"""

from flask import Flask, render_template, request, jsonify
import re
import random
import logging
from pathlib import Path

# Configure logging.
# logging.FileHandler opens its file immediately and does not create missing
# parent directories, so the log directory has to exist before basicConfig
# runs. Creating it only under the __main__ guard is too late: this code
# executes at import time, before that guard is reached.
Path('logs').mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('logs/web_app.log', encoding='utf-8'),
        logging.StreamHandler()
    ]
)

logger = logging.getLogger(__name__)

app = Flask(__name__)
# NOTE(review): this is a placeholder secret committed to source control;
# supply a real secret through deployment configuration before going live.
app.config['SECRET_KEY'] = 'your-secret-key-here'
# Re-read templates from disk when they change, without a server restart.
app.jinja_env.auto_reload = True
app.config['TEMPLATES_AUTO_RELOAD'] = True


class RecitationSorter:
    """Extract recitation items from free-form text and shuffle them.

    Extraction is heuristic and line based: boilerplate lines are dropped,
    numbering and bullet prefixes are stripped, and any remaining line that
    contains Chinese characters is kept as an item.
    """

    # A line containing any of these substrings is treated as a table header
    # or as boilerplate and is dropped.
    _SKIP_KEYWORDS = (
        '章节', '知识点', '选择题', '主观题', '完成', '划',
        '使用说明', '祝:', '凯程', '框架', '理解', '背诵',
    )

    # Page markers such as "第3页" / "共10页".
    _PAGE_RE = re.compile(r'^[第共]\d+页')

    # Lines made only of table-drawing characters or check/cross marks.
    _MARKS_ONLY_RE = re.compile(r'^[|+\-✓×√✗\s]+$')

    # Numbering prefix: optional "第", Chinese or Arabic numerals, optional
    # multi-level suffix (".1", ".2.3"), optional "章"/"节", then at most one
    # separator.  "." and ")" are part of the separator set so that
    # "1. item" and "1) item" do not keep the punctuation in the item text.
    _NUMBERED_RE = re.compile(
        r'^第?[一二三四五六七八九十\d]+(?:[.．]\d+)*[章节]?'
        r'\s*[：:、.．)）]?\s*(.+)'
    )

    # List items introduced by "-" or "•".
    _BULLET_RE = re.compile(r'^[-•]\s*(.+)')

    # Any character in the CJK Unified Ideographs range.
    _CJK_RE = re.compile(r'[\u4e00-\u9fff]')

    def __init__(self):
        # Not used by the methods of this class; kept so the attribute
        # remains available to existing code.
        self.items = []

    def _is_noise(self, line):
        """Return True when a stripped, non-empty line can never be an item."""
        if self._MARKS_ONLY_RE.match(line) or self._PAGE_RE.match(line):
            return True
        return any(keyword in line for keyword in self._SKIP_KEYWORDS)

    def extract_items(self, text):
        """Extract recitation items from ``text``.

        Args:
            text: Raw text, one candidate item per line.

        Returns:
            A list of unique item strings in order of first appearance.
        """
        items = []
        for raw_line in text.strip().split('\n'):
            line = raw_line.strip()
            if not line or self._is_noise(line):
                continue

            # A numbering prefix takes precedence over a bullet prefix.
            match = self._NUMBERED_RE.match(line) or self._BULLET_RE.match(line)
            if match:
                item = match.group(1).strip()
                # Require at least two characters for a meaningful item.
                if len(item) > 1:
                    items.append(item)
                # A prefixed line is never reconsidered as a plain line.
                continue

            # Plain line (e.g. a table cell): keep it only if it is longer
            # than two characters and contains Chinese text.
            if len(line) > 2 and self._CJK_RE.search(line):
                items.append(line)

        # Every collected item is already stripped and non-empty, so an
        # insertion-ordered dict removes duplicates while keeping order.
        return list(dict.fromkeys(items))

    def random_sort(self, items):
        """Return a new, randomly ordered list of ``items``.

        Args:
            items: Any iterable of items; it is not modified.

        Returns:
            A new list containing the same elements in random order.
        """
        # list() instead of .copy() so tuples and other iterables work too.
        shuffled = list(items)
        random.shuffle(shuffled)
        return shuffled


# Module-level sorter instance shared by the API route handlers below.
sorter = RecitationSorter()


@app.route('/')
def index():
    """Serve the home page by rendering the ``index.html`` template."""
    return render_template('index.html')


@app.route('/recitation')
def recitation():
    """Serve the recitation-sorting page by rendering ``recitation.html``."""
    return render_template('recitation.html')


@app.route('/api/extract', methods=['POST'])
def extract_items():
    """Extract recitation items from the text posted by the client.

    Expects a JSON object with a non-empty string field ``text``.

    Returns:
        * 200 with ``success``, ``items`` and ``count`` when at least one
          item is recognised.
        * 400 with ``success: False`` and a ``message`` when the body is not
          a JSON object, ``text`` is missing, empty or not a string, or no
          item could be recognised.
        * 500 with ``success: False`` and a ``message`` on any unexpected
          error.
    """
    try:
        # silent=True turns a missing, malformed or non-JSON body into None
        # instead of raising, so bad requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        # The body may be valid JSON without being an object (e.g. a list).
        text = data.get('text', '') if isinstance(data, dict) else ''

        # A non-string value (number, list, ...) cannot be parsed as text.
        if not isinstance(text, str) or not text:
            return jsonify({
                'success': False,
                'message': '请输入要处理的文本'
            }), 400

        items = sorter.extract_items(text)

        if not items:
            return jsonify({
                'success': False,
                'message': '未能识别到背诵内容，请检查文本格式'
            }), 400

        logger.info("提取到 %d 个背诵项目", len(items))

        return jsonify({
            'success': True,
            'items': items,
            'count': len(items)
        })

    except Exception as e:
        # Last-resort boundary: log with traceback and answer with JSON.
        logger.exception("提取项目失败: %s", e)
        return jsonify({
            'success': False,
            'message': f'处理失败: {str(e)}'
        }), 500


@app.route('/api/sort', methods=['POST'])
def sort_items():
    """Return the posted recitation items in random order.

    Expects a JSON object with a non-empty list field ``items``.

    Returns:
        * 200 with ``success``, the shuffled ``items`` and ``count``.
        * 400 with ``success: False`` and a ``message`` when the body is not
          a JSON object or ``items`` is missing, empty or not a list.
        * 500 with ``success: False`` and a ``message`` on any unexpected
          error.
    """
    try:
        # silent=True turns a missing, malformed or non-JSON body into None
        # instead of raising, so bad requests get a 400 rather than a 500.
        data = request.get_json(silent=True)
        # The body may be valid JSON without being an object (e.g. a list).
        items = data.get('items', []) if isinstance(data, dict) else []

        # Only a JSON array can be shuffled; reject strings, objects, etc.
        if not isinstance(items, list) or not items:
            return jsonify({
                'success': False,
                'message': '请先提取背诵项目'
            }), 400

        sorted_items = sorter.random_sort(items)

        logger.info("对 %d 个项目进行随机排序", len(sorted_items))

        return jsonify({
            'success': True,
            'items': sorted_items,
            'count': len(sorted_items)
        })

    except Exception as e:
        # Last-resort boundary: log with traceback and answer with JSON.
        logger.exception("排序失败: %s", e)
        return jsonify({
            'success': False,
            'message': f'排序失败: {str(e)}'
        }), 500


@app.route('/health')
def health():
    """Health check: respond with the JSON body ``{"status": "ok"}``."""
    return jsonify({'status': 'ok'})


if __name__ == '__main__':
    import os

    # Create the working directories (relative to the current directory)
    # if they are missing.
    for directory in ('templates', 'static', 'logs'):
        Path(directory).mkdir(exist_ok=True)

    # The server listens on all interfaces, and the Werkzeug debugger lets
    # anyone who can reach it execute arbitrary code.  Debug mode is
    # therefore opt-in: set FLASK_DEBUG=1 (or true/yes/on) to enable it.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in (
        '1', 'true', 'yes', 'on',
    )

    # Start the development server.
    app.run(debug=debug_enabled, host='0.0.0.0', port=5000)