feat: Introduce LLM response caching and streaming, add application configuration, and enhance session data with progress and history tracking.
This commit is contained in:
@@ -5,8 +5,17 @@ LLM调用辅助模块
|
||||
|
||||
import asyncio
|
||||
import yaml
|
||||
from typing import Optional, Callable, AsyncIterator
|
||||
from config.llm_config import LLMConfig
|
||||
from config.app_config import app_config
|
||||
from utils.fallback_openai_client import AsyncFallbackOpenAIClient
|
||||
from utils.cache_manager import LLMCacheManager
|
||||
|
||||
# Module-level LLM response cache, shared by all LLMHelper instances.
# Location and on/off switch come from the application configuration.
llm_cache = LLMCacheManager(
    cache_dir=app_config.llm_cache_dir,
    enabled=app_config.llm_cache_enabled
)
|
||||
|
||||
class LLMHelper:
|
||||
"""LLM调用辅助类,支持同步和异步调用"""
|
||||
@@ -82,6 +91,104 @@ class LLMHelper:
|
||||
print(f"原始响应: {response}")
|
||||
return {}
|
||||
|
||||
|
||||
async def close(self):
|
||||
"""关闭客户端"""
|
||||
await self.client.close()
|
||||
await self.client.close()
|
||||
|
||||
async def async_call_with_cache(
    self,
    prompt: str,
    system_prompt: str = None,
    max_tokens: int = None,
    temperature: float = None,
    use_cache: bool = True
) -> str:
    """Asynchronous LLM call with transparent response caching.

    The request is first looked up in the module-level ``llm_cache``;
    on a hit the cached text is returned immediately. On a miss the call
    is delegated to ``async_call`` and any non-empty result is stored.

    Args:
        prompt: User message content.
        system_prompt: Optional system message prepended to the chat.
        max_tokens: Forwarded to ``async_call`` (client default if None).
        temperature: Forwarded to ``async_call`` (client default if None).
        use_cache: Set False to bypass the cache for this call.

    Returns:
        The model's response text (possibly served from cache).
    """
    chat_messages = (
        [{"role": "system", "content": system_prompt}] if system_prompt else []
    )
    chat_messages.append({"role": "user", "content": prompt})

    # Cache key covers the full message list plus the model name.
    key = llm_cache.get_cache_key_from_messages(chat_messages, self.config.model)
    caching_active = use_cache and app_config.llm_cache_enabled

    if caching_active:
        hit = llm_cache.get(key)
        if hit:
            print("💾 使用LLM缓存响应")
            return hit

    result = await self.async_call(prompt, system_prompt, max_tokens, temperature)

    # Only non-empty responses are worth caching.
    if caching_active and result:
        llm_cache.set(key, result)

    return result
|
||||
|
||||
def call_with_cache(
    self,
    prompt: str,
    system_prompt: str = None,
    max_tokens: int = None,
    temperature: float = None,
    use_cache: bool = True
) -> str:
    """Synchronous wrapper around :meth:`async_call_with_cache`.

    Reuses the current event loop when one exists, otherwise creates and
    installs a fresh one. ``nest_asyncio`` is applied so that
    ``run_until_complete`` also works when called from inside an already
    running loop (e.g. a notebook).
    """
    try:
        event_loop = asyncio.get_event_loop()
    except RuntimeError:
        # No usable loop in this thread — make one and install it.
        event_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(event_loop)

    # Imported lazily so the dependency is only needed on the sync path.
    import nest_asyncio
    nest_asyncio.apply()

    coroutine = self.async_call_with_cache(
        prompt, system_prompt, max_tokens, temperature, use_cache
    )
    return event_loop.run_until_complete(coroutine)
|
||||
|
||||
async def async_call_stream(
|
||||
self,
|
||||
prompt: str,
|
||||
system_prompt: str = None,
|
||||
max_tokens: int = None,
|
||||
temperature: float = None,
|
||||
callback: Optional[Callable[[str], None]] = None
|
||||
) -> AsyncIterator[str]:
|
||||
"""流式异步LLM调用"""
|
||||
messages = []
|
||||
if system_prompt:
|
||||
messages.append({"role": "system", "content": system_prompt})
|
||||
messages.append({"role": "user", "content": prompt})
|
||||
|
||||
kwargs = {
|
||||
'stream': True,
|
||||
'max_tokens': max_tokens or self.config.max_tokens,
|
||||
'temperature': temperature or self.config.temperature
|
||||
}
|
||||
|
||||
try:
|
||||
response = await self.client.chat_completions_create(
|
||||
messages=messages,
|
||||
**kwargs
|
||||
)
|
||||
|
||||
full_response = ""
|
||||
async for chunk in response:
|
||||
if chunk.choices[0].delta.content:
|
||||
content = chunk.choices[0].delta.content
|
||||
full_response += content
|
||||
|
||||
# 调用回调函数
|
||||
if callback:
|
||||
callback(content)
|
||||
|
||||
yield content
|
||||
|
||||
except Exception as e:
|
||||
print(f"流式LLM调用失败: {e}")
|
||||
yield ""
|
||||
Reference in New Issue
Block a user