mirror of
https://github.com/open-webui/open-webui.git
synced 2026-05-05 18:38:17 -05:00
124 lines
4.0 KiB
Python
124 lines
4.0 KiB
Python
import logging
|
|
from typing import Optional
|
|
from open_webui.retrieval.web.main import SearchResult, get_filtered_results
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
"""
|
|
Azure AI Search integration for Open WebUI.
|
|
Documentation: https://learn.microsoft.com/en-us/python/api/overview/azure/search-documents-readme?view=azure-python
|
|
|
|
Required package: azure-search-documents
|
|
Install: pip install azure-search-documents
|
|
"""
|
|
|
|
|
|
def search_azure(
    api_key: str,
    endpoint: str,
    index_name: str,
    query: str,
    count: int,
    filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
    """
    Search using Azure AI Search.

    Azure AI Search indexes use custom schemas, so the link, title and
    snippet of each hit are taken from the first non-empty field among a
    list of commonly used field names. Documents for which no link field
    is found are skipped.

    Args:
        api_key: Azure Search API key (query key or admin key)
        endpoint: Azure Search service endpoint (e.g., https://myservice.search.windows.net)
        index_name: Name of the search index to query
        query: Search query string
        count: Number of results to request from the service
        filter_list: Optional list of domains to filter results

    Returns:
        List of SearchResult objects with link, title, and snippet

    Raises:
        ImportError: If the azure-search-documents package is not installed.
        Exception: Any error raised while querying or converting the results
            is logged and re-raised unchanged.
    """
    # Imported lazily so the module can be loaded without the optional
    # azure-search-documents dependency.
    try:
        from azure.core.credentials import AzureKeyCredential
        from azure.search.documents import SearchClient
    except ImportError as err:
        log.error(
            'azure-search-documents package is not installed. Install it with: pip install azure-search-documents'
        )
        raise ImportError(
            'azure-search-documents is required for Azure AI Search. '
            'Install it with: pip install azure-search-documents'
        ) from err

    try:
        # Create search client with API key authentication
        credential = AzureKeyCredential(api_key)

        search_results = []
        # Use the client as a context manager so its HTTP session is closed
        # when we are done. The result iterator is consumed inside the block
        # because pages are fetched from the service while iterating.
        with SearchClient(endpoint=endpoint, index_name=index_name, credential=credential) as search_client:
            for result in search_client.search(search_text=query, top=count):
                result_dict = dict(result)

                link = _first_non_empty(result_dict, _AZURE_LINK_FIELDS) or ''
                if not link:
                    # Only keep documents for which a link was found
                    continue

                title = _first_non_empty(result_dict, _AZURE_TITLE_FIELDS)
                snippet = _first_non_empty(result_dict, _AZURE_SNIPPET_FIELDS)

                # Truncate snippet if too long, keeping room for the ellipsis
                if snippet and len(snippet) > _AZURE_SNIPPET_MAX_LEN:
                    snippet = snippet[: _AZURE_SNIPPET_MAX_LEN - 3] + '...'

                search_results.append(
                    {
                        'link': link,
                        'title': title,
                        'snippet': snippet,
                    }
                )

        # Apply domain filtering if specified
        if filter_list:
            search_results = get_filtered_results(search_results, filter_list)

        # Convert to SearchResult objects
        return [
            SearchResult(
                link=item['link'],
                title=item.get('title'),
                snippet=item.get('snippet'),
            )
            for item in search_results
        ]

    except Exception as ex:
        # Log with traceback, then re-raise the original exception untouched
        log.exception('Azure AI Search error: %s', ex)
        raise


# Candidate field names, in priority order, for each part of a search hit.
_AZURE_LINK_FIELDS = ('url', 'link', 'uri', 'metadata_storage_path')
_AZURE_TITLE_FIELDS = ('title', 'name', 'metadata_title', 'metadata_storage_name')
_AZURE_SNIPPET_FIELDS = ('content', 'snippet', 'description', 'summary', 'text')

# Maximum snippet length, including the trailing '...' added on truncation.
_AZURE_SNIPPET_MAX_LEN = 500


def _first_non_empty(document: dict, field_names: tuple):
    """Return the first truthy value of `document` among `field_names`, else None."""
    for field_name in field_names:
        value = document.get(field_name)
        if value:
            return value
    return None
|