From a052cf317650449be499c3e4f1306b6739bcae07 Mon Sep 17 00:00:00 2001 From: Travis Date: Wed, 30 Jul 2025 23:27:01 +1000 Subject: [PATCH 1/3] Adding endpoints for collection create and vector insert --- .../Configuration/VectorDbSettings.cs | 4 +- dev-share-api/Controllers/ApiController.cs | 92 ++++++++++++++----- dev-share-api/Models/ShareVectorRequest.cs | 11 +++ dev-share-api/Services/VectorService.cs | 4 +- 4 files changed, 85 insertions(+), 26 deletions(-) create mode 100644 dev-share-api/Models/ShareVectorRequest.cs diff --git a/dev-share-api/Configuration/VectorDbSettings.cs b/dev-share-api/Configuration/VectorDbSettings.cs index 5729bec..025dc9d 100644 --- a/dev-share-api/Configuration/VectorDbSettings.cs +++ b/dev-share-api/Configuration/VectorDbSettings.cs @@ -5,8 +5,8 @@ public class VectorDbSettings public const string SectionName = "VectorDb"; // Collections - public string ResourceCollection { get; set; } = "DevShare_Resource"; - public string InsightCollection { get; set; } = "DevShare_Insight"; + public string ResourceCollection { get; set; } = "BlotzShare_Resource"; + public string InsightCollection { get; set; } = "BlotzShare_Insight"; // Vector dimensions public uint Dimensions { get; set; } = 384; // MiniLM-L6-v2 dimension diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index f234cd8..f1cad43 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -1,14 +1,9 @@ -using HtmlAgilityPack; -using Microsoft.Playwright; using Models; using Microsoft.AspNetCore.Mvc; using Services; using Qdrant.Client.Grpc; -using System.Text; using Executor; using System.Collections.Concurrent; -using System.Text.Json; -using Newtonsoft.Json.Linq; namespace Controllers; @@ -186,29 +181,82 @@ public async Task> Indexing([FromBody] string collect return Ok(await _vectorService.IndexingAsync(collectionName, field)); } - [HttpPost("insight/share")] - public async Task ShareInsight([FromBody] ShareInsightRequest request) + [HttpPost("collections/{collectionName}")] + public async Task CreateCollection(string collectionName) { - var insightId = request.InsightId ?? Guid.NewGuid().ToString(); - var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content); - var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content); + try + { + await _vectorService.CreateCollectionAsync(collectionName); + return Ok(new { message = $"Collection '{collectionName}' created successfully" }); + } + catch (Exception ex) + { + return StatusCode(500, new { error = $"Failed to create collection: {ex.Message}" }); + } + } + + [HttpPost("vectors/resource")] + public async Task UpsertResource([FromBody] ShareVectorRequest request) + { + try + { + var resourceId = request.ResourceId ?? Guid.NewGuid().ToString(); + var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content); + var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content); + + var denseVector = new DenseVector(); + denseVector.Data.AddRange(denseEmbedding); - var denseVector = new DenseVector(); - denseVector.Data.AddRange(denseEmbedding); + var sparseVector = new SparseVector(); + sparseVector.Indices.AddRange(indices); + sparseVector.Values.AddRange(values); - var sparseVector = new SparseVector(); - sparseVector.Indices.AddRange(indices); - sparseVector.Values.AddRange(values); + var vectors = new Dictionary + { + ["dense_vector"] = new() { Dense = denseVector }, + ["sparse_vector"] = new() { Sparse = sparseVector } + }; - var vectors = new Dictionary + request.Vectors = vectors; + await _vectorService.UpsertResourceAsync(resourceId, request.Url, request.Content, request.Vectors); + return Ok(new { message = "Resource vector upserted successfully" }); + } + catch (Exception ex) { - ["dense_vector"] = new() { Dense = denseVector }, - ["sparse_vector"] = new() { Sparse = sparseVector } - }; + return StatusCode(500, new { error = $"Failed to upsert resource vector: {ex.Message}" }); + } + } - request.Vectors = vectors; - await _vectorService.UpsertInsightAsync(insightId, request.Url, request.Content, request.ResourceId, request.Vectors); - return Ok(); + [HttpPost("vectors/insight")] + public async Task UpsertInsight([FromBody] ShareInsightRequest request) + { + try + { + var insightId = request.InsightId ?? Guid.NewGuid().ToString(); + var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content); + var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content); + + var denseVector = new DenseVector(); + denseVector.Data.AddRange(denseEmbedding); + + var sparseVector = new SparseVector(); + sparseVector.Indices.AddRange(indices); + sparseVector.Values.AddRange(values); + + var vectors = new Dictionary + { + ["dense_vector"] = new() { Dense = denseVector }, + ["sparse_vector"] = new() { Sparse = sparseVector } + }; + + request.Vectors = vectors; + await _vectorService.UpsertInsightAsync(insightId, request.Url, request.Content, request.ResourceId, request.Vectors); + return Ok(new { message = "Insight vector upserted successfully" }); + } + catch (Exception ex) + { + return StatusCode(500, new { error = $"Failed to upsert resource vector: {ex.Message}" }); + } } //todo make sure the return data from service is List and List diff --git a/dev-share-api/Models/ShareVectorRequest.cs b/dev-share-api/Models/ShareVectorRequest.cs new file mode 100644 index 0000000..0ea219e --- /dev/null +++ b/dev-share-api/Models/ShareVectorRequest.cs @@ -0,0 +1,11 @@ +using Qdrant.Client.Grpc; + +namespace Models; + +public class ShareVectorRequest +{ + public string ResourceId { get; set; } + public string Url { get; set; } + public string Content { get; set; } + public Dictionary Vectors { get; set; } +} \ No newline at end of file diff --git a/dev-share-api/Services/VectorService.cs b/dev-share-api/Services/VectorService.cs index bef6d11..04f8f5a 100644 --- a/dev-share-api/Services/VectorService.cs +++ b/dev-share-api/Services/VectorService.cs @@ -3,13 +3,13 @@ using Models; using Qdrant.Client; using Qdrant.Client.Grpc; -using System.Text.Json; namespace Services; public interface IVectorService { Task InitializeAsync(); + Task CreateCollectionAsync(string collectionName); Task IndexingAsync(string collectionName, string fieldName); Task UpdateCollectionAsync(string collectionName); @@ -135,7 +135,7 @@ public async Task> SearchInsightAsync(string query, int t return insightResults.Select(MapToInsightDto).ToList(); } - private async Task CreateCollectionAsync(string collectionName) + public async Task CreateCollectionAsync(string collectionName) { try { From db8c99d8a083f14aa984b388ec1030630d62363f Mon Sep 17 00:00:00 2001 From: Travis Date: Thu, 7 Aug 2025 16:58:33 +1000 Subject: [PATCH 2/3] Add video URL detection and refactor onlineSearchService --- dev-share-api/Controllers/ApiController.cs | 8 + .../Services/OnlineResearchService.cs | 151 ++++++++---------- dev-share-api/utils/UrlTypeDetector.cs | 49 ++++++ 3 files changed, 127 insertions(+), 81 deletions(-) create mode 100644 dev-share-api/utils/UrlTypeDetector.cs diff --git a/dev-share-api/Controllers/ApiController.cs b/dev-share-api/Controllers/ApiController.cs index f1cad43..817bfdf 100644 --- a/dev-share-api/Controllers/ApiController.cs +++ b/dev-share-api/Controllers/ApiController.cs @@ -19,6 +19,7 @@ public class ExtractController : ControllerBase private readonly IOnlineResearchService _onlineResearchService; private readonly IServiceScopeFactory _scopeFactory; private static readonly ConcurrentDictionary TaskStore = new(); + private static readonly HttpClient _httpClient = new(); public ExtractController( IEmbeddingService embeddingService, @@ -52,6 +53,13 @@ public async Task Share([FromBody] UrlRequest request) Console.WriteLine($"Extracting: {url}"); + bool isVideo = await UrlTypeDetector.IsVideoUrlAsync(url, _httpClient); + + if (isVideo) + { + return BadRequest(new { Type = "Video", Message = "Video URL detected" }); + } + var taskId = Guid.NewGuid().ToString(); var task = new ShareTask { diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index 310154d..8670cf1 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -13,12 +13,7 @@ public interface IOnlineResearchService public class OnlineResearchService : IOnlineResearchService { private readonly AzureOpenAIClient _client; - private readonly string _deploymentName = "gpt-4o-mini"; - private static readonly JsonSerializerOptions _jsonOptions = new() - { - PropertyNameCaseInsensitive = true, - WriteIndented = true - }; + private const string _deploymentName = "gpt-4o-mini"; public OnlineResearchService(AzureOpenAIClient openAIClient) { @@ -28,96 +23,90 @@ public OnlineResearchService(AzureOpenAIClient openAIClient) public async Task> PerformOnlineResearchAsync(string query, int topK = 3) { if (string.IsNullOrWhiteSpace(query)) - { throw new ArgumentException("Query cannot be empty", nameof(query)); - } - try + var messages = new List { - var response = await GetOpenAIResponseAsync(query, topK); - return await ParseResponseToVectorResourceDtos(response); - } - catch (Exception ex) - { - throw; - } - } - - private async Task GetOpenAIResponseAsync(string query, int topK) - { - var prompt = GeneratePrompt(query, topK); - ChatCompletion response = await _client.GetChatClient(_deploymentName) - .CompleteChatAsync(prompt); - - return response.Content?.FirstOrDefault()?.Text ?? string.Empty; - } + new SystemChatMessage($@" + You are an AI research assistant. Your task is to return an array of up to {topK} concise and factual resources relevant to the user's query. - private static async Task> ParseResponseToVectorResourceDtos(string response) - { - if (string.IsNullOrWhiteSpace(response)) - { - return new[] { CreateFallbackDto(response) }; - } - - try - { - // Clean the response by removing Markdown code block and escapes - var cleanedResponse = response - .Replace("```json", "") - .Replace("```", "") - .Replace("\\n", "") - .Replace("\n", "") - .Trim(); - - var results = await Task.Run(() => - JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); - - if (results?.Any() == true) - { - return results; - } - - // Try parsing as single object if array fails - var singleResult = await Task.Run(() => - JsonSerializer.Deserialize(cleanedResponse, _jsonOptions)); - - return singleResult != null - ? new[] { singleResult } - : new[] { CreateFallbackDto(response) }; - } - catch (JsonException ex) - { - return new[] { CreateFallbackDto(response) }; - } - } - - private static string GeneratePrompt(string query, int topK) - { - return @$" - You are an AI assistant. Given a user query, return an array of {topK} JSON objects with the following fields suitable for a vector database: + For each resource, provide: + - Title: A title for the answer of the summary in less than 15 words. + - Content: A concise factual answer or summary. + - Url: A direct, relevant web source. + Always call the `generate_research_results` function with your result in JSON: [ {{ - ""Content"": ""First concise, factual answer here."", - ""Url"": ""https://relevant-source-1.com"" - }}, - {{ - ""Content"": ""Second concise, factual answer here."", - ""Url"": ""https://relevant-source-2.com"" + ""title"": string, + ""content"": string, + ""url"": string }} ] - User query: {query} + Guidelines: + - No explanations or formatting. + - Never return plain text; always structured JSON using the function. + - Results must be unique and from reputable sources. + "), + new UserChatMessage(query) + }; + + var tool = CreateGenerateResearchResultsTool(topK); - Return exactly {topK} JSON objects in an array. Ensure each answer is unique and relevant."; + return await CallToolAndDeserializeAsync>( + toolFunctionName: "generate_research_results", + messages: messages, + tool: tool + ); } - private static ResourceDto CreateFallbackDto(string fallBackContent) + private ChatTool CreateGenerateResearchResultsTool(int topK) { - return new() + return ChatTool.CreateFunctionTool( + functionName: "generate_research_results", + functionDescription: $"Returns up to {topK} concise and factual research results for the given query.", + functionParameters: BinaryData.FromObjectAsJson(new + { + type = "array", + items = new + { + type = "object", + properties = new + { + title = new { type = "string", description = "Title" }, + content = new { type = "string", description = "Concise, factual answer or summary." }, + url = new { type = "string", description = "Direct relevant web source." } + }, + required = new[] { "title", "content", "url" } + }, + minItems = 1, + maxItems = topK + }) + ); + } + + public async Task CallToolAndDeserializeAsync( + string toolFunctionName, + List messages, + ChatTool tool) + { + var client = _client.GetChatClient(deploymentName: _deploymentName); + ChatCompletionOptions options = new() { - Content = fallBackContent, - Url = string.Empty + Tools = { tool } }; + ChatCompletion response = await client.CompleteChatAsync(messages, options); + + var toolCall = response.ToolCalls.FirstOrDefault(tc => tc.FunctionName == toolFunctionName); + if (toolCall == null) + throw new InvalidOperationException("No function call response found."); + + var json = toolCall.FunctionArguments.ToString(); + var result = JsonSerializer.Deserialize(json); + if (result == null) + throw new InvalidOperationException("Deserialization failed."); + + return result; } } diff --git a/dev-share-api/utils/UrlTypeDetector.cs b/dev-share-api/utils/UrlTypeDetector.cs new file mode 100644 index 0000000..4c933d4 --- /dev/null +++ b/dev-share-api/utils/UrlTypeDetector.cs @@ -0,0 +1,49 @@ +public static class UrlTypeDetector +{ + private static readonly string[] VideoExtensions = { ".mp4", ".webm", ".m3u8", ".mov", ".avi" }; + private static readonly string[] VideoDomains = { "youtube.com", "youtu.be", "vimeo.com", "dailymotion.com" }; + + public static bool IsLikelyVideoFromPattern(string url) + { + if (!Uri.TryCreate(url, UriKind.Absolute, out var uri)) + return false; + + var host = uri.Host.ToLowerInvariant(); + if (VideoDomains.Any(domain => host.Contains(domain))) + return true; + + var path = uri.AbsolutePath.ToLowerInvariant(); + if (VideoExtensions.Any(ext => path.EndsWith(ext))) + return true; + + return false; + } + + public static async Task IsVideoByContentTypeAsync(string url, HttpClient? client = null) + { + client ??= new HttpClient(); + + try + { + using var request = new HttpRequestMessage(HttpMethod.Head, url); + request.Headers.TryAddWithoutValidation("User-Agent", "Mozilla/5.0"); + + using var response = await client.SendAsync(request); + + if (!response.IsSuccessStatusCode) + return false; + + var mediaType = response.Content.Headers.ContentType?.MediaType; + return mediaType?.StartsWith("video/", StringComparison.OrdinalIgnoreCase) == true; + } + catch + { + return false; + } + } + + public static async Task IsVideoUrlAsync(string url, HttpClient? client = null) + { + return IsLikelyVideoFromPattern(url) || await IsVideoByContentTypeAsync(url, client); + } +} From 57319c03db008becc1fde36888fa80a5eb1c512a Mon Sep 17 00:00:00 2001 From: Travis Date: Thu, 7 Aug 2025 18:52:08 +1000 Subject: [PATCH 3/3] update json response for online search --- .../Services/OnlineResearchService.cs | 70 +++++++++++++------ 1 file changed, 48 insertions(+), 22 deletions(-) diff --git a/dev-share-api/Services/OnlineResearchService.cs b/dev-share-api/Services/OnlineResearchService.cs index 8670cf1..40e90ef 100644 --- a/dev-share-api/Services/OnlineResearchService.cs +++ b/dev-share-api/Services/OnlineResearchService.cs @@ -14,6 +14,11 @@ public class OnlineResearchService : IOnlineResearchService { private readonly AzureOpenAIClient _client; private const string _deploymentName = "gpt-4o-mini"; + private static readonly JsonSerializerOptions _jsonOptions = new() + { + PropertyNameCaseInsensitive = true, + WriteIndented = true + }; public OnlineResearchService(AzureOpenAIClient openAIClient) { @@ -36,13 +41,15 @@ public async Task> PerformOnlineResearchAsync(string qu - Url: A direct, relevant web source. Always call the `generate_research_results` function with your result in JSON: - [ - {{ - ""title"": string, - ""content"": string, - ""url"": string - }} - ] + {{ + ""results"": [ + {{ + ""title"": string, + ""content"": string, + ""url"": string + }} + ] + }} Guidelines: - No explanations or formatting. @@ -54,11 +61,11 @@ public async Task> PerformOnlineResearchAsync(string qu var tool = CreateGenerateResearchResultsTool(topK); - return await CallToolAndDeserializeAsync>( + return await CallToolAndDeserializeAsync( toolFunctionName: "generate_research_results", messages: messages, tool: tool - ); + ).ContinueWith(t => t.Result?.Results ?? new List()); ; } private ChatTool CreateGenerateResearchResultsTool(int topK) @@ -68,20 +75,28 @@ private ChatTool CreateGenerateResearchResultsTool(int topK) functionDescription: $"Returns up to {topK} concise and factual research results for the given query.", functionParameters: BinaryData.FromObjectAsJson(new { - type = "array", - items = new + type = "object", + properties = new { - type = "object", - properties = new + results = new { - title = new { type = "string", description = "Title" }, - content = new { type = "string", description = "Concise, factual answer or summary." }, - url = new { type = "string", description = "Direct relevant web source." } - }, - required = new[] { "title", "content", "url" } + type = "array", + items = new + { + type = "object", + properties = new + { + title = new { type = "string", description = "Title" }, + content = new { type = "string", description = "Concise, factual answer or summary." }, + url = new { type = "string", description = "Direct relevant web source." } + }, + required = new[] { "content", "url" } + }, + minItems = 1, + maxItems = topK + } }, - minItems = 1, - maxItems = topK + required = new[] { "results" } }) ); } @@ -102,11 +117,22 @@ public async Task CallToolAndDeserializeAsync( if (toolCall == null) throw new InvalidOperationException("No function call response found."); - var json = toolCall.FunctionArguments.ToString(); - var result = JsonSerializer.Deserialize(json); + var jsonRes = toolCall.FunctionArguments.ToString(); + var cleanedResponse = jsonRes + .Replace("```json", "") + .Replace("```", "") + .Replace("\\n", "") + .Replace("\n", "") + .Trim(); + var result = JsonSerializer.Deserialize(cleanedResponse, _jsonOptions); if (result == null) throw new InvalidOperationException("Deserialization failed."); return result; } + + private class ResourceResultWrapper + { + public List Results { get; set; } = new(); + } }