Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dev-share-api/Configuration/VectorDbSettings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ public class VectorDbSettings
public const string SectionName = "VectorDb";

// Collections
public string ResourceCollection { get; set; } = "DevShare_Resource";
public string InsightCollection { get; set; } = "DevShare_Insight";
public string ResourceCollection { get; set; } = "BlotzShare_Resource";
public string InsightCollection { get; set; } = "BlotzShare_Insight";

// Vector dimensions
public uint Dimensions { get; set; } = 384; // MiniLM-L6-v2 dimension
Expand Down
100 changes: 78 additions & 22 deletions dev-share-api/Controllers/ApiController.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
using HtmlAgilityPack;
using Microsoft.Playwright;
using Models;
using Microsoft.AspNetCore.Mvc;
using Services;
using Qdrant.Client.Grpc;
using System.Text;
using Executor;
using System.Collections.Concurrent;
using System.Text.Json;
using Newtonsoft.Json.Linq;


namespace Controllers;
Expand All @@ -24,6 +19,7 @@ public class ExtractController : ControllerBase
private readonly IOnlineResearchService _onlineResearchService;
private readonly IServiceScopeFactory _scopeFactory;
private static readonly ConcurrentDictionary<string, ShareTask> TaskStore = new();
private static readonly HttpClient _httpClient = new();

public ExtractController(
IEmbeddingService embeddingService,
Expand Down Expand Up @@ -57,6 +53,13 @@ public async Task<IActionResult> Share([FromBody] UrlRequest request)

Console.WriteLine($"Extracting: {url}");

bool isVideo = await UrlTypeDetector.IsVideoUrlAsync(url, _httpClient);

if (isVideo)
{
return BadRequest(new { Type = "Video", Message = "Video URL detected" });
}

var taskId = Guid.NewGuid().ToString();
var task = new ShareTask
{
Expand Down Expand Up @@ -186,29 +189,82 @@ public async Task<ActionResult<UpdateResult>> Indexing([FromBody] string collect
return Ok(await _vectorService.IndexingAsync(collectionName, field));
}

[HttpPost("insight/share")]
public async Task<IActionResult> ShareInsight([FromBody] ShareInsightRequest request)
/// <summary>
/// Asks the vector service to create a collection with the name taken from the route.
/// </summary>
/// <param name="collectionName">Name of the collection to create.</param>
/// <returns>
/// 200 with a confirmation message on success; 500 with the exception message when
/// the vector service call throws.
/// </returns>
[HttpPost("collections/{collectionName}")]
public async Task<IActionResult> CreateCollection(string collectionName)
{
    IActionResult outcome;

    try
    {
        await _vectorService.CreateCollectionAsync(collectionName);
        outcome = Ok(new { message = $"Collection '{collectionName}' created successfully" });
    }
    catch (Exception failure)
    {
        // Any failure from the vector service is surfaced to the caller as a 500.
        outcome = StatusCode(500, new { error = $"Failed to create collection: {failure.Message}" });
    }

    return outcome;
}

[HttpPost("vectors/resource")]
public async Task<IActionResult> UpsertResource([FromBody] ShareVectorRequest request)
{
var insightId = request.InsightId ?? Guid.NewGuid().ToString();
var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content);
var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content);
try
{
var resourceId = request.ResourceId ?? Guid.NewGuid().ToString();
var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content);
var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content);

var denseVector = new DenseVector();
denseVector.Data.AddRange(denseEmbedding);

var denseVector = new DenseVector();
denseVector.Data.AddRange(denseEmbedding);
var sparseVector = new SparseVector();
sparseVector.Indices.AddRange(indices);
sparseVector.Values.AddRange(values);

var sparseVector = new SparseVector();
sparseVector.Indices.AddRange(indices);
sparseVector.Values.AddRange(values);
var vectors = new Dictionary<string, Vector>
{
["dense_vector"] = new() { Dense = denseVector },
["sparse_vector"] = new() { Sparse = sparseVector }
};

var vectors = new Dictionary<string, Vector>
request.Vectors = vectors;
await _vectorService.UpsertResourceAsync(resourceId, request.Url, request.Content, request.Vectors);
return Ok(new { message = "Resource vector upserted successfully" });
}
catch (Exception ex)
{
["dense_vector"] = new() { Dense = denseVector },
["sparse_vector"] = new() { Sparse = sparseVector }
};
return StatusCode(500, new { error = $"Failed to upsert resource vector: {ex.Message}" });
}
}

request.Vectors = vectors;
await _vectorService.UpsertInsightAsync(insightId, request.Url, request.Content, request.ResourceId, request.Vectors);
return Ok();
/// <summary>
/// Embeds the insight content (dense and sparse) and upserts the insight, together
/// with both vectors, through the vector service.
/// </summary>
/// <param name="request">
/// Insight payload. When <c>InsightId</c> is null a new GUID string is used as the id.
/// The request's <c>Vectors</c> property is overwritten with the freshly computed vectors.
/// </param>
/// <returns>
/// 200 with a confirmation message on success; 500 with the exception message when
/// embedding or upserting throws.
/// </returns>
[HttpPost("vectors/insight")]
public async Task<IActionResult> UpsertInsight([FromBody] ShareInsightRequest request)
{
    try
    {
        var insightId = request.InsightId ?? Guid.NewGuid().ToString();
        var denseEmbedding = await _embeddingService.GetDenseEmbeddingAsync(request.Content);
        var (indices, values) = await _embeddingService.GetSparseEmbeddingAsync(request.Content);

        var denseVector = new DenseVector();
        denseVector.Data.AddRange(denseEmbedding);

        var sparseVector = new SparseVector();
        sparseVector.Indices.AddRange(indices);
        sparseVector.Values.AddRange(values);

        // Both embeddings are sent as named vectors on the same point.
        var vectors = new Dictionary<string, Vector>
        {
            ["dense_vector"] = new() { Dense = denseVector },
            ["sparse_vector"] = new() { Sparse = sparseVector }
        };

        request.Vectors = vectors;
        await _vectorService.UpsertInsightAsync(insightId, request.Url, request.Content, request.ResourceId, request.Vectors);
        return Ok(new { message = "Insight vector upserted successfully" });
    }
    catch (Exception ex)
    {
        // This endpoint handles insights, so the error must name the insight upsert
        // (not the resource upsert) to keep failures distinguishable for clients.
        return StatusCode(500, new { error = $"Failed to upsert insight vector: {ex.Message}" });
    }
}

// TODO: make sure the data returned from the service is List<Resource> and List<Insight>
Expand Down
11 changes: 11 additions & 0 deletions dev-share-api/Models/ShareVectorRequest.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
using Qdrant.Client.Grpc;

namespace Models;

/// <summary>
/// Request payload describing a resource by id, URL and content, plus a set of
/// Qdrant vectors keyed by string.
/// </summary>
public class ShareVectorRequest
{
/// <summary>Identifier of the resource. NOTE(review): callers appear to treat null as "generate a new id" — confirm.</summary>
public string ResourceId { get; set; }
/// <summary>URL associated with the resource.</summary>
public string Url { get; set; }
/// <summary>Text content of the resource.</summary>
public string Content { get; set; }
/// <summary>Vectors for the resource, keyed by string (presumably the vector name — verify against the caller).</summary>
public Dictionary<string, Vector> Vectors { get; set; }
}
159 changes: 87 additions & 72 deletions dev-share-api/Services/OnlineResearchService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public interface IOnlineResearchService
public class OnlineResearchService : IOnlineResearchService
{
private readonly AzureOpenAIClient _client;
private readonly string _deploymentName = "gpt-4o-mini";
private const string _deploymentName = "gpt-4o-mini";
private static readonly JsonSerializerOptions _jsonOptions = new()
{
PropertyNameCaseInsensitive = true,
Expand All @@ -28,96 +28,111 @@ public OnlineResearchService(AzureOpenAIClient openAIClient)
public async Task<IEnumerable<ResourceDto>> PerformOnlineResearchAsync(string query, int topK = 3)
{
if (string.IsNullOrWhiteSpace(query))
{
throw new ArgumentException("Query cannot be empty", nameof(query));
}

try
{
var response = await GetOpenAIResponseAsync(query, topK);
return await ParseResponseToVectorResourceDtos(response);
}
catch (Exception ex)
var messages = new List<ChatMessage>
{
throw;
}
new SystemChatMessage($@"
You are an AI research assistant. Your task is to return an array of up to {topK} concise and factual resources relevant to the user's query.

For each resource, provide:
- Title: A title for the answer of the summary in less than 15 words.
- Content: A concise factual answer or summary.
- Url: A direct, relevant web source.

Always call the `generate_research_results` function with your result in JSON:
{{
""results"": [
{{
""title"": string,
""content"": string,
""url"": string
}}
]
}}

Guidelines:
- No explanations or formatting.
- Never return plain text; always structured JSON using the function.
- Results must be unique and from reputable sources.
"),
new UserChatMessage(query)
};

var tool = CreateGenerateResearchResultsTool(topK);

return await CallToolAndDeserializeAsync<ResourceResultWrapper>(
toolFunctionName: "generate_research_results",
messages: messages,
tool: tool
).ContinueWith(t => t.Result?.Results ?? new List<ResourceDto>()); ;
}

private async Task<string> GetOpenAIResponseAsync(string query, int topK)
private ChatTool CreateGenerateResearchResultsTool(int topK)
{
var prompt = GeneratePrompt(query, topK);
ChatCompletion response = await _client.GetChatClient(_deploymentName)
.CompleteChatAsync(prompt);

return response.Content?.FirstOrDefault()?.Text ?? string.Empty;
return ChatTool.CreateFunctionTool(
functionName: "generate_research_results",
functionDescription: $"Returns up to {topK} concise and factual research results for the given query.",
functionParameters: BinaryData.FromObjectAsJson(new
{
type = "object",
properties = new
{
results = new
{
type = "array",
items = new
{
type = "object",
properties = new
{
title = new { type = "string", description = "Title" },
content = new { type = "string", description = "Concise, factual answer or summary." },
url = new { type = "string", description = "Direct relevant web source." }
},
required = new[] { "content", "url" }
},
minItems = 1,
maxItems = topK
}
},
required = new[] { "results" }
})
);
}

private static async Task<IEnumerable<ResourceDto>> ParseResponseToVectorResourceDtos(string response)
public async Task<T> CallToolAndDeserializeAsync<T>(
string toolFunctionName,
List<ChatMessage> messages,
ChatTool tool)
{
if (string.IsNullOrWhiteSpace(response))
var client = _client.GetChatClient(deploymentName: _deploymentName);
ChatCompletionOptions options = new()
{
return new[] { CreateFallbackDto(response) };
}
Tools = { tool }
};
ChatCompletion response = await client.CompleteChatAsync(messages, options);

try
{
// Clean the response by removing Markdown code block and escapes
var cleanedResponse = response
var toolCall = response.ToolCalls.FirstOrDefault(tc => tc.FunctionName == toolFunctionName);
if (toolCall == null)
throw new InvalidOperationException("No function call response found.");

var jsonRes = toolCall.FunctionArguments.ToString();
var cleanedResponse = jsonRes
.Replace("```json", "")
.Replace("```", "")
.Replace("\\n", "")
.Replace("\n", "")
.Trim();
var result = JsonSerializer.Deserialize<T>(cleanedResponse, _jsonOptions);
if (result == null)
throw new InvalidOperationException("Deserialization failed.");

var results = await Task.Run(() =>
JsonSerializer.Deserialize<ResourceDto[]>(cleanedResponse, _jsonOptions));

if (results?.Any() == true)
{
return results;
}

// Try parsing as single object if array fails
var singleResult = await Task.Run(() =>
JsonSerializer.Deserialize<ResourceDto>(cleanedResponse, _jsonOptions));

return singleResult != null
? new[] { singleResult }
: new[] { CreateFallbackDto(response) };
}
catch (JsonException ex)
{
return new[] { CreateFallbackDto(response) };
}
return result;
}

private static string GeneratePrompt(string query, int topK)
private class ResourceResultWrapper
{
return @$"
You are an AI assistant. Given a user query, return an array of {topK} JSON objects with the following fields suitable for a vector database:

[
{{
""Content"": ""First concise, factual answer here."",
""Url"": ""https://relevant-source-1.com""
}},
{{
""Content"": ""Second concise, factual answer here."",
""Url"": ""https://relevant-source-2.com""
}}
]

User query: {query}

Return exactly {topK} JSON objects in an array. Ensure each answer is unique and relevant.";
}

private static ResourceDto CreateFallbackDto(string fallBackContent)
{
return new()
{
Content = fallBackContent,
Url = string.Empty
};
public List<ResourceDto> Results { get; set; } = new();
}
}
4 changes: 2 additions & 2 deletions dev-share-api/Services/VectorService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@
using Models;
using Qdrant.Client;
using Qdrant.Client.Grpc;
using System.Text.Json;

namespace Services;

public interface IVectorService
{
Task InitializeAsync();
Task CreateCollectionAsync(string collectionName);
Task<UpdateResult> IndexingAsync(string collectionName, string fieldName);
Task UpdateCollectionAsync(string collectionName);

Expand Down Expand Up @@ -135,7 +135,7 @@ public async Task<List<VectorInsightDto>> SearchInsightAsync(string query, int t
return insightResults.Select(MapToInsightDto).ToList();
}

private async Task CreateCollectionAsync(string collectionName)
public async Task CreateCollectionAsync(string collectionName)
{
try
{
Expand Down
Loading
Loading