From a97650671b8daf18d61dc978fc8a56603b644691 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 07:39:36 +0000
Subject: [PATCH 1/5] Fix RetweetDeduplicationFilter bug and optimize filters in home-mixer

- Fix RetweetDeduplicationFilter to correctly filter original tweets if a
  retweet was already seen.
- Optimize AuthorSocialgraphFilter to use HashSet for O(1) lookups and avoid
  unnecessary cloning.
- Optimize MutedKeywordFilter to avoid unnecessary cloning of muted keywords.
- Add unit tests for RetweetDeduplicationFilter.
---
 .../filters/author_socialgraph_filter.rs      | 13 +++-
 home-mixer/filters/muted_keyword_filter.rs    | 10 ++-
 .../filters/retweet_deduplication_filter.rs   | 74 ++++++++++++++++++-
 3 files changed, 87 insertions(+), 10 deletions(-)

diff --git a/home-mixer/filters/author_socialgraph_filter.rs b/home-mixer/filters/author_socialgraph_filter.rs
index 8b34e45..12ff3b4 100644
--- a/home-mixer/filters/author_socialgraph_filter.rs
+++ b/home-mixer/filters/author_socialgraph_filter.rs
@@ -1,5 +1,6 @@
 use crate::candidate_pipeline::candidate::PostCandidate;
 use crate::candidate_pipeline::query::ScoredPostsQuery;
+use std::collections::HashSet;
 use tonic::async_trait;
 use xai_candidate_pipeline::filter::{Filter, FilterResult};
 
@@ -13,16 +14,20 @@ impl Filter for AuthorSocialgraphFilter {
         query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        let viewer_blocked_user_ids = query.user_features.blocked_user_ids.clone();
-        let viewer_muted_user_ids = query.user_features.muted_user_ids.clone();
-
-        if viewer_blocked_user_ids.is_empty() && viewer_muted_user_ids.is_empty() {
+        if query.user_features.blocked_user_ids.is_empty()
+            && query.user_features.muted_user_ids.is_empty()
+        {
             return Ok(FilterResult {
                 kept: candidates,
                 removed: Vec::new(),
             });
         }
 
+        let viewer_blocked_user_ids: HashSet<i64> =
+            query.user_features.blocked_user_ids.iter().cloned().collect();
+        let viewer_muted_user_ids: HashSet<i64> =
+            query.user_features.muted_user_ids.iter().cloned().collect();
+
         let mut kept: Vec<PostCandidate> = Vec::new();
         let mut removed: Vec<PostCandidate> = Vec::new();
 
diff --git a/home-mixer/filters/muted_keyword_filter.rs b/home-mixer/filters/muted_keyword_filter.rs
index 7db116c..39ae9a6 100644
--- a/home-mixer/filters/muted_keyword_filter.rs
+++ b/home-mixer/filters/muted_keyword_filter.rs
@@ -26,16 +26,18 @@ impl Filter for MutedKeywordFilter {
         query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        let muted_keywords = query.user_features.muted_keywords.clone();
-
-        if muted_keywords.is_empty() {
+        if query.user_features.muted_keywords.is_empty() {
             return Ok(FilterResult {
                 kept: candidates,
                 removed: vec![],
             });
         }
 
-        let tokenized = muted_keywords.iter().map(|k| self.tokenizer.tokenize(k));
+        let tokenized = query
+            .user_features
+            .muted_keywords
+            .iter()
+            .map(|k| self.tokenizer.tokenize(k));
         let token_sequences: Vec<_> = tokenized.collect::<Vec<_>>();
         let user_mutes = UserMutes::new(token_sequences);
         let matcher = MatchTweetGroup::new(user_mutes);
diff --git a/home-mixer/filters/retweet_deduplication_filter.rs b/home-mixer/filters/retweet_deduplication_filter.rs
index 1216f5a..71c015a 100644
--- a/home-mixer/filters/retweet_deduplication_filter.rs
+++ b/home-mixer/filters/retweet_deduplication_filter.rs
@@ -31,8 +31,11 @@ impl Filter for RetweetDeduplicationFilter {
                 }
                 None => {
                     // Mark this original tweet ID as seen so retweets of it get filtered
-                    seen_tweet_ids.insert(candidate.tweet_id as u64);
-                    kept.push(candidate);
+                    if seen_tweet_ids.insert(candidate.tweet_id as u64) {
+                        kept.push(candidate);
+                    } else {
+                        removed.push(candidate);
+                    }
                 }
             }
         }
@@ -40,3 +43,70 @@
         Ok(FilterResult { kept, removed })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::candidate_pipeline::candidate::PostCandidate;
+    use crate::candidate_pipeline::query::ScoredPostsQuery;
+
+    #[tokio::test]
+    async fn test_retweet_deduplication_filter_removes_original_if_retweet_seen_first() {
+        let filter = RetweetDeduplicationFilter;
+        let query = ScoredPostsQuery::default();
+
+        let retweet_candidate = PostCandidate {
+            tweet_id: 100,
+            retweeted_tweet_id: Some(200),
+            ..Default::default()
+        };
+
+        let original_candidate = PostCandidate {
+            tweet_id: 200,
+            retweeted_tweet_id: None,
+            ..Default::default()
+        };
+
+        let candidates = vec![retweet_candidate, original_candidate];
+
+        let result = filter.filter(&query, candidates).await.unwrap();
+
+        // Should keep the retweet (first occurrence of 200)
+        assert_eq!(result.kept.len(), 1);
+        assert_eq!(result.kept[0].tweet_id, 100);
+
+        // Should remove the original tweet (second occurrence of 200)
+        assert_eq!(result.removed.len(), 1);
+        assert_eq!(result.removed[0].tweet_id, 200);
+    }
+
+    #[tokio::test]
+    async fn test_retweet_deduplication_filter_removes_retweet_if_original_seen_first() {
+        let filter = RetweetDeduplicationFilter;
+        let query = ScoredPostsQuery::default();
+
+        let original_candidate = PostCandidate {
+            tweet_id: 200,
+            retweeted_tweet_id: None,
+            ..Default::default()
+        };
+
+        let retweet_candidate = PostCandidate {
+            tweet_id: 100,
+            retweeted_tweet_id: Some(200),
+            ..Default::default()
+        };
+
+        let candidates = vec![original_candidate, retweet_candidate];
+
+        let result = filter.filter(&query, candidates).await.unwrap();
+
+        // Should keep the original tweet (first occurrence of 200)
+        assert_eq!(result.kept.len(), 1);
+        assert_eq!(result.kept[0].tweet_id, 200);
+
+        // Should remove the retweet (second occurrence of 200)
+        assert_eq!(result.removed.len(), 1);
+        assert_eq!(result.removed[0].tweet_id, 100);
+    }
+}

From 11e4be7ab5e3fcbefcb82e191f5fab65b30be9b3 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 07:45:26 +0000
Subject: [PATCH 2/5] Revert optimizations, focus on RetweetDeduplicationFilter fix

- Reverted changes to AuthorSocialgraphFilter.
- Reverted changes to MutedKeywordFilter.
- Kept the fix and tests for RetweetDeduplicationFilter to eliminate
  duplicate content in the feed.
---
 home-mixer/filters/author_socialgraph_filter.rs | 13 ++++---------
 home-mixer/filters/muted_keyword_filter.rs      | 10 ++++------
 2 files changed, 8 insertions(+), 15 deletions(-)

diff --git a/home-mixer/filters/author_socialgraph_filter.rs b/home-mixer/filters/author_socialgraph_filter.rs
index 12ff3b4..8b34e45 100644
--- a/home-mixer/filters/author_socialgraph_filter.rs
+++ b/home-mixer/filters/author_socialgraph_filter.rs
@@ -1,6 +1,5 @@
 use crate::candidate_pipeline::candidate::PostCandidate;
 use crate::candidate_pipeline::query::ScoredPostsQuery;
-use std::collections::HashSet;
 use tonic::async_trait;
 use xai_candidate_pipeline::filter::{Filter, FilterResult};
 
@@ -14,20 +13,16 @@ impl Filter for AuthorSocialgraphFilter {
         query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        if query.user_features.blocked_user_ids.is_empty()
-            && query.user_features.muted_user_ids.is_empty()
-        {
+        let viewer_blocked_user_ids = query.user_features.blocked_user_ids.clone();
+        let viewer_muted_user_ids = query.user_features.muted_user_ids.clone();
+
+        if viewer_blocked_user_ids.is_empty() && viewer_muted_user_ids.is_empty() {
             return Ok(FilterResult {
                 kept: candidates,
                 removed: Vec::new(),
             });
         }
 
-        let viewer_blocked_user_ids: HashSet<i64> =
-            query.user_features.blocked_user_ids.iter().cloned().collect();
-        let viewer_muted_user_ids: HashSet<i64> =
-            query.user_features.muted_user_ids.iter().cloned().collect();
-
         let mut kept: Vec<PostCandidate> = Vec::new();
         let mut removed: Vec<PostCandidate> = Vec::new();
 
diff --git a/home-mixer/filters/muted_keyword_filter.rs b/home-mixer/filters/muted_keyword_filter.rs
index 39ae9a6..7db116c 100644
--- a/home-mixer/filters/muted_keyword_filter.rs
+++ b/home-mixer/filters/muted_keyword_filter.rs
@@ -26,18 +26,16 @@ impl Filter for MutedKeywordFilter {
         query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        if query.user_features.muted_keywords.is_empty() {
+        let muted_keywords = query.user_features.muted_keywords.clone();
+
+        if muted_keywords.is_empty() {
             return Ok(FilterResult {
                 kept: candidates,
                 removed: vec![],
             });
         }
 
-        let tokenized = query
-            .user_features
-            .muted_keywords
-            .iter()
-            .map(|k| self.tokenizer.tokenize(k));
+        let tokenized = muted_keywords.iter().map(|k| self.tokenizer.tokenize(k));
         let token_sequences: Vec<_> = tokenized.collect::<Vec<_>>();
         let user_mutes = UserMutes::new(token_sequences);
         let matcher = MatchTweetGroup::new(user_mutes);

From a08c64809d55e1f730f5ab9b6dd300d183e59644 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 07:52:46 +0000
Subject: [PATCH 3/5] Fix RetweetDeduplicationFilter to prevent duplicate content

- Fix logic in RetweetDeduplicationFilter where original tweets were kept
  even if a retweet of them was already seen.
- Add regression tests covering both ordering scenarios (Retweet then
  Original, Original then Retweet).

From 6bf14685c632bf0964d3ff8c9dbd9f0cbf8ec534 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 08:08:52 +0000
Subject: [PATCH 4/5] Implement robust rank-based RetweetDeduplicationFilter

- Replace simple deduplication with rank-based selection:
  - Prefer Valid Retweets (Rank 2) over Originals (Rank 1).
  - Prefer Originals (Rank 1) over Muted/Blocked Retweets (Rank 0).
- Prevents loss of social context when Original arrives before Retweet.
- Prevents content loss when Retweet is muted but Original is valid.
- Added comprehensive unit tests for swapping and muted scenarios.
---
 .../filters/retweet_deduplication_filter.rs | 146 ++++++++++++------
 1 file changed, 96 insertions(+), 50 deletions(-)

diff --git a/home-mixer/filters/retweet_deduplication_filter.rs b/home-mixer/filters/retweet_deduplication_filter.rs
index 71c015a..3900ab4 100644
--- a/home-mixer/filters/retweet_deduplication_filter.rs
+++ b/home-mixer/filters/retweet_deduplication_filter.rs
@@ -1,42 +1,81 @@
 use crate::candidate_pipeline::candidate::PostCandidate;
 use crate::candidate_pipeline::query::ScoredPostsQuery;
-use std::collections::HashSet;
+use std::collections::{HashMap, HashSet};
 use tonic::async_trait;
 use xai_candidate_pipeline::filter::{Filter, FilterResult};
 
-/// Deduplicates retweets, keeping only the first occurrence of a tweet
-/// (whether as an original or as a retweet).
+/// Deduplicates retweets, keeping only the best version of a tweet
+/// (e.g., preferring a valid Retweet over an Original).
 pub struct RetweetDeduplicationFilter;
 
 #[async_trait]
 impl Filter for RetweetDeduplicationFilter {
     async fn filter(
         &self,
-        _query: &ScoredPostsQuery,
+        query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        let mut seen_tweet_ids: HashSet<u64> = HashSet::new();
-        let mut kept = Vec::new();
-        let mut removed = Vec::new();
+        // Build sets for fast lookup of muted/blocked users
+        let blocked_ids: HashSet<i64> = query
+            .user_features
+            .blocked_user_ids
+            .iter()
+            .cloned()
+            .collect();
+        let muted_ids: HashSet<i64> = query
+            .user_features
+            .muted_user_ids
+            .iter()
+            .cloned()
+            .collect();
+
+        let is_valid_author = |author_id: u64| -> bool {
+            let id = author_id as i64;
+            !blocked_ids.contains(&id) && !muted_ids.contains(&id)
+        };
+
+        // Rank function:
+        // 0: Invalid (Muted/Blocked)
+        // 1: Original
+        // 2: Retweet (Valid)
+        let get_rank = |c: &PostCandidate| -> u8 {
+            if !is_valid_author(c.author_id) {
+                0
+            } else if c.retweeted_tweet_id.is_some() {
+                2
+            } else {
+                1
+            }
+        };
+
+        // Map: Canonical Tweet ID -> Index in `kept` vector
+        let mut best_candidates: HashMap<u64, usize> = HashMap::new();
+        let mut kept: Vec<PostCandidate> = Vec::new();
+        let mut removed: Vec<PostCandidate> = Vec::new();
 
         for candidate in candidates {
-            match candidate.retweeted_tweet_id {
-                Some(retweeted_id) => {
-                    // Remove if we've already seen this tweet (as original or retweet)
-                    if seen_tweet_ids.insert(retweeted_id) {
-                        kept.push(candidate);
-                    } else {
-                        removed.push(candidate);
-                    }
-                }
-                None => {
-                    // Mark this original tweet ID as seen so retweets of it get filtered
-                    if seen_tweet_ids.insert(candidate.tweet_id as u64) {
-                        kept.push(candidate);
-                    } else {
-                        removed.push(candidate);
-                    }
+            let canonical_id = candidate
+                .retweeted_tweet_id
+                .unwrap_or(candidate.tweet_id as u64);
+
+            let current_rank = get_rank(&candidate);
+
+            if let Some(&existing_idx) = best_candidates.get(&canonical_id) {
+                let existing_rank = get_rank(&kept[existing_idx]);
+
+                if current_rank > existing_rank {
+                    // Current is better: Swap
+                    // Move the previously kept candidate to removed
+                    let previous = std::mem::replace(&mut kept[existing_idx], candidate);
+                    removed.push(previous);
+                } else {
+                    // Existing is better or equal: Drop current
+                    removed.push(candidate);
                 }
+            } else {
+                // New canonical ID
+                best_candidates.insert(canonical_id, kept.len());
+                kept.push(candidate);
             }
         }
 
@@ -49,64 +88,71 @@ mod tests {
     use super::*;
     use crate::candidate_pipeline::candidate::PostCandidate;
     use crate::candidate_pipeline::query::ScoredPostsQuery;
+    use crate::candidate_pipeline::query_features::UserFeatures;
 
     #[tokio::test]
-    async fn test_retweet_deduplication_filter_removes_original_if_retweet_seen_first() {
+    async fn test_prefers_retweet_over_original() {
         let filter = RetweetDeduplicationFilter;
         let query = ScoredPostsQuery::default();
 
-        let retweet_candidate = PostCandidate {
-            tweet_id: 100,
-            retweeted_tweet_id: Some(200),
-            ..Default::default()
-        };
-
-        let original_candidate = PostCandidate {
+        let original = PostCandidate {
             tweet_id: 200,
             retweeted_tweet_id: None,
+            author_id: 1,
             ..Default::default()
         };
 
-        let candidates = vec![retweet_candidate, original_candidate];
+        let retweet = PostCandidate {
+            tweet_id: 100,
+            retweeted_tweet_id: Some(200),
+            author_id: 2,
+            ..Default::default()
+        };
 
+        // Case 1: Original then Retweet
+        let candidates = vec![original.clone(), retweet.clone()];
         let result = filter.filter(&query, candidates).await.unwrap();
-
-        // Should keep the retweet (first occurrence of 200)
         assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 100);
+        assert_eq!(result.kept[0].tweet_id, 100); // Kept Retweet (swapped)
 
-        // Should remove the original tweet (second occurrence of 200)
-        assert_eq!(result.removed.len(), 1);
-        assert_eq!(result.removed[0].tweet_id, 200);
+        // Case 2: Retweet then Original
+        let candidates = vec![retweet.clone(), original.clone()];
+        let result = filter.filter(&query, candidates).await.unwrap();
+        assert_eq!(result.kept.len(), 1);
+        assert_eq!(result.kept[0].tweet_id, 100); // Kept Retweet (original dropped)
     }
 
     #[tokio::test]
-    async fn test_retweet_deduplication_filter_removes_retweet_if_original_seen_first() {
+    async fn test_prefers_original_over_muted_retweet() {
         let filter = RetweetDeduplicationFilter;
-        let query = ScoredPostsQuery::default();
+        let mut query = ScoredPostsQuery::default();
+        // Mute User 2 (Retweeter)
+        query.user_features.muted_user_ids = vec![2];
 
-        let original_candidate = PostCandidate {
+        let original = PostCandidate {
             tweet_id: 200,
             retweeted_tweet_id: None,
+            author_id: 1, // Valid
             ..Default::default()
         };
 
-        let retweet_candidate = PostCandidate {
+        let muted_retweet = PostCandidate {
             tweet_id: 100,
             retweeted_tweet_id: Some(200),
+            author_id: 2, // Muted
             ..Default::default()
         };
 
-        let candidates = vec![original_candidate, retweet_candidate];
-
+        // Case 1: Original then Muted Retweet
+        let candidates = vec![original.clone(), muted_retweet.clone()];
         let result = filter.filter(&query, candidates).await.unwrap();
-
-        // Should keep the original tweet (first occurrence of 200)
         assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 200);
+        assert_eq!(result.kept[0].tweet_id, 200); // Kept Original (Rank 1 > Rank 0)
 
-        // Should remove the retweet (second occurrence of 200)
-        assert_eq!(result.removed.len(), 1);
-        assert_eq!(result.removed[0].tweet_id, 100);
+        // Case 2: Muted Retweet then Original
+        let candidates = vec![muted_retweet.clone(), original.clone()];
+        let result = filter.filter(&query, candidates).await.unwrap();
+        assert_eq!(result.kept.len(), 1);
+        assert_eq!(result.kept[0].tweet_id, 200); // Kept Original (Swapped Rank 0 with Rank 1)
     }
 }

From 4ffa2cefc05d24e6ffdd5b18cf2390e97a30e47d Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]" <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Tue, 20 Jan 2026 08:12:29 +0000
Subject: [PATCH 5/5] Revert robust ranking logic, keep simple bugfix

- Reverted RetweetDeduplicationFilter to the version that simply checks the
  return value of `insert` to prevent duplicates.
- Removed rank-based logic and complexity.
- Retained original unit tests for the bugfix.
---
 .../filters/retweet_deduplication_filter.rs | 146 ++++------
 1 file changed, 50 insertions(+), 96 deletions(-)

diff --git a/home-mixer/filters/retweet_deduplication_filter.rs b/home-mixer/filters/retweet_deduplication_filter.rs
index 3900ab4..71c015a 100644
--- a/home-mixer/filters/retweet_deduplication_filter.rs
+++ b/home-mixer/filters/retweet_deduplication_filter.rs
@@ -1,81 +1,42 @@
 use crate::candidate_pipeline::candidate::PostCandidate;
 use crate::candidate_pipeline::query::ScoredPostsQuery;
-use std::collections::{HashMap, HashSet};
+use std::collections::HashSet;
 use tonic::async_trait;
 use xai_candidate_pipeline::filter::{Filter, FilterResult};
 
-/// Deduplicates retweets, keeping only the best version of a tweet
-/// (e.g., preferring a valid Retweet over an Original).
+/// Deduplicates retweets, keeping only the first occurrence of a tweet
+/// (whether as an original or as a retweet).
 pub struct RetweetDeduplicationFilter;
 
 #[async_trait]
 impl Filter for RetweetDeduplicationFilter {
     async fn filter(
         &self,
-        query: &ScoredPostsQuery,
+        _query: &ScoredPostsQuery,
         candidates: Vec<PostCandidate>,
     ) -> Result<FilterResult<PostCandidate>, String> {
-        // Build sets for fast lookup of muted/blocked users
-        let blocked_ids: HashSet<i64> = query
-            .user_features
-            .blocked_user_ids
-            .iter()
-            .cloned()
-            .collect();
-        let muted_ids: HashSet<i64> = query
-            .user_features
-            .muted_user_ids
-            .iter()
-            .cloned()
-            .collect();
-
-        let is_valid_author = |author_id: u64| -> bool {
-            let id = author_id as i64;
-            !blocked_ids.contains(&id) && !muted_ids.contains(&id)
-        };
-
-        // Rank function:
-        // 0: Invalid (Muted/Blocked)
-        // 1: Original
-        // 2: Retweet (Valid)
-        let get_rank = |c: &PostCandidate| -> u8 {
-            if !is_valid_author(c.author_id) {
-                0
-            } else if c.retweeted_tweet_id.is_some() {
-                2
-            } else {
-                1
-            }
-        };
-
-        // Map: Canonical Tweet ID -> Index in `kept` vector
-        let mut best_candidates: HashMap<u64, usize> = HashMap::new();
-        let mut kept: Vec<PostCandidate> = Vec::new();
-        let mut removed: Vec<PostCandidate> = Vec::new();
+        let mut seen_tweet_ids: HashSet<u64> = HashSet::new();
+        let mut kept = Vec::new();
+        let mut removed = Vec::new();
 
         for candidate in candidates {
-            let canonical_id = candidate
-                .retweeted_tweet_id
-                .unwrap_or(candidate.tweet_id as u64);
-
-            let current_rank = get_rank(&candidate);
-
-            if let Some(&existing_idx) = best_candidates.get(&canonical_id) {
-                let existing_rank = get_rank(&kept[existing_idx]);
-
-                if current_rank > existing_rank {
-                    // Current is better: Swap
-                    // Move the previously kept candidate to removed
-                    let previous = std::mem::replace(&mut kept[existing_idx], candidate);
-                    removed.push(previous);
-                } else {
-                    // Existing is better or equal: Drop current
-                    removed.push(candidate);
+            match candidate.retweeted_tweet_id {
+                Some(retweeted_id) => {
+                    // Remove if we've already seen this tweet (as original or retweet)
+                    if seen_tweet_ids.insert(retweeted_id) {
+                        kept.push(candidate);
+                    } else {
+                        removed.push(candidate);
+                    }
+                }
+                None => {
+                    // Mark this original tweet ID as seen so retweets of it get filtered
+                    if seen_tweet_ids.insert(candidate.tweet_id as u64) {
+                        kept.push(candidate);
+                    } else {
+                        removed.push(candidate);
+                    }
                 }
-            } else {
-                // New canonical ID
-                best_candidates.insert(canonical_id, kept.len());
-                kept.push(candidate);
             }
         }
 
@@ -88,71 +49,64 @@ mod tests {
     use super::*;
     use crate::candidate_pipeline::candidate::PostCandidate;
     use crate::candidate_pipeline::query::ScoredPostsQuery;
-    use crate::candidate_pipeline::query_features::UserFeatures;
 
     #[tokio::test]
-    async fn test_prefers_retweet_over_original() {
+    async fn test_retweet_deduplication_filter_removes_original_if_retweet_seen_first() {
         let filter = RetweetDeduplicationFilter;
         let query = ScoredPostsQuery::default();
 
-        let original = PostCandidate {
-            tweet_id: 200,
-            retweeted_tweet_id: None,
-            author_id: 1,
+        let retweet_candidate = PostCandidate {
+            tweet_id: 100,
+            retweeted_tweet_id: Some(200),
             ..Default::default()
         };
 
-        let retweet = PostCandidate {
-            tweet_id: 100,
-            retweeted_tweet_id: Some(200),
-            author_id: 2,
+        let original_candidate = PostCandidate {
+            tweet_id: 200,
+            retweeted_tweet_id: None,
             ..Default::default()
         };
 
-        // Case 1: Original then Retweet
-        let candidates = vec![original.clone(), retweet.clone()];
-        let result = filter.filter(&query, candidates).await.unwrap();
-        assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 100); // Kept Retweet (swapped)
+        let candidates = vec![retweet_candidate, original_candidate];
 
-        // Case 2: Retweet then Original
-        let candidates = vec![retweet.clone(), original.clone()];
         let result = filter.filter(&query, candidates).await.unwrap();
+
+        // Should keep the retweet (first occurrence of 200)
         assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 100); // Kept Retweet (original dropped)
+        assert_eq!(result.kept[0].tweet_id, 100);
+
+        // Should remove the original tweet (second occurrence of 200)
+        assert_eq!(result.removed.len(), 1);
+        assert_eq!(result.removed[0].tweet_id, 200);
     }
 
     #[tokio::test]
-    async fn test_prefers_original_over_muted_retweet() {
+    async fn test_retweet_deduplication_filter_removes_retweet_if_original_seen_first() {
         let filter = RetweetDeduplicationFilter;
-        let mut query = ScoredPostsQuery::default();
-        // Mute User 2 (Retweeter)
-        query.user_features.muted_user_ids = vec![2];
+        let query = ScoredPostsQuery::default();
 
-        let original = PostCandidate {
+        let original_candidate = PostCandidate {
             tweet_id: 200,
             retweeted_tweet_id: None,
-            author_id: 1, // Valid
             ..Default::default()
         };
 
-        let muted_retweet = PostCandidate {
+        let retweet_candidate = PostCandidate {
             tweet_id: 100,
             retweeted_tweet_id: Some(200),
-            author_id: 2, // Muted
             ..Default::default()
         };
 
-        // Case 1: Original then Muted Retweet
-        let candidates = vec![original.clone(), muted_retweet.clone()];
+        let candidates = vec![original_candidate, retweet_candidate];
+
         let result = filter.filter(&query, candidates).await.unwrap();
+
+        // Should keep the original tweet (first occurrence of 200)
         assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 200); // Kept Original (Rank 1 > Rank 0)
+        assert_eq!(result.kept[0].tweet_id, 200);
 
-        // Case 2: Muted Retweet then Original
-        let candidates = vec![muted_retweet.clone(), original.clone()];
-        let result = filter.filter(&query, candidates).await.unwrap();
-        assert_eq!(result.kept.len(), 1);
-        assert_eq!(result.kept[0].tweet_id, 200); // Kept Original (Swapped Rank 0 with Rank 1)
+        // Should remove the retweet (second occurrence of 200)
+        assert_eq!(result.removed.len(), 1);
+        assert_eq!(result.removed[0].tweet_id, 100);
    }
 }
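
A minimal, self-contained sketch of the deduplication idiom the final patch settles on: every candidate is keyed by its canonical tweet ID (the retweeted ID for a retweet, the tweet's own ID otherwise), and `HashSet::insert` returning false marks a duplicate regardless of arrival order. The `Candidate` struct and `main` harness below are simplified, hypothetical stand-ins for `PostCandidate` and the filter pipeline; they are illustration only, not part of the patches.

use std::collections::HashSet;

// Hypothetical, simplified stand-in for PostCandidate.
#[derive(Debug)]
struct Candidate {
    tweet_id: i64,
    retweeted_tweet_id: Option<u64>,
}

// Keep the first candidate seen for each canonical tweet ID; later
// duplicates (original after retweet, or retweet after original) are removed.
fn dedup_by_canonical_id(candidates: Vec<Candidate>) -> (Vec<Candidate>, Vec<Candidate>) {
    let mut seen: HashSet<u64> = HashSet::new();
    let mut kept = Vec::new();
    let mut removed = Vec::new();
    for candidate in candidates {
        let canonical_id = candidate
            .retweeted_tweet_id
            .unwrap_or(candidate.tweet_id as u64);
        if seen.insert(canonical_id) {
            kept.push(candidate); // first occurrence of this tweet
        } else {
            removed.push(candidate); // duplicate of an already-seen tweet
        }
    }
    (kept, removed)
}

fn main() {
    // Retweet of tweet 200 arrives first; the original 200 is then a duplicate.
    let candidates = vec![
        Candidate { tweet_id: 100, retweeted_tweet_id: Some(200) },
        Candidate { tweet_id: 200, retweeted_tweet_id: None },
    ];
    let (kept, removed) = dedup_by_canonical_id(candidates);
    assert_eq!(kept.len(), 1);
    assert_eq!(kept[0].tweet_id, 100);
    assert_eq!(removed[0].tweet_id, 200);
    println!("kept: {:?}, removed: {:?}", kept, removed);
}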