Skip to content

Commit 606bc59

Browse files
leozejia, 泽加武, jackwener
authored
fix(jianyu): stabilize search and add detail extraction contract (#912)
* fix(jianyu): stabilize search and add detail extraction contract
* fix(jianyu): require query evidence for search results
---------
Co-authored-by: 泽加武 <zejiawu@zejiawudeMac-mini.local>
Co-authored-by: jackwener <jakevingoo@gmail.com>
1 parent 2ddf571 commit 606bc59

File tree

9 files changed

+1507
-110
lines changed

9 files changed

+1507
-110
lines changed

clis/jianyu/detail.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
import { cli, Strategy } from '@jackwener/opencli/registry';
2+
import { runProcurementDetail } from './shared/procurement-detail.js';
3+
4+
// Registers the `detail` command for the `jianyu` site with the opencli registry.
// The description string reads, in English: "Read a Jianyu bid-notice detail page
// and extract evidence fields".
// NOTE(review): `Strategy.COOKIE` + `browser: true` presumably mean the command runs
// in a browser session that reuses the user's cookies — confirm against the registry docs.
cli({
5+
site: 'jianyu',
6+
name: 'detail',
7+
description: '读取剑鱼标讯详情页并抽取证据字段',
8+
domain: 'www.jianyu360.cn',
9+
strategy: Strategy.COOKIE,
10+
browser: true,
11+
// `url` is the only required argument and is positional; `query` is optional.
args: [
12+
{ name: 'url', required: true, positional: true, help: 'Detail page URL from jianyu/search' },
13+
{ name: 'query', help: 'Optional query for evidence ranking' },
14+
],
15+
// Column names listed for this command's output.
columns: ['title', 'publish_time', 'content_type', 'project_code', 'budget_or_limit', 'deadline_or_open_time', 'url'],
16+
// All extraction work is delegated to the shared procurement-detail runner;
// this command only forwards the two CLI arguments and tags the call with site 'jianyu'.
func: async (page, kwargs) => runProcurementDetail(page, {
17+
url: kwargs.url,
18+
query: kwargs.query,
19+
site: 'jianyu',
20+
}),
21+
});

clis/jianyu/search.test.ts

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@ import { describe, expect, it } from 'vitest';
22
import { __test__ } from './search.js';
33

44
describe('jianyu search helpers', () => {
5+
// The first candidate URL built for a keyword must point at the supsearch page
// and carry the keyword plus the `selectType=title` and `searchGroup=1` parameters.
// Only candidates[0] is checked; the remaining candidates are not constrained here.
it('builds candidate URLs with supsearch as preferred entry', () => {
6+
const candidates = __test__.buildSearchCandidates('procurement');
7+
expect(candidates[0]).toContain('/jylab/supsearch/index.html');
8+
expect(candidates[0]).toContain('keywords=procurement');
9+
expect(candidates[0]).toContain('selectType=title');
10+
expect(candidates[0]).toContain('searchGroup=1');
11+
});
12+
513
it('builds supsearch URL with required query params', () => {
614
const url = __test__.buildSearchUrl('procurement');
715
expect(url).toContain('keywords=procurement');
@@ -23,4 +31,59 @@ describe('jianyu search helpers', () => {
2331
]);
2432
expect(deduped).toHaveLength(2);
2533
});
34+
35+
// Of three input rows, two are site navigation entries (a notice-list page and the
// help center, both with empty dates) and one is a notice whose title contains the
// query '电梯'. Only the notice row is expected to survive the filter.
it('filters obvious navigation rows before quality gate', () => {
36+
const filtered = __test__.filterNavigationRows('电梯', [
37+
{ title: '招标公告', url: 'https://www.jianyu360.cn/list/stype/ZBGG.html', date: '' },
38+
{ title: '帮助中心', url: 'https://www.jianyu360.cn/helpCenter/index', date: '' },
39+
{ title: '某项目电梯采购公告', url: 'https://www.jianyu360.cn/notice/detail/123', date: '2026-04-07' },
40+
]);
41+
expect(filtered).toHaveLength(1);
42+
expect(filtered[0].title).toContain('电梯采购公告');
43+
});
44+
45+
// The single input row has a detail-page URL and a date, but neither its title nor
// its contextText mentions the query '电梯'. The filter is expected to drop it,
// leaving an empty result.
it('rejects procurement rows that do not contain query evidence', () => {
46+
const filtered = __test__.filterNavigationRows('电梯', [
47+
{
48+
title: '某项目采购公告',
49+
url: 'https://www.jianyu360.cn/notice/detail/123',
50+
date: '2026-04-07',
51+
contextText: '招标公告 项目编号:ABC-123',
52+
},
53+
]);
54+
expect(filtered).toHaveLength(0);
55+
});
56+
57+
// Feeds the parser two markdown `## [title](url)` headings: one whose link is a
// DuckDuckGo redirect wrapping a jianyu360.cn URL and one that links to jianyu360.cn
// directly. Both must come back as rows; the first row's title and the second row's
// URL are spot-checked.
it('parses search-index markdown headings', () => {
58+
const rows = __test__.parseSearchIndexMarkdown(`
59+
## [标题一](http://duckduckgo.com/l/?uddg=https%3A%2F%2Fbeijing.jianyu360.cn%2Fjybx%2F20260401_26033143187897.html)
60+
## [标题二](https://www.jianyu360.cn/nologin/content/ABC.html)
61+
`);
62+
expect(rows).toHaveLength(2);
63+
expect(rows[0].title).toBe('标题一');
64+
expect(rows[1].url).toContain('jianyu360.cn/nologin/content');
65+
});
66+
67+
// A DuckDuckGo redirect link carries the real target percent-encoded in its `uddg`
// query parameter; the helper must return that target fully decoded.
it('unwraps duckduckgo redirect links', () => {
68+
const direct = __test__.unwrapDuckDuckGoUrl('https://duckduckgo.com/l/?uddg=https%3A%2F%2Fwww.jianyu360.cn%2Fnologin%2Fcontent%2FXYZ.html');
69+
expect(direct).toBe('https://www.jianyu360.cn/nologin/content/XYZ.html');
70+
});
71+
72+
// For a `/jybx/` URL whose file name starts with `20260310_`, the helper must return
// the ISO-style date '2026-03-10'.
// NOTE(review): presumably the date is taken from the leading YYYYMMDD segment of the
// file name — confirm against extractDateFromJianyuUrl.
it('extracts publish date from jianyu jybx urls', () => {
73+
const date = __test__.extractDateFromJianyuUrl('https://shandong.jianyu360.cn/jybx/20260310_26030938267551.html');
74+
expect(date).toBe('2026-03-10');
75+
});
76+
77+
// The payload supplies its title, link and timestamp under the keys `noticeTitle`,
// `detailUrl` and `publishTime`. The normalized row must expose them as `title`,
// `url` and `date`, with the timestamp reduced to its date part ('2026-03-10').
// The optional chaining below tolerates a nullable return, but the row must be truthy.
it('normalizes api payload rows with fallback url/title fields', () => {
78+
const normalized = __test__.normalizeApiRow({
79+
noticeTitle: '某项目电梯采购公告',
80+
detailUrl: '/jybx/20260310_26030938267551.html',
81+
publishTime: '2026-03-10 09:00:00',
82+
buyer: '测试单位',
83+
});
84+
expect(normalized).toBeTruthy();
85+
expect(normalized?.title).toContain('电梯采购公告');
86+
expect(normalized?.url).toContain('/jybx/20260310_26030938267551.html');
87+
expect(normalized?.date).toBe('2026-03-10');
88+
});
2689
});

0 commit comments

Comments
 (0)