inputSchema
{
  "type": "object",
  "required": [
    "seedUrl"
  ],
  "properties": {
    "seedUrl": {
      "type": "string",
      "format": "uri",
      "description": "HTTP(S) page URL; host must be in network.allowedDomains (whitelisted govt sites)."
    },
    "maxPages": {
      "type": "integer",
      "default": 1,
      "maximum": 50,
      "description": "Max pages to fetch, including the seed, when followPagination is true. Default 1, max 50."
    },
    "linkScope": {
      "type": "string",
      "enum": [
        "default",
        "single_page_downloadable"
      ],
      "description": "single_page_downloadable: one GET (PBOC still follows detail pages); disables pagination and nested link following for other sites. default: the full rules described by the other options apply."
    },
    "collectPdf": {
      "type": "boolean",
      "default": true,
      "description": "Include .pdf in items. Default true."
    },
    "maxTotalFetches": {
      "type": "integer",
      "default": 200,
      "maximum": 200,
      "description": "Max HTTP GETs per invocation. Default 200, max 200."
    },
    "nestedLinkDepth": {
      "type": "integer",
      "default": 0,
      "minimum": 0,
      "maximum": 2,
      "description": "Nesting depth: 0, 1, or 2. For same-origin navigable hrefs, fetch 1 or 2 levels of child pages to collect more file links. Default 0."
    },
    "followPagination": {
      "type": "boolean",
      "description": "If true, try to follow pagination via rel=next, a \"next\" link, or 下一页, up to maxPages pages; may not work on SPA sites."
    },
    "followDetailPages": {
      "type": "boolean",
      "default": true,
      "description": "When true (default): PBOC, CSRC, SAFE follow article links from the list into detail pages to collect file links. When false: only file-like hrefs on the list HTML (often few on CSRC/SAFE list pages)."
    },
    "maxNestedUrlsPerLevel": {
      "type": "integer",
      "default": 12,
      "maximum": 40,
      "description": "Cap of distinct URLs to follow per nested level. Default 12, max 40."
    }
  }
}
outputSchema
{
  "type": "object",
  "properties": {
    "site": { "type": "string" },
    "items": {
      "type": "array",
      "items": {
        "type": "object",
        "properties": {
          "url": { "type": "string" },
          "title": { "type": "string" }
        },
        "required": ["url", "title"]
      }
    },
    "error": { "type": "string" }
  },
  "required": ["site", "items"]
}