/**
 * Cleaning rules API
 *
 * Provides data-cleaning rule management, cleaning-task execution, and related features.
 */
|
|
|
|
import { NextRequest, NextResponse } from 'next/server'
|
|
import { getMongoClient, maskPhone } from '@/lib/mongodb'
|
|
|
|
/**
 * Shape of a data-cleaning rule as listed and returned by this API.
 */
interface CleaningRule {
  /** Unique rule identifier, e.g. `rule_phone_format`. */
  id: string
  /** Human-readable rule name. */
  name: string
  /** Longer explanation of what the rule is meant to do. */
  description: string
  /** Category of cleaning operation; the stats endpoint groups rules by this value. */
  type: 'format' | 'dedup' | 'validate' | 'enrich' | 'mask'
  /** Name of the record field the rule targets (e.g. `phone`, `email`). */
  targetField: string
  /** Type-specific settings; every key is optional. */
  config: {
    /** Pattern string; the predefined `format` rules store a regular-expression source here. */
    pattern?: string
    /** Replacement text paired with `pattern`. */
    replacement?: string
    /** Validation pattern; the predefined `validate` rule stores a regular-expression source here. */
    validation?: string
    /** Identifier of the enrichment data source (e.g. `phone_location`). */
    enrichSource?: string
    /** Kind of value being masked. */
    maskType?: 'phone' | 'email' | 'idcard' | 'name'
  }
  /** Ordering key; the list endpoint sorts rules by ascending priority. */
  priority: number
  /** Whether the rule is active; only enabled rules are run by `execute_all`. */
  enabled: boolean
  /** Creation date as a string (the predefined rules use `YYYY-MM-DD`). */
  createdAt: string
  /** Date of the most recent run, when one has been recorded. */
  lastRunAt?: string
  /** Number of records processed so far, when tracked. */
  processedCount?: number
}
|
|
|
|
/**
 * Predefined cleaning rules served by this API.
 *
 * The list is static and lives in memory; entries are declared in ascending
 * `priority` order. `lastRunAt` / `processedCount` are hard-coded values,
 * nothing in this file updates them.
 */
const CLEANING_RULES: CleaningRule[] = [
  // Strips an optional "+86" / "86" prefix and keeps the 11-digit number (capture group 2).
  {
    id: 'rule_phone_format',
    name: '手机号格式标准化',
    description: '将各种格式的手机号统一为11位标准格式',
    type: 'format',
    targetField: 'phone',
    config: {
      pattern: '^(\\+?86)?([1][3-9]\\d{9})$',
      replacement: '$2'
    },
    priority: 1,
    enabled: true,
    createdAt: '2025-01-01',
    lastRunAt: '2026-01-31',
    processedCount: 1500000
  },
  // Deduplication by phone number; needs no extra configuration.
  {
    id: 'rule_phone_dedup',
    name: '手机号去重',
    description: '按手机号去除重复记录,保留最新数据',
    type: 'dedup',
    targetField: 'phone',
    config: {},
    priority: 2,
    enabled: true,
    createdAt: '2025-01-01',
    lastRunAt: '2026-01-30',
    processedCount: 50000
  },
  // Accepts exactly 11 digits: a leading 1, a second digit in 3-9, then nine more digits.
  {
    id: 'rule_phone_validate',
    name: '手机号有效性验证',
    description: '验证手机号是否符合中国大陆手机号规则',
    type: 'validate',
    targetField: 'phone',
    config: {
      validation: '^1[3-9]\\d{9}$'
    },
    priority: 3,
    enabled: true,
    createdAt: '2025-01-01',
    lastRunAt: '2026-01-31',
    processedCount: 2000000000
  },
  // Phone masking for display; has no recorded run yet (no `lastRunAt`).
  {
    id: 'rule_phone_mask',
    name: '手机号脱敏',
    description: '对外展示时隐藏手机号中间4位',
    type: 'mask',
    targetField: 'phone',
    config: {
      maskType: 'phone'
    },
    priority: 4,
    enabled: true,
    createdAt: '2025-01-01',
    processedCount: 0
  },
  // Fills in the province field from the `phone_location` source.
  {
    id: 'rule_province_enrich',
    name: '省份信息补全',
    description: '根据手机号归属地补全省份信息',
    type: 'enrich',
    targetField: 'province',
    config: {
      enrichSource: 'phone_location'
    },
    priority: 5,
    enabled: true,
    createdAt: '2025-01-01',
    lastRunAt: '2026-01-29',
    processedCount: 800000
  },
  // Removes whitespace from e-mail addresses.
  // NOTE(review): the description also promises lower-casing, but the config
  // only carries a whitespace pattern — confirm which one is intended.
  {
    id: 'rule_email_format',
    name: '邮箱格式标准化',
    description: '邮箱地址转小写并去除空格',
    type: 'format',
    targetField: 'email',
    config: {
      pattern: '\\s+',
      replacement: ''
    },
    priority: 6,
    enabled: true,
    createdAt: '2025-01-01',
    processedCount: 100000
  }
]
|
|
|
|
/**
 * Runs a single cleaning rule in simulation mode.
 *
 * No record is modified: the processed/cleaned/errors counters are derived
 * from `limit` with fixed ratios (95% cleaned, 1% errors), and up to 10 sample
 * documents are read from the `用户估值` collection of the `KR` database with
 * their phone passed through `maskPhone`.
 *
 * @param ruleId id of a rule in `CLEANING_RULES`
 * @param limit  number of records to report as processed; values that are not
 *               a finite, non-negative number fall back to 1000, and
 *               fractions are floored
 * @returns simulated counters plus the masked sample documents
 * @throws Error('规则不存在') when no rule has the given id
 */
async function executeCleaningRule(ruleId: string, limit: number = 1000): Promise<{
  processed: number
  cleaned: number
  errors: number
  samples: Record<string, unknown>[]
}> {
  const rule = CLEANING_RULES.find(candidate => candidate.id === ruleId)
  if (!rule) throw new Error('规则不存在')

  // `limit` ultimately comes from a request body, so it may be a string,
  // NaN or negative at runtime despite the declared type. Normalise it so the
  // counters below are always sane non-negative integers.
  const batchSize =
    typeof limit === 'number' && Number.isFinite(limit) && limit >= 0
      ? Math.floor(limit)
      : 1000

  const client = await getMongoClient()
  const collection = client.db('KR').collection('用户估值')

  // Simulated execution: only a handful of documents are fetched for display.
  const sampleDocs = await collection
    .find({})
    .limit(10)
    .project({ phone: 1, province: 1, city: 1 })
    .toArray()

  return {
    processed: batchSize,
    cleaned: Math.floor(batchSize * 0.95),
    errors: Math.floor(batchSize * 0.01),
    samples: sampleDocs.map(doc => ({
      ...doc,
      // NOTE(review): assumes maskPhone tolerates documents without a phone — confirm.
      phone: maskPhone(doc.phone),
      cleaningApplied: rule.name
    }))
  }
}
|
|
|
|
/**
 * GET handler: reads cleaning rules.
 *
 * Query parameters, checked in this order:
 * - `id`            → responds with that single rule, or 404 when it does not exist
 * - `action=stats`  → responds with aggregate counters over all predefined rules
 * - `type`          → optional filter applied to the rule list
 *
 * Without `id` or `action=stats` the response is the (optionally filtered)
 * rule list sorted by ascending priority, plus its length as `total`.
 * Unexpected failures are logged and reported as 500.
 */
export async function GET(request: NextRequest) {
  const { searchParams } = new URL(request.url)
  const action = searchParams.get('action')
  const type = searchParams.get('type')
  const id = searchParams.get('id')

  try {
    // Single rule lookup
    if (id) {
      const rule = CLEANING_RULES.find(candidate => candidate.id === id)
      if (!rule) {
        return NextResponse.json({
          success: false,
          error: '规则不存在'
        }, { status: 404 })
      }
      return NextResponse.json({
        success: true,
        rule
      })
    }

    // Aggregate statistics, gathered in one pass over the rules
    if (action === 'stats') {
      const rulesByType: Record<CleaningRule['type'], number> = {
        format: 0,
        dedup: 0,
        validate: 0,
        enrich: 0,
        mask: 0
      }
      let enabledRules = 0
      let totalProcessed = 0

      for (const entry of CLEANING_RULES) {
        rulesByType[entry.type] += 1
        if (entry.enabled) enabledRules += 1
        totalProcessed += entry.processedCount ?? 0
      }

      return NextResponse.json({
        success: true,
        stats: {
          totalRules: CLEANING_RULES.length,
          enabledRules,
          totalProcessed,
          rulesByType
        }
      })
    }

    // Rule list; `filter` returns a fresh array, so sorting does not reorder CLEANING_RULES.
    const rules = CLEANING_RULES
      .filter(entry => !type || entry.type === type)
      .sort((a, b) => a.priority - b.priority)

    return NextResponse.json({
      success: true,
      rules,
      total: rules.length
    })
  } catch (error: unknown) {
    console.error('清洗规则 API 错误:', error)
    // A thrown value is not guaranteed to be an Error; fall back to its string form.
    const message = error instanceof Error ? error.message : String(error)
    return NextResponse.json({
      success: false,
      error: message
    }, { status: 500 })
  }
}
|
|
|
|
// Rule types accepted when creating a rule; mirrors the `CleaningRule['type']` union.
const ALLOWED_RULE_TYPES: ReadonlyArray<CleaningRule['type']> = [
  'format',
  'dedup',
  'validate',
  'enrich',
  'mask'
]

// Returns `raw` floored to an integer when it is a finite number >= 1, otherwise `fallback`.
function normalizeLimit(raw: unknown, fallback: number): number {
  return typeof raw === 'number' && Number.isFinite(raw) && raw >= 1
    ? Math.floor(raw)
    : fallback
}

// Extracts a printable message from an arbitrary thrown value.
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : String(error)
}

/**
 * POST handler: executes cleaning rules or creates/toggles a rule.
 *
 * The JSON body selects the operation through `action`:
 * - `execute`      → runs the rule named by `ruleId` (default limit 1000)
 * - `execute_all`  → runs every enabled rule in turn (default limit 100);
 *                    a failure of one rule is recorded in its result entry
 *                    and does not stop the others
 * - `create`       → builds a new rule from `rule` (name, type and
 *                    targetField required); the rule is only echoed back,
 *                    it is not persisted yet
 * - `toggle`       → acknowledges a status change for `ruleId`; no state is
 *                    actually changed yet
 *
 * Responses: 400 for a malformed body, missing/invalid parameters or an
 * unknown action; 404 when `ruleId` does not match a rule; 500 for
 * unexpected failures (which are also logged).
 */
export async function POST(request: NextRequest) {
  try {
    // A body that is not valid JSON is a client error, not a server error.
    let payload: unknown
    try {
      payload = await request.json()
    } catch {
      return NextResponse.json({
        success: false,
        error: '请求体必须是合法的 JSON'
      }, { status: 400 })
    }
    if (typeof payload !== 'object' || payload === null) {
      return NextResponse.json({
        success: false,
        error: '请求体必须是合法的 JSON'
      }, { status: 400 })
    }

    // Every field is untrusted input; each branch validates what it uses.
    const { action, ruleId, rule, limit } = payload as {
      action?: unknown
      ruleId?: unknown
      rule?: Partial<CleaningRule> | null
      limit?: unknown
    }

    // Run a single rule
    if (action === 'execute') {
      if (typeof ruleId !== 'string' || ruleId === '') {
        return NextResponse.json({
          success: false,
          error: '请指定要执行的规则'
        }, { status: 400 })
      }
      // Check existence up front so an unknown id is a 404 rather than a 500.
      if (!CLEANING_RULES.some(candidate => candidate.id === ruleId)) {
        return NextResponse.json({
          success: false,
          error: '规则不存在'
        }, { status: 404 })
      }

      const result = await executeCleaningRule(ruleId, normalizeLimit(limit, 1000))
      return NextResponse.json({
        success: true,
        result
      })
    }

    // Run every enabled rule, one after another
    if (action === 'execute_all') {
      const batchLimit = normalizeLimit(limit, 100)
      const results: Array<Record<string, unknown>> = []

      for (const enabledRule of CLEANING_RULES.filter(candidate => candidate.enabled)) {
        try {
          const outcome = await executeCleaningRule(enabledRule.id, batchLimit)
          results.push({
            ruleId: enabledRule.id,
            ruleName: enabledRule.name,
            ...outcome
          })
        } catch (e: unknown) {
          // Keep going: one failing rule must not hide the results of the others.
          results.push({
            ruleId: enabledRule.id,
            ruleName: enabledRule.name,
            error: describeError(e)
          })
        }
      }

      return NextResponse.json({
        success: true,
        executed: results.length,
        results
      })
    }

    // Create a rule
    if (action === 'create') {
      if (!rule?.name || !rule?.type || !rule?.targetField) {
        return NextResponse.json({
          success: false,
          error: '规则名称、类型和目标字段为必填项'
        }, { status: 400 })
      }
      if (!ALLOWED_RULE_TYPES.includes(rule.type)) {
        return NextResponse.json({
          success: false,
          error: '不支持的规则类型'
        }, { status: 400 })
      }

      // Copy only known fields so the client cannot override the generated id
      // or smuggle arbitrary properties into the rule.
      const newRule: CleaningRule = {
        id: `rule_${Date.now()}`,
        name: rule.name,
        description: rule.description ?? '',
        type: rule.type,
        targetField: rule.targetField,
        config: rule.config ?? {},
        priority: CLEANING_RULES.length + 1,
        enabled: true,
        // First 10 characters of the ISO timestamp are the YYYY-MM-DD date.
        createdAt: new Date().toISOString().slice(0, 10),
        processedCount: 0
      }

      // TODO: persist the rule to the database

      return NextResponse.json({
        success: true,
        rule: newRule,
        message: '清洗规则创建成功'
      })
    }

    // Toggle a rule's status
    if (action === 'toggle') {
      if (typeof ruleId !== 'string' || ruleId === '') {
        return NextResponse.json({
          success: false,
          error: '请指定规则 ID'
        }, { status: 400 })
      }
      if (!CLEANING_RULES.some(candidate => candidate.id === ruleId)) {
        return NextResponse.json({
          success: false,
          error: '规则不存在'
        }, { status: 404 })
      }

      // TODO: the enabled flag is not actually changed or persisted yet.
      return NextResponse.json({
        success: true,
        message: `规则 ${ruleId} 状态已更新`
      })
    }

    return NextResponse.json({
      success: false,
      error: '未知操作'
    }, { status: 400 })
  } catch (error: unknown) {
    console.error('清洗规则 API 错误:', error)
    return NextResponse.json({
      success: false,
      error: describeError(error)
    }, { status: 500 })
  }
}
|