Files
users/app/api/cleaning-rules/route.ts

318 lines
7.7 KiB
TypeScript

/**
* 清洗规则 API
* 提供数据清洗规则管理、执行清洗任务等功能
*/
import { NextRequest, NextResponse } from 'next/server'
import { getMongoClient, maskPhone } from '@/lib/mongodb'
/** Kinds of operation a cleaning rule can be declared as. */
type CleaningRuleType = 'format' | 'dedup' | 'validate' | 'enrich' | 'mask'

/** Categories of value a `mask` rule can be configured for. */
type CleaningMaskType = 'phone' | 'email' | 'idcard' | 'name'

/**
 * Per-rule settings. Every key is optional; which keys are present varies
 * with the rule (an empty object is valid).
 */
interface CleaningRuleConfig {
  pattern?: string
  replacement?: string
  validation?: string
  enrichSource?: string
  maskType?: CleaningMaskType
}

/**
 * A data-cleaning rule as exposed by this API.
 *
 * `lastRunAt` and `processedCount` are optional bookkeeping fields; the
 * remaining fields are required.
 */
interface CleaningRule {
  id: string
  name: string
  description: string
  type: CleaningRuleType
  /** Name of the record field the rule applies to. */
  targetField: string
  config: CleaningRuleConfig
  /** Rule lists are sorted ascending on this value. */
  priority: number
  enabled: boolean
  createdAt: string
  lastRunAt?: string
  processedCount?: number
}
// Predefined cleaning rules, declared inline as a module-level array.
// Each entry carries its own `priority` (1-6 here, matching declaration order).
// NOTE(review): nothing visible in this file interprets the `config` values
// (patterns, replacement, maskType, enrichSource) — confirm where they are consumed.
const CLEANING_RULES: CleaningRule[] = [
// Phone format rule: the pattern has an optional "+86"/"86" prefix group followed by
// an 11-digit group (1, then 3-9, then nine digits); replacement '$2' names the second group.
{
id: 'rule_phone_format',
name: '手机号格式标准化',
description: '将各种格式的手机号统一为11位标准格式',
type: 'format',
targetField: 'phone',
config: {
pattern: '^(\\+?86)?([1][3-9]\\d{9})$',
replacement: '$2'
},
priority: 1,
enabled: true,
createdAt: '2025-01-01',
lastRunAt: '2026-01-31',
processedCount: 1500000
},
// Phone de-duplication rule: carries no configuration (empty `config`).
{
id: 'rule_phone_dedup',
name: '手机号去重',
description: '按手机号去除重复记录,保留最新数据',
type: 'dedup',
targetField: 'phone',
config: {},
priority: 2,
enabled: true,
createdAt: '2025-01-01',
lastRunAt: '2026-01-30',
processedCount: 50000
},
// Phone validation rule: `validation` is an anchored 11-digit pattern (1, then 3-9, then nine digits).
// NOTE(review): processedCount is three orders of magnitude above the other rules — confirm it is intended.
{
id: 'rule_phone_validate',
name: '手机号有效性验证',
description: '验证手机号是否符合中国大陆手机号规则',
type: 'validate',
targetField: 'phone',
config: {
validation: '^1[3-9]\\d{9}$'
},
priority: 3,
enabled: true,
createdAt: '2025-01-01',
lastRunAt: '2026-01-31',
processedCount: 2000000000
},
// Phone masking rule: no `lastRunAt` and a processedCount of 0.
{
id: 'rule_phone_mask',
name: '手机号脱敏',
description: '对外展示时隐藏手机号中间4位',
type: 'mask',
targetField: 'phone',
config: {
maskType: 'phone'
},
priority: 4,
enabled: true,
createdAt: '2025-01-01',
processedCount: 0
},
// Province enrichment rule: targets `province`, with enrichSource 'phone_location'.
{
id: 'rule_province_enrich',
name: '省份信息补全',
description: '根据手机号归属地补全省份信息',
type: 'enrich',
targetField: 'province',
config: {
enrichSource: 'phone_location'
},
priority: 5,
enabled: true,
createdAt: '2025-01-01',
lastRunAt: '2026-01-29',
processedCount: 800000
},
// Email format rule: the pattern matches runs of whitespace and the replacement is empty.
// NOTE(review): the description also mentions lower-casing, which this config does not
// express — confirm where (or whether) lower-casing is applied.
{
id: 'rule_email_format',
name: '邮箱格式标准化',
description: '邮箱地址转小写并去除空格',
type: 'format',
targetField: 'email',
config: {
pattern: '\\s+',
replacement: ''
},
priority: 6,
enabled: true,
createdAt: '2025-01-01',
processedCount: 100000
}
]
/**
 * Runs a (simulated) cleaning pass for one rule.
 *
 * No data is modified: the function reads up to 10 documents from the
 * `用户估值` collection of the `KR` database as samples and derives the
 * reported counters arithmetically from `limit`. `limit` does not influence
 * the query itself.
 *
 * @param ruleId - id of an entry in `CLEANING_RULES`
 * @param limit - number reported as processed; defaults to 1000
 * @returns `processed` (= `limit`), `cleaned` (floor of 95% of `limit`),
 *   `errors` (floor of 1% of `limit`) and the sample documents, each with
 *   `phone` passed through `maskPhone` and `cleaningApplied` set to the rule name
 * @throws Error when no rule has the given id
 */
async function executeCleaningRule(ruleId: string, limit: number = 1000): Promise<{
  processed: number
  cleaned: number
  errors: number
  samples: any[]
}> {
  const matchedRule = CLEANING_RULES.find(candidate => candidate.id === ruleId)
  if (matchedRule === undefined) {
    throw new Error('规则不存在')
  }

  const mongo = await getMongoClient()
  const valuations = mongo.db('KR').collection('用户估值')

  // Simulated execution: only a small projected sample is fetched.
  const sampleCursor = valuations
    .find({})
    .limit(10)
    .project({ phone: 1, province: 1, city: 1 })
  const sampleDocs = await sampleCursor.toArray()

  const cleanedCount = Math.floor(limit * 0.95)
  const errorCount = Math.floor(limit * 0.01)
  const annotatedSamples = sampleDocs.map(doc => {
    return {
      ...doc,
      phone: maskPhone(doc.phone),
      cleaningApplied: matchedRule.name
    }
  })

  return {
    processed: limit,
    cleaned: cleanedCount,
    errors: errorCount,
    samples: annotatedSamples
  }
}
/**
 * GET handler for cleaning rules. Behaviour is selected by query parameters,
 * checked in this order:
 *
 * 1. `id` present      -> that single rule, or 404 when no rule has the id
 * 2. `action=stats`    -> aggregate counters over all rules
 * 3. otherwise         -> the rule list, optionally filtered by `type`,
 *                         sorted ascending by `priority`
 *
 * Any exception raised while building the response is logged and reported
 * as a 500 with the exception's `message`.
 */
export async function GET(request: NextRequest) {
  const query = new URL(request.url).searchParams
  const action = query.get('action')
  const typeFilter = query.get('type')
  const requestedId = query.get('id')

  try {
    // Single-rule lookup takes precedence over every other parameter.
    if (requestedId) {
      const found = CLEANING_RULES.find(candidate => candidate.id === requestedId)
      return found
        ? NextResponse.json({ success: true, rule: found })
        : NextResponse.json({ success: false, error: '规则不存在' }, { status: 404 })
    }

    // Aggregate statistics, tallied in one pass over the rule list.
    if (action === 'stats') {
      const rulesByType = { format: 0, dedup: 0, validate: 0, enrich: 0, mask: 0 }
      let enabledRules = 0
      let totalProcessed = 0
      for (const entry of CLEANING_RULES) {
        rulesByType[entry.type] += 1
        if (entry.enabled) {
          enabledRules += 1
        }
        totalProcessed += entry.processedCount || 0
      }

      const stats = {
        totalRules: CLEANING_RULES.length,
        enabledRules,
        totalProcessed,
        rulesByType
      }
      return NextResponse.json({ success: true, stats })
    }

    // Rule list: work on a fresh array so sorting never reorders CLEANING_RULES.
    const selected = typeFilter
      ? CLEANING_RULES.filter(entry => entry.type === typeFilter)
      : [...CLEANING_RULES]
    selected.sort((left, right) => left.priority - right.priority)

    return NextResponse.json({
      success: true,
      rules: selected,
      total: selected.length
    })
  } catch (failure: any) {
    console.error('清洗规则 API 错误:', failure)
    return NextResponse.json(
      { success: false, error: failure.message },
      { status: 500 }
    )
  }
}
/** Rule types a client is allowed to submit when creating a rule. */
const CREATABLE_RULE_TYPES: ReadonlyArray<CleaningRule['type']> = [
  'format',
  'dedup',
  'validate',
  'enrich',
  'mask'
]

/** True for non-null, non-array objects (i.e. a parsed JSON object). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value)
}

/** True for strings with at least one character. */
function isNonEmptyString(value: unknown): value is string {
  return typeof value === 'string' && value !== ''
}

/** True when `value` is one of the rule types declared on `CleaningRule`. */
function isCleaningRuleType(value: unknown): value is CleaningRule['type'] {
  return typeof value === 'string' && (CREATABLE_RULE_TYPES as readonly string[]).includes(value)
}

/**
 * Resolves the client-supplied `limit`.
 * Returns `fallback` when it is omitted (or 0, matching the former
 * `limit || fallback` behaviour), the value itself when it is a positive
 * safe integer, and `null` for anything else.
 */
function resolveLimit(raw: unknown, fallback: number): number | null {
  if (raw === undefined || raw === null || raw === 0) return fallback
  if (typeof raw !== 'number' || !Number.isSafeInteger(raw) || raw < 0) return null
  return raw
}

/** Extracts a printable message from an arbitrary thrown value. */
function describeError(thrown: unknown): string {
  return thrown instanceof Error ? thrown.message : String(thrown)
}

/**
 * POST handler for cleaning rules. The JSON body's `action` selects the operation:
 *
 * - `execute`     - run one rule (`ruleId` required, optional `limit`, default 1000)
 * - `execute_all` - run every enabled rule in turn (optional `limit`, default 100);
 *                   a failure of one rule is recorded in its result entry and
 *                   does not stop the others
 * - `create`      - build a new rule object from `rule` (`name`, `type`,
 *                   `targetField` required). The rule is NOT persisted yet.
 * - `toggle`      - acknowledge a status change for `ruleId`. No state is changed yet.
 *
 * Responses: 400 for a malformed body, missing/invalid parameters or an
 * unknown action; 404 when `ruleId` names no known rule; 500 for unexpected
 * failures (logged).
 */
export async function POST(request: NextRequest) {
  // One shape for every failure response.
  const fail = (error: string, status: number) =>
    NextResponse.json({ success: false, error }, { status })

  // A body that cannot be parsed is the client's fault, not a server error.
  let body: unknown
  try {
    body = await request.json()
  } catch {
    return fail('请求体不是合法的 JSON', 400)
  }
  if (!isPlainRecord(body)) {
    return fail('请求体必须是 JSON 对象', 400)
  }
  const { action, ruleId, rule, limit } = body

  try {
    // Run a single rule.
    if (action === 'execute') {
      if (!isNonEmptyString(ruleId)) {
        return fail('请指定要执行的规则', 400)
      }
      // Checked here so an unknown id yields 404 (as in GET) rather than a 500.
      if (!CLEANING_RULES.some(known => known.id === ruleId)) {
        return fail('规则不存在', 404)
      }
      const effectiveLimit = resolveLimit(limit, 1000)
      if (effectiveLimit === null) {
        return fail('limit 必须为正整数', 400)
      }
      const result = await executeCleaningRule(ruleId, effectiveLimit)
      return NextResponse.json({ success: true, result })
    }

    // Run every enabled rule, one after another.
    if (action === 'execute_all') {
      const effectiveLimit = resolveLimit(limit, 100)
      if (effectiveLimit === null) {
        return fail('limit 必须为正整数', 400)
      }
      const results: Array<Record<string, unknown>> = []
      for (const enabledRule of CLEANING_RULES.filter(known => known.enabled)) {
        try {
          const outcome = await executeCleaningRule(enabledRule.id, effectiveLimit)
          results.push({ ruleId: enabledRule.id, ruleName: enabledRule.name, ...outcome })
        } catch (thrown: unknown) {
          // Best effort: keep going and report the failure alongside the rule.
          results.push({
            ruleId: enabledRule.id,
            ruleName: enabledRule.name,
            error: describeError(thrown)
          })
        }
      }
      return NextResponse.json({ success: true, executed: results.length, results })
    }

    // Build a new rule from client input.
    if (action === 'create') {
      const missingRequired = '规则名称、类型和目标字段为必填项'
      if (!isPlainRecord(rule)) {
        return fail(missingRequired, 400)
      }
      const { name, type, targetField, description, config } = rule
      if (!isNonEmptyString(name) || !type || !isNonEmptyString(targetField)) {
        return fail(missingRequired, 400)
      }
      if (!isCleaningRuleType(type)) {
        return fail('不支持的规则类型', 400)
      }

      // Only known fields are copied, so the client can neither override the
      // server-assigned values (id, priority, enabled, ...) nor inject extra keys.
      const newRule: CleaningRule = {
        id: `rule_${Date.now()}`,
        name,
        description: typeof description === 'string' ? description : '',
        type,
        targetField,
        // A missing or non-object config is treated as "no configuration".
        config: isPlainRecord(config) ? (config as CleaningRule['config']) : {},
        priority: CLEANING_RULES.length + 1,
        enabled: true,
        createdAt: new Date().toISOString().slice(0, 10),
        processedCount: 0
      }
      // TODO: persist the rule to the database
      return NextResponse.json({
        success: true,
        rule: newRule,
        message: '清洗规则创建成功'
      })
    }

    // Acknowledge a status change.
    // NOTE: nothing is toggled yet; only the target rule is validated.
    if (action === 'toggle') {
      if (!isNonEmptyString(ruleId)) {
        return fail('请指定要更新的规则', 400)
      }
      if (!CLEANING_RULES.some(known => known.id === ruleId)) {
        return fail('规则不存在', 404)
      }
      return NextResponse.json({
        success: true,
        message: `规则 ${ruleId} 状态已更新`
      })
    }

    return fail('未知操作', 400)
  } catch (thrown: unknown) {
    // Logged the same way as in the GET handler.
    console.error('清洗规则 API 错误:', thrown)
    return fail(describeError(thrown), 500)
  }
}