Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

“增加“历史数据清理”脚本” #1112

Merged
merged 1 commit into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions server/configs/django/local_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
# SECURITY WARNING: keep the secret key used in production secret!
# The value is taken from the ANALYSIS_SECRET_KEY environment variable; the
# literal below is only the fallback used when that variable is not set.
SECRET_KEY = os.getenv(
    "ANALYSIS_SECRET_KEY",
    "25n=e*_e=4q!ert$4u#9v&^2n+)_#mi7&7ll@x29@j=w=k^q@^",
)

# Default primary key field type (added for Django 3.2 compatibility)
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

# 数据库配置,可参考django数据库配置
DATABASES = {
"default": {
Expand Down
3 changes: 3 additions & 0 deletions server/configs/django/local_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
# SECURITY WARNING: keep the secret key used in production secret!
# The value is taken from the MAIN_SECRET_KEY environment variable; the
# literal below is only the fallback used when that variable is not set.
SECRET_KEY = os.getenv(
    "MAIN_SECRET_KEY",
    "lh+6y8pyf16bbor*)p=kp=p(cg615+y+5nnin$l(n%os$8z^v%",
)

# Default primary key field type (added for Django 3.2 compatibility)
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

# 数据库配置,可参考django数据库配置
DATABASES = {
"default": {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2021-2024 THL A29 Limited
#
# This source code file is made available under MIT License
# See LICENSE for details
# ==============================================================================

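"""
Historical data cleanup script.
Cleans up historical data that has been deleted and has expired, in these steps:
1. Clear the tables that hold large amounts of data, in batches.
2. Delete the small amount of remaining data through related deletion.
3. Clear scans and projects.

"""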

import logging
from django.core.management.base import BaseCommand
from django.db import connection
from apps.codeproj import models as codeprojmodels
from apps.codemetric import models as codemetricmodels
from apps.codelint import models as codelinmodels
from time import sleep

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that permanently purges data of deleted projects.

    For every project whose ``deleted_time`` is set, the rows of the large
    project-keyed tables are removed in batches with raw SQL, and afterwards
    the project itself is removed with ``delete(permanent=True)``.
    """

    help = 'clear analysis data'

    def add_arguments(self, parser):
        """Register the ``--batch`` and ``--sleeptime`` options."""
        parser.add_argument('--batch', type=int, help='Set batch size', default=1000)
        parser.add_argument('--sleeptime', type=int, help='Set sleep time/s', default=1)

    def handle(self, *args, **options):
        """Purge all data that belongs to projects with a non-null ``deleted_time``.

        Options read:
            batch: maximum number of rows removed by one DELETE statement.
            sleeptime: seconds to pause between two DELETE statements.
        """
        logger.info("执行历史数据清理功能")
        # Rows deleted per statement, and the pause between statements.
        batch = options['batch']
        sleeptime = options['sleeptime']
        # A non-positive batch can never make progress ("limit 0") and a
        # negative sleep time makes sleep() raise, so reject both up front.
        if batch <= 0 or sleeptime < 0:
            logger.error("--batch must be positive and --sleeptime must not be negative")
            return

        # Projects whose deleted_time field is not null.
        projectSet = codeprojmodels.Project.everything.filter(
            deleted_time__isnull=False
        )

        # Large tables keyed by project_id, listed in the order they are purged.
        batch_cleared_models = (
            codemetricmodels.DuplicateBlock,
            codemetricmodels.DuplicateFile,
            codemetricmodels.DuplicateIssue,
            codemetricmodels.ClocFile,
            codemetricmodels.ClocDir,
            codelinmodels.IssueDetail,
            codelinmodels.Issue,
            codemetricmodels.CyclomaticComplexity,
        )

        # (1) First clear the large project-related tables; scan and project
        #     themselves are not touched in this phase.
        for project in projectSet:
            for model in batch_cleared_models:
                self.clearDataByModel(model._meta.db_table, batch, project.id, sleeptime)

        # (2) Then remove the projects themselves. Per the original author's
        #     note this is expected to also remove scans and the small amount
        #     of remaining related data -- that depends on Project.delete,
        #     which is defined outside this file.
        for project in projectSet:
            project.delete(permanent=True)

        logger.info("过期历史数据全部删除完毕!")

    def clearDataByModel(self, table, batch, id, sleeptime):
        """Delete every row of ``table`` whose ``project_id`` equals ``id``, in batches.

        Args:
            table: database table name (callers pass ``Model._meta.db_table``).
            batch: maximum number of rows removed per DELETE statement.
            id: project id to match. The name shadows the builtin but is kept
                so existing keyword callers keep working.
            sleeptime: seconds to sleep between two consecutive batches.
        """
        # The context manager closes the cursor even when a query raises.
        with connection.cursor() as cursor:
            # Count the rows to delete. The table name cannot be bound as a
            # query parameter, so it is interpolated; the project id is bound.
            cursor.execute(
                f"select count(project_id) from {table} where project_id = %s",
                [id],
            )
            count = cursor.fetchone()[0]

            logger.info("开始删除project id为 %d 关联的 %s 表数据共 %d 条", id, table, count)
            # NOTE: "delete ... limit" is not portable SQL; this relies on the
            # configured database backend accepting it (e.g. MySQL).
            delete_sql = f"delete from {table} where project_id = %s limit {int(batch)}"
            cur = 0
            while cur < count:
                cursor.execute(delete_sql, [id])
                affected = cursor.rowcount
                if affected <= 0:
                    # Nothing was deleted (rows vanished concurrently, or the
                    # driver reports no row count): stop instead of looping forever.
                    logger.warning(
                        "no rows deleted from %s for project %d at %d/%d, stopping",
                        table, id, cur, count)
                    break
                cur += affected
                logger.info("数据正在进行删除 %d/%d", cur, count)
                # Only pause when another batch will follow.
                if cur < count:
                    sleep(sleeptime)
            logger.info("当前部分数据删除完毕!")
2 changes: 1 addition & 1 deletion server/projects/analysis/apps/codemetric/models/cc.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ class StateEnum(object):


class CyclomaticComplexity(CDBaseModel):
"""重复代码问题
"""圈复杂度问题
"""

class ChangeTypeEnum(object):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
urlpatterns = [
path("", apis.ProjectCreateApiView.as_view(),
name="apiv1_project_create"),
path("<int:project_id>/", apis.ProjectDetailApiView.as_view(),
name="apiv1_project_detail"),
path("<int:project_id>/scans/", apis.ProjectScanListCreateApiView.as_view(),
name="apiv1_project_scan_list"),
path("<int:project_id>/scans/<int:scan_id>/", apis.ProjectScanResultDetailApiView.as_view(),
Expand Down
17 changes: 17 additions & 0 deletions server/projects/analysis/apps/codeproj/apis/v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,23 @@ def post(self, request):
return Response(data={"msg": "project %s has exist" % project_id}, status=status.HTTP_400_BAD_REQUEST)


class ProjectDetailApiView(generics.DestroyAPIView):
    """Soft-delete a project
    Intended caller: internal services

    ### Delete
    Use case: in the analysis service, soft-delete a project that has already been deleted and has expired in the main service

    """

    schema = None
    serializer_class = serializers.ProjectSerializer
    authentication_classes = (MainServerInternalAuthentication,)

    def get_object(self):
        """Return the Project whose id equals the ``project_id`` URL kwarg, or raise a 404."""
        project_id = self.kwargs["project_id"]
        return get_object_or_404(models.Project, id=project_id)


class ProjectScanListCreateApiView(generics.ListCreateAPIView, ProjectBaseAPIView):
"""项目扫描列表接口
使用对象:服务内部
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# -*- coding: utf-8 -*-
# Copyright (c) 2021-2024 THL A29 Limited
#
# This source code file is made available under MIT License
# See LICENSE for details
# ==============================================================================

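"""
Historical data cleanup script.
Cleans up historical data that has been deleted and has expired, in these steps:
1. Remove projects that have already been soft-deleted, together with their related data in the main database.
2. Remotely call the analysis service to soft-delete the corresponding data there.

"""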

import logging
from django.utils import timezone
from django.core.management.base import BaseCommand
from datetime import timedelta
from apps.codeproj import models
from util.webclients import AnalyseClient

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Management command that permanently removes expired, deleted projects.

    A project is selected when its ``deleted_time`` is at least
    ``--expired_days`` days in the past. For each selected project the
    analysis service's ``delete_project`` API is called, then the project is
    removed with ``delete(permanent=True)``.
    """

    help = 'clear deleted project'

    def add_arguments(self, parser):
        """Register the ``--expired_days`` option."""
        parser.add_argument('--expired_days', type=int, help='Set expired days', default=180)

    def handle(self, *args, **options):
        """Remove every project whose ``deleted_time`` is older than the cutoff.

        Options read:
            expired_days: minimum age, in days, of ``deleted_time``; a
                negative value is rejected with an error log.
        """
        logger.info("执行历史数据清理功能")

        expired_days = options['expired_days']
        if expired_days < 0:
            logger.error("设置的天数不能为负数,请重新执行!")
            return

        # Projects whose deleted_time is at or before the cutoff; rows with a
        # null deleted_time cannot satisfy the comparison and are excluded.
        cutoff = timezone.now() - timedelta(days=expired_days)
        queryset = models.Project.everything.filter(deleted_time__lte=cutoff)

        logger.info("正在清理main服务中已被删除且过期的历史数据......")
        # One client serves every request; it does not vary per project.
        analyse_client = AnalyseClient()
        cleared = 0
        for project in queryset:
            logger.info("清除项目id为 %d 的关联数据", project.id)
            # Ask the analysis service to delete its copy of the project first,
            # then permanently remove the project from the main database.
            analyse_client.api('delete_project', data=None, path_params=(project.id,))
            project.delete(permanent=True)
            # Counted here so the total does not depend on querying rows that
            # have just been deleted.
            cleared += 1

        # The analysis-side cleanup itself is run separately in that service.
        logger.info(
            "数据清理完毕,共 %d 个项目,如需清理analysis服务中的相关数据,请到analysis服务中执行clear_analysis_data.py脚本!",
            cleared)
9 changes: 8 additions & 1 deletion server/projects/main/util/webclients.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,10 @@ def __deco(*args, **kwargs):
result = rsp.data.decode("utf-8")
# 正常返回
if 200 <= rsp.status < 300:
return json.loads(result)
if result:
return json.loads(result)
else:
return result
else:
# 其他错误,如请求参数错误
logger.error(result)
Expand Down Expand Up @@ -121,6 +124,10 @@ def __init__(self):
"path": "api/projects/",
"method": "post",
},
"delete_project": {
"path": "api/projects/%d/",
"method": "delete",
},
"create_scan": {
"path": "api/projects/%d/scans/",
"method": "post",
Expand Down
Loading