Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HUDI-5022] made better error messages for pr compliance #6934

Merged
merged 2 commits into from
Oct 17, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ _Describe context and summary for this change. Highlight if any code was copied.

_Describe any public API or user-facing feature change or any performance impact._

**Risk level: none | low | medium | high**
### Risk level (write none, low medium or high below)

_Choose one. If medium or high, explain what verification was done to mitigate the risks._
_If medium or high, explain what verification was done to mitigate the risks._

### Documentation Update

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_compliance.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
- name: run script
run: python3 scripts/pr_compliance.py > test.log || { echo "::error::pr_compliance.py $(cat test.log)" && exit 1; }
run: python3 scripts/pr_compliance.py



211 changes: 107 additions & 104 deletions scripts/pr_compliance.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import re
import os
import sys

import inspect
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# _____ _ _ _ __ __ _ _ _ _ _ #
# |_ _(_) |_| | ___ \ \ / /_ _| (_) __| | __ _| |_(_) ___ _ __ #
Expand Down Expand Up @@ -130,14 +130,15 @@ class Outcomes:
# identifier: str - line that signifies the start of a section
# linesAfter: set of str - default lines in the template that we ignore when
# verifying that the user filled out the section
# nextSection: str - The name of the next section or "SUCCESS" if this is the
# last section
class ParseSectionData:
def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
def __init__(self, name: str, identifier: str, linesAfter: str):
self.name = name
self.identifier = identifier
self.linesAfter = linesAfter
self.nextSection = nextSection
self.prevSection = ""
self.nextSection = ""



#returns true if line matches the identifier
def identify(self, line: str):
Expand All @@ -151,8 +152,8 @@ def identifyAfter(self, line: str):
#Special holder of data for risk level because the identifier line is modified
#by the user
class RiskLevelData(ParseSectionData):
def __init__(self, name: str, identifier: str, linesAfter, nextSection: str):
super().__init__(name, identifier, linesAfter, nextSection)
def __init__(self, name: str, identifier: str, linesAfter):
super().__init__(name, identifier, linesAfter)

#we check that the line start with the identifier because the identifier
#line will be modified when filled out correctly
Expand All @@ -165,8 +166,23 @@ def identify(self, line: str):
class ParseSections:
def __init__(self, psd):
self.sections = {}
for x in psd:
self.sections[x.name] = x
assert len(psd) > 0
for i in range(len(psd)):
prevI = i - 1
nextI = i + 1
if prevI < 0:
psd[i].prevSection = "START"
else:
psd[i].prevSection = psd[prevI].name

if nextI >= len(psd):
psd[i].nextSection = "END"
else:
psd[i].nextSection = psd[nextI].name

self.sections[psd[i].name] = psd[i]



#returns true if line is an identifier for a section that is not value
# PARAMS
Expand All @@ -179,6 +195,18 @@ def validateOthers(self, line: str, value: str):
return True
return False

#gets the name of the section identified in the line
# PARAMS
# line: str - the line that we are parsing
# RETURN
# string - name of the section if the identifier is found, else none
def getSectionName(self, line: str):
for name in self.sections:
if self.sections[name].identify(line):
return name
return None


#returns the ParseSectionData that is named name
def get(self, name):
return self.sections.get(name)
Expand All @@ -200,9 +228,13 @@ def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False)
self.sections = sections

#prints error message if debug is set to true
def error(self, message):
def error(self, line: str, lineno: str, message: str):
if self.debug:
print(f"ERROR:(state: {self.data.name}, found: {self.found}, message: {message}")
pyline = inspect.getframeinfo(inspect.stack()[1][0]).lineno
print(f"::error file=pr_compliance.py,line={pyline}::{message}")
if lineno != "" and line != "":
print(f"::error file=pr_compliance.py,line={pyline}::found on line {lineno}: {line}")
print(f"::debug::state: {self.data.name}, found: {self.found},")

#returns the name of the next section
def nextSection(self):
Expand All @@ -214,29 +246,55 @@ def validateAfter(self, line):
return self.found and self.data.identifyAfter(line)

#Decides what outcome occurs when the section identifier is found
def processIdentify(self, line):
def processIdentify(self, line, lineno):
if self.found:
#since we have already found the identifier, this means that we have
#found a duplicate of the identifier
self.error(f"duplicate line \"{line}\"")
self.error(line, lineno, f"Duplicate {self.data.name} section found")
return Outcomes.ERROR
self.found = True
return Outcomes.CONTINUE

def makeValidateOthersErrorMessage(self, line):
if self.found:
if self.nextSection() != "END" and self.sections.sections[self.nextSection()].identify(line):
#we found the next identifier but haven't found a description
#yet for this section
return f"Section {self.data.name} is missing a description"
#we found a section other than the next section
return f"Section {self.data.name} should be followed by section {self.data.nextSection}"

#section identifier has not been found yet
sectionFound = self.sections.getSectionName(line)
if sectionFound is None:
print("ERROR: none found even though validateOthers returned True")
exit(1)
elif sectionFound == self.data.prevSection:
#we have not found the current section identifier but we found the
#previous section identifier again
return f"Duplicate {self.data.prevSection} section found"

if self.data.prevSection == "START":
return f"Section {self.data.name} should be the first section"
if sectionFound == self.data.nextSection:
return f"Missing section {self.data.name} between {self.data.prevSection} and {self.data.nextSection}"
return f"Section {self.data.name} was expected after section {self.data.prevSection}"

#Decides the outcome state by processing line
def validateLine(self,line):
def validateLine(self,line,lineno):
if self.data.identify(line):
#we have found the identifier so we should decide what to do
return self.processIdentify(line)
return self.processIdentify(line,lineno)
elif self.sections.validateOthers(line, self.data.name):
#we have found the identifier for another section
self.error(f"Out of order section or missing description \"{line}\"")
#figure out what the error is
self.error(line,lineno,self.makeValidateOthersErrorMessage(line))
return Outcomes.ERROR
elif self.validateAfter(line):
#the pr author has added new text to this section so we consider it
#to be filled out
if self.nextSection() == "SUCCESS":
#if next section is "SUCCESS" then there are no more sections
if self.nextSection() == "END":
#if next section is "END" then there are no more sections
#to process
return Outcomes.SUCCESS
return Outcomes.NEXTSECTION
Expand All @@ -250,47 +308,15 @@ def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False)

#After finding the identifier we don't need to look for anything else so we
#can just go to the next section or terminate if this is the last
def processIdentify(self, line):
o = super().processIdentify(line)
def processIdentify(self, line, lineno):
o = super().processIdentify(line, lineno)
if o == Outcomes.CONTINUE:
if self.nextSection() == "SUCCESS":
if self.nextSection() == "END":
return Outcomes.SUCCESS
else:
return Outcomes.NEXTSECTION
return o

#Risk level has different processing because the pr author will modify the
#identifier and doesn't need to add description if risk is none or low
class RiskLevelSection(ParseSection):
def __init__(self, data: ParseSectionData, sections: ParseSections, debug=False):
super().__init__(data, sections, debug)

def processIdentify(self, line):
if self.found:
#since we have already found the identifier, this means that we have
#found a duplicate of the identifier
self.error(f"duplicate line starting with \"{self.identifier}\"")
return Outcomes.ERROR
if line == "**Risk level: none | low | medium | high**":
#the user has not modified this line so a risk level was not chosen
self.error("risk level not chosen")
return Outcomes.ERROR
if "NONE" in line.upper() or "LOW" in line.upper():
# an explanation is not required for none or low so we can just
# move on to the next section or terminate if this is the last
if self.nextSection() == "SUCCESS":
return Outcomes.SUCCESS
else:
return Outcomes.NEXTSECTION
elif "MEDIUM" in line.upper() or "HIGH" in line.upper():
# an explanation is required so we don't change state
self.found = True
return Outcomes.CONTINUE
else:
#the pr author put something weird in for risk level
self.error("invalid choice for risk level")
return Outcomes.ERROR

#Class that orchestrates the validation of the entire body
class ValidateBody:
def __init__(self, body: str, firstSection: str, sections: ParseSections, debug=False):
Expand Down Expand Up @@ -319,13 +345,11 @@ def nextSection(self):
#get the data for that section
data = self.sections.get(sectionName)
if data is None:
print(f"parse section {sectionName} not found")
print(f"ERROR with your parse section setup. Parse section {sectionName} not found")
exit(-3)

#create the section
if data.name == "RISKLEVEL":
self.section = RiskLevelSection(data=data, sections=self.sections, debug=self.debug)
elif data.name == "CHECKLIST":
if data.name == "CHECKLIST":
self.section = NoDataSection(data=data, sections=self.sections, debug=self.debug)
else:
self.section = ParseSection(data=data, sections=self.sections, debug=self.debug)
Expand All @@ -336,13 +360,13 @@ def validate(self):
self.nextSection()

#validate each line
for line in self.body.splitlines():
for lineno, line in enumerate(self.body.splitlines(), 1):
#ignore empty lines
if len(line) == 0:
continue

#run the parse section validation
o = self.section.validateLine(line)
o = self.section.validateLine(line, lineno)

#decide what to do based on outcome
if o == Outcomes.ERROR:
Expand All @@ -353,28 +377,30 @@ def validate(self):
self.nextSection()
#if we get through all the lines without a success outcome, then the
#body does not comply
self.section.error("template is not filled out properly")
if self.section.data.nextSection == "END":
if self.section.found:
self.section.error("","",f"Section {self.section.data.name} is missing a description")
return False
self.section.error("","",f"Missing section {self.section.data.name} at the end")
return False
self.section.error("","", "Please make sure you have filled out the template correctly. You can find a blank template in /.github/PULL_REQUEST_TEMPLATE.md")
return False

#Generate the validator for the current template.
#needs to be manually updated
def make_default_validator(body, debug=False):
changelogs = ParseSectionData("CHANGELOGS",
"### Change Logs",
{"_Describe context and summary for this change. Highlight if any code was copied._"},
"IMPACT")
{"_Describe context and summary for this change. Highlight if any code was copied._"})
impact = ParseSectionData("IMPACT",
"### Impact",
{"_Describe any public API or user-facing feature change or any performance impact._"},
"RISKLEVEL")
{"_Describe any public API or user-facing feature change or any performance impact._"})
risklevel = RiskLevelData("RISKLEVEL",
"**Risk level:",
{"_Choose one. If medium or high, explain what verification was done to mitigate the risks._"},
"CHECKLIST")
"### Risk level ",
{"_If medium or high, explain what verification was done to mitigate the risks._"})
checklist = ParseSectionData("CHECKLIST",
"### Contributor's checklist",
{},
"SUCCESS")
{})
parseSections = ParseSections([changelogs, impact, risklevel, checklist])

return ValidateBody(body, "CHANGELOGS", parseSections, debug)
Expand Down Expand Up @@ -431,18 +457,14 @@ def test_body():
good_impact[1] = "impact description"

template_risklevel = [
"**Risk level: none | low | medium | high**",
"### Risk level (write none, low medium or high below)",
"",
"_Choose one. If medium or high, explain what verification was done to mitigate the risks._",
"_If medium or high, explain what verification was done to mitigate the risks._",
""
]

good_risklevel_none = template_risklevel.copy()
good_risklevel_none[0] = "**Risk level: none**"

good_risklevel_medium = template_risklevel.copy()
good_risklevel_medium[0] = "**Risk level: medium**"
good_risklevel_medium[1] = "risklevel description"
good_risklevel = template_risklevel.copy()
good_risklevel[1] = "none"

template_checklist = [
"### Contributor's checklist",
Expand All @@ -454,13 +476,14 @@ def test_body():
]

#list of sections that when combined form a valid body
good_sections = [good_changelogs, good_impact, good_risklevel_none, template_checklist]
good_sections = [good_changelogs, good_impact, good_risklevel, template_checklist]

#list of sections that when combined form the template
template_sections = [template_changelogs, template_impact, template_risklevel, template_checklist]

tests_passed = True
#Test section not filled out
#no need to test checklist section
for i in range(len(good_sections)-1):
test_sections = []
for j in range(len(good_sections)):
Expand All @@ -480,13 +503,13 @@ def test_body():
tests_passed = run_test(f"duplicate section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

#Test out of order section
for i in range(len(good_sections)):
for i in range(len(good_sections)-1):
test_sections = []
for j in range(len(good_sections)):
test_sections.append(good_sections[j].copy())
k = (i + 3) % len(good_sections)
test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed
for k in range(i+1,len(good_sections)):
test_sections[i], test_sections[k] = test_sections[k],test_sections[i]
tests_passed = run_test(f"Swapped sections: {i}, {k}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

#Test missing section
for i in range(len(good_sections)):
Expand All @@ -496,28 +519,8 @@ def test_body():
test_sections.append(good_sections[j].copy())
tests_passed = run_test(f"Missing Section: {i}", build_body(test_sections), False, DEBUG_MESSAGES) and tests_passed

#Test good body with risk level of none:
tests_passed = run_test("good documentation. risk level none", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed

#Test good body with risk level of medium:
risk_level_index = 2
good_medium_risk_level = good_sections.copy()
good_medium_risk_level[risk_level_index] = good_risklevel_medium
tests_passed = run_test("good documentation. risk level medium", build_body(good_medium_risk_level), True, DEBUG_MESSAGES) and tests_passed

#Test good body except medium risk level and there is no description
bad_medium_risk_level = good_sections.copy()
bad_risklevel_medium = good_risklevel_medium.copy()
bad_risklevel_medium[1] = ""
bad_medium_risk_level[risk_level_index] = bad_risklevel_medium
tests_passed = run_test("medium risk level but no description", build_body(bad_medium_risk_level), False, DEBUG_MESSAGES) and tests_passed

#Test unknown risk level:
unknow_risk_level = good_sections.copy()
unknow_risk_level_section = template_risklevel.copy()
unknow_risk_level_section[0] = "**Risk level: unknown**"
unknow_risk_level[risk_level_index] = unknow_risk_level_section
tests_passed = run_test("unknown risk level", build_body(unknow_risk_level), False, DEBUG_MESSAGES) and tests_passed
#Test good body:
tests_passed = run_test("good documentation", build_body(good_sections), True, DEBUG_MESSAGES) and tests_passed

print("*****")
if tests_passed:
Expand Down