Skip to content

Commit

Permalink
Merge pull request #2047 from Agenta-AI/feat/implement-human-evaluations
Browse files (browse the repository at this point in the history)
Feat: implemented human evaluations
  • Loading branch information
bekossy authored Sep 3, 2024
2 parents e91e37b + 538b852 commit 60d237e
Show file tree
Hide file tree
Showing 16 changed files with 280 additions and 1,427 deletions.
6 changes: 3 additions & 3 deletions agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -43,9 +43,9 @@ describe("A/B Testing Evaluation workflow", () => {

context("When executing the evaluation", () => {
it("Should successfully execute the evaluation process", () => {
cy.visit(`/apps/${app_id}/annotations/human_a_b_testing`)
cy.url().should("include", "/annotations/human_a_b_testing")
cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]')
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=ab_testing_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=ab_testing_evaluation")
cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]')

cy.get(".ant-modal-content").should("exist")
cy.get('[data-cy="variants-dropdown-0"]').trigger("mouseover")
Expand Down
29 changes: 10 additions & 19 deletions agenta-web/cypress/e2e/app-navigation.cy.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -28,28 +28,19 @@ describe("App Navigation without errors", () => {
cy.get('[data-cy="app-testset-list"]').should("exist")
})

it("should navigate successfully to Automatic Evaluation results evaluators page", () => {
cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-evaluators-link"]')
cy.url().should("include", "/evaluations/new-evaluator")
})
it("should navigate successfully to Evaluations page", () => {
cy.clickLinkAndWait('[data-cy="app-evaluations-link"]')
cy.url().should("include", "/evaluations")
cy.contains(/evaluations/i)

it("should navigate successfully to Automatic Evaluation results page", () => {
cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-evaluations-results-link"]')
cy.url().should("include", "/evaluations/results")
})
cy.get(".ant-tabs-tab").eq(1).click()
cy.url().should("include", "/evaluations?selectedEvaluation=ab_testing_evaluation")

it("should navigate successfully to A/B Test page", () => {
cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-human-ab-testing-link"]')
cy.location("pathname").should("include", "/annotations/human_a_b_testing")
})
cy.get(".ant-tabs-tab").eq(2).click()
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")

it("should navigate successfully to Single Model Test page", () => {
cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-single-model-test-link"]')
cy.location("pathname").should("include", "/annotations/single_model_test")
cy.get(".ant-tabs-tab").eq(0).click()
cy.url().should("include", "/evaluations?selectedEvaluation=auto_evaluation")
})

if (isDemo()) {
Expand Down
12 changes: 6 additions & 6 deletions agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,9 +16,9 @@ describe("Single Model Test workflow", () => {

context("When executing the evaluation", () => {
it("Should successfully execute the evaluation process", () => {
cy.visit(`/apps/${app_id}/annotations/single_model_test`)
cy.url().should("include", "/annotations/single_model_test")
cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]')
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=single_model_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")
cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]')

cy.get(".ant-modal-content").should("exist")

Expand Down Expand Up @@ -49,10 +49,10 @@ describe("Single Model Test workflow", () => {
})

it("Should modify the evaluation vote scores", () => {
cy.visit(`/apps/${app_id}/annotations/single_model_test`)
cy.url().should("include", "/annotations/single_model_test")
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=single_model_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")
cy.wait(1000)
cy.clickLinkAndWait('[data-cy="single-model-view-evaluation-button"]')
cy.clickLinkAndWait(".ant-table-row").eq(0)
cy.get('[data-cy="evalInstructionsShown-ok-btn"]').click()
cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').clear()
cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').type("85")
Expand Down
286 changes: 0 additions & 286 deletions agenta-web/src/components/Evaluations/AutomaticEvaluationResult.tsx

This file was deleted.

Loading

0 comments on commit 60d237e

Please sign in to comment.