Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat: implemented human evaluations #2047

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions agenta-web/cypress/e2e/ab-testing-evaluation.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ describe("A/B Testing Evaluation workflow", () => {

context("When executing the evaluation", () => {
it("Should successfully execute the evaluation process", () => {
cy.visit(`/apps/${app_id}/annotations/human_a_b_testing`)
cy.url().should("include", "/annotations/human_a_b_testing")
cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]')
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=ab_testing_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=ab_testing_evaluation")
cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]')

cy.get(".ant-modal-content").should("exist")
cy.get('[data-cy="variants-dropdown-0"]').trigger("mouseover")
Expand Down
29 changes: 10 additions & 19 deletions agenta-web/cypress/e2e/app-navigation.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,28 +28,19 @@ describe("App Navigation without errors", () => {
cy.get('[data-cy="app-testset-list"]').should("exist")
})

it("should navigate successfully to Automatic Evaluation results evaluators page", () => {
cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-evaluators-link"]')
cy.url().should("include", "/evaluations/new-evaluator")
})
it("should navigate successfully to Evaluations page", () => {
cy.clickLinkAndWait('[data-cy="app-evaluations-link"]')
cy.url().should("include", "/evaluations")
cy.contains(/evaluations/i)
ashrafchowdury marked this conversation as resolved.
Show resolved Hide resolved

it("should navigate successfully to Automatic Evaluation results page", () => {
cy.clickLinkAndWait('[data-cy="app-auto-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-evaluations-results-link"]')
cy.url().should("include", "/evaluations/results")
})
cy.get(".ant-tabs-tab").eq(1).click()
cy.url().should("include", "/evaluations?selectedEvaluation=ab_testing_evaluation")

it("should navigate successfully to A/B Test page", () => {
cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-human-ab-testing-link"]')
cy.location("pathname").should("include", "/annotations/human_a_b_testing")
})
cy.get(".ant-tabs-tab").eq(2).click()
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")

it("should navigate successfully to Single Model Test page", () => {
cy.clickLinkAndWait('[data-cy="app-human-evaluations-link"]')
cy.clickLinkAndWait('[data-cy="app-single-model-test-link"]')
cy.location("pathname").should("include", "/annotations/single_model_test")
cy.get(".ant-tabs-tab").eq(0).click()
cy.url().should("include", "/evaluations?selectedEvaluation=auto_evaluation")
})

if (isDemo()) {
Expand Down
12 changes: 6 additions & 6 deletions agenta-web/cypress/e2e/single-model-test-evaluation.cy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ describe("Single Model Test workflow", () => {

context("When executing the evaluation", () => {
it("Should successfully execute the evaluation process", () => {
cy.visit(`/apps/${app_id}/annotations/single_model_test`)
cy.url().should("include", "/annotations/single_model_test")
cy.clickLinkAndWait('[data-cy="new-annotation-modal-button"]')
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=single_model_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")
cy.clickLinkAndWait('[data-cy="new-human-eval-modal-button"]')

cy.get(".ant-modal-content").should("exist")

Expand Down Expand Up @@ -49,10 +49,10 @@ describe("Single Model Test workflow", () => {
})

it("Should modify the evaluation vote scores", () => {
cy.visit(`/apps/${app_id}/annotations/single_model_test`)
cy.url().should("include", "/annotations/single_model_test")
cy.visit(`/apps/${app_id}/evaluations?selectedEvaluation=single_model_evaluation`)
cy.url().should("include", "/evaluations?selectedEvaluation=single_model_evaluation")
cy.wait(1000)
cy.clickLinkAndWait('[data-cy="single-model-view-evaluation-button"]')
cy.clickLinkAndWait(".ant-table-row").eq(0)
cy.get('[data-cy="evalInstructionsShown-ok-btn"]').click()
cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').clear()
cy.get('[data-cy="evaluation-vote-panel-numeric-vote-input"]').type("85")
Expand Down
286 changes: 0 additions & 286 deletions agenta-web/src/components/Evaluations/AutomaticEvaluationResult.tsx

This file was deleted.

Loading
Loading