diff --git a/datasets/multidoc2dial/README.md b/datasets/multidoc2dial/README.md new file mode 100644 index 00000000000..cd1e42d314a --- /dev/null +++ b/datasets/multidoc2dial/README.md @@ -0,0 +1,348 @@ +--- +annotations_creators: +- crowdsourced +language_creators: +- crowdsourced +- expert-generated +languages: +- en +licenses: +- apache-2.0 +multilinguality: +- monolingual +pretty_name: MultiDoc2Dial +size_categories: + dialogue_domain: + - 1K Supported Task: Open domain question answering, document-grounded dialogue, passage retrieval + +> Leaderboard: + +### Languages + +English + +## Dataset Structure + +### Data Instances + +Sample data instance for `multidoc2dial` : +``` +{ + "id": "8df07b7a98990db27c395cb1f68a962e_1", + "title": "Top 5 DMV Mistakes and How to Avoid Them#3_0", + "context": "Many DMV customers make easily avoidable mistakes that cause them significant problems, including encounters with law enforcement and impounded vehicles. Because we see customers make these mistakes over and over again , we are issuing this list of the top five DMV mistakes and how to avoid them. \n\n1. Forgetting to Update Address \nBy statute , you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ. It is not sufficient to only: write your new address on the back of your old license; tell the United States Postal Service; or inform the police officer writing you a ticket. If you fail to keep your address current , you will miss a suspension order and may be charged with operating an unregistered vehicle and/or aggravated unlicensed operation, both misdemeanors. This really happens , but the good news is this is a problem that is easily avoidable. Learn more about how to change the address on your license and registrations [1 ] \n\n2. 
Leaving the State Without Notifying DMV \nStates communicate with each other , so when you move to another state, be sure to tie up any loose ends regarding your New York State license or registration. That means resolving any unanswered tickets, suspensions or revocations, and surrendering your license plates to NYS when you get to your new home state. A license suspension or revocation here could mean that your new home state will not issue you a license there. Remember , it is important to notify DMV of your new address so that any possible mail correspondence can reach you. Also , turning in your plates is important to avoid an insurance lapse. \n\n3. Letting Insurance Lapse \nBecause we all pay indirectly for crashes involving uninsured motorists , New York State requires every motorist to maintain auto insurance every single day a vehicle is registered. DMV works with insurance companies to electronically monitor your insurance coverage , and we know when coverage is dropped for any reason. When that happens , we mail you an insurance inquiry letter to allow you to clear up the problem. We send 500,000 inquiry letters a year. If the inquiry letter does not resolve the problem , we must suspend the vehicle registration and , if it persists, your driver license!We suspend 300,000 registrations a year for failure to maintain insurance. If you fail to maintain an updated address with us , you won t learn that you have an insurance problem , and we will suspend your registration and license. Make sure you turn in your vehicle s license plates at DMV before you cancel your insurance policy. Insurance policies must be from a company licensed in New York State. Learn more about Insurances Lapes [2] and How to Surrender your Plates [3 ] \n\n4. Understanding how Much Traffic Points Cost \nDMV maintains a point system to track dangerous drivers. 
Often , motorists convicted of a traffic ticket feel they have resolved all their motoring issues with the local court, but later learn that the Driver Responsibility Assessment DRA is a separate DMV charge based on the total points they accumulate. The $300 DRA fee can be paid in $100 annual installments over three years. Motorists who fail to maintain an updated address with DMV may resolve their tickets with the court, but never receive their DRA assessment because we do not have their new address on record. Failure to pay the DRA will result in a suspended license. Learn more about About the NYS Driver Point System [4] and how to Pay Driver Responsibility Assessment [5 ] \n\n5. Not Bringing Proper Documentation to DMV Office \nAbout ten percent of customers visiting a DMV office do not bring what they need to complete their transaction, and have to come back a second time to finish their business. This can be as simple as not bringing sufficient funds to pay for a license renewal or not having the proof of auto insurance required to register a car. Better yet , don t visit a DMV office at all, and see if your transaction can be performed online, like an address change, registration renewal, license renewal, replacing a lost title, paying a DRA or scheduling a road test. Our award - winning website is recognized as one of the best in the nation. It has all the answers you need to efficiently perform any DMV transaction. Consider signing up for our MyDMV service, which offers even more benefits. Sign up or log into MyDMV [6 ] ", + "question": "Hello, I forgot o update my address, can you help me with that?[SEP]", + "da": "query_condition", + "answers": + { + "text": ["you must report a change of address to DMV within ten days of moving. That is the case for the address associated with your license, as well as all the addresses associated with each registered vehicle, which may differ. 
"], + "answer_start": [346] + }, + "utterance": "hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles.", + "domain": "dmv" +} +``` + +Sample data instance for `document_domain` : + +``` +{ + "domain": "ssa", + "doc_id": "Benefits Planner: Survivors | Planning For Your Survivors | Social Security Administration#1_0", + "title": "Benefits Planner: Survivors | Planning For Your Survivors | Social Security Administration#1", + "doc_text": "\n\nBenefits Planner: Survivors | Planning For Your Survivors \nAs you plan for the future , you'll want to think about what your family would need if you should die now. Social Security can help your family if you have earned enough Social Security credits through your work. You can earn up to four credits each year. In 2019 , for example , you earn one credit for each $1,360 of wages or self - employment income. When you have earned $5,440 , you have earned your four credits for the year. The number of credits needed to provide benefits for your survivors depends on your age when you die. No one needs more than 40 credits 10 years of work to be eligible for any Social Security benefit. But , the younger a person is , the fewer credits they must have for family members to receive survivors benefits. Benefits can be paid to your children and your spouse who is caring for the children even if you don't have the required number of credits. They can get benefits if you have credit for one and one - half years of work 6 credits in the three years just before your death. \n\nFor Your Widow Or Widower \nThere are about five million widows and widowers receiving monthly Social Security benefits based on their deceased spouse's earnings record. And , for many of those survivors, particularly aged women, those benefits are keeping them out of poverty. 
Widows and widowers can receive : reduced benefits as early as age 60 or full benefits at full retirement age or older. benefits as early as age 50 if they're disabled AND their disability started before or within seven years of your death. benefits at any age , if they have not remarried , and if they take care of your child who is under age 16 or disabled and receives benefits on your record. If applying for disability benefits on a deceased worker s record , they can speed up the application process if they complete an Adult Disability Report and have it available at the time of their appointment. We use the same definition of disability for widows and widowers as we do for workers. \n\nFor Your Surviving Divorced Spouse \nIf you have a surviving divorced spouse , they could get the same benefits as your widow or widower provided that your marriage lasted 10 years or more. Benefits paid to a surviving divorced spouse won't affect the benefit amounts your other survivors will receive based on your earnings record. If your former spouse is caring for your child who is under age 16 or disabled and gets benefits on your record , they will not have to meet the length - of - marriage rule. The child must be your natural or legally adopted child. \n\nFor Your Children \nYour unmarried children who are under 18 up to age 19 if attending elementary or secondary school full time can be eligible to receive Social Security benefits when you die. And your child can get benefits at any age if they were disabled before age 22 and remain disabled. Besides your natural children , your stepchildren, grandchildren, step grandchildren or adopted children may receive benefits under certain circumstances. For further information , view our publication. \n\nFor Your Parents \nYou must have been providing at least half of your parent s support and your parent must not be eligible to receive a retirement benefit that is higher than the benefit we could pay on your record. 
Generally, your parent also must not have married after your death ; however, there are some exceptions. In addition to your natural parent , your stepparent or adoptive parent may receive benefits if they became your parent before you were age 16. \n\nHow Much Would Your Survivors Receive \nHow much your family could receive in benefits depends on your average lifetime earnings. The higher your earnings were , the higher their benefits would be. We calculate a basic amount as if you had reached full retirement age at the time you die. These are examples of monthly benefit payments : Widow or widower, full retirement age or older 100 percent of your benefit amount ; Widow or widower , age 60 to full retirement age 71 to 99 percent of your basic amount ; Disabled widow or widower , age 50 through 59 71 percent ; Widow or widower , any age, caring for a child under age 16 75 percent ; A child under age 18 19 if still in elementary or secondary school or disabled 75 percent ; and Your dependent parent , age 62 or older : One surviving parent 82 percent. Two surviving parents 75 percent to each parent. Percentages for a surviving divorced spouse would be the same as above. There may also be a special lump - sum death payment. \n\nMaximum Family Amount \nThere's a limit to the amount that family members can receive each month. The limit varies , but it is generally equal to between 150 and 180 percent of the basic benefit rate. If the sum of the benefits payable to family members is greater than this limit , the benefits will be reduced proportionately. Any benefits paid to a surviving divorced spouse based on disability or age won't count toward this maximum amount. Get your online or check our Benefit Calculators for an estimate of the benefits your family could receive if you died right now. \n\nOther Things You Need To Know \nThere are limits on how much survivors may earn while they receive benefits. 
Benefits for a widow, widower, or surviving divorced spouse may be affected by several additional factors : If your widow, widower, or surviving divorced spouse remarries before they reach age 60 age 50 if disabled , they cannot receive benefits as a surviving spouse while they're married. If your widow, widower, or surviving divorced spouse remarries after they reach age 60 age 50 if disabled , they will continue to qualify for benefits on your Social Security record. However , if their current spouse is a Social Security beneficiary , they may want to apply for spouse's benefits on their record. If that amount is more than the widow's or widower's benefit on your record , they will receive a combination of benefits that equals the higher amount. If your widow, widower, or surviving divorced spouse receives benefits on your record , they can switch to their own retirement benefit as early as age 62. This assumes they're eligible for retirement benefits and their retirement rate is higher than their rate as a widow, widower, or surviving divorced spouse. In many cases , a widow or widower can begin receiving one benefit at a reduced rate and then, at full retirement age, switch to the other benefit at an unreduced rate. If your widow, widower, or surviving divorced spouse will also receive a pension based on work not covered by Social Security, such as government or foreign work , their Social Security benefits as a survivor may be affected. 
", + "spans": [ + { + "id_sp": "1", + "tag": "h2", + "start_sp": 0, + "end_sp": 61, + "text_sp": "\n\nBenefits Planner: Survivors | Planning For Your Survivors \n", + "title": "Benefits Planner: Survivors | Planning For Your Survivors", + "parent_titles": { + "id_sp": [], + "text": [], + "level": [] + }, + "id_sec": "t_0", + "start_sec": 0, + "text_sec": "\n\nBenefits Planner: Survivors | Planning For Your Survivors \n", + "end_sec": 61 + }, + { + "id_sp": "2", + "tag": "u", + "start_sp": 61, + "end_sp": 90, + "text_sp": "As you plan for the future , ", + "title": "Benefits Planner: Survivors | Planning For Your Survivors", + "parent_titles": { + "id_sp": [], + "text": [], + "level": [] + }, + "id_sec": "1", + "start_sec": 61, + "text_sec": "As you plan for the future , you'll want to think about what your family would need if you should die now. Social Security can help your family if you have earned enough Social Security credits through your work. ", + "end_sec": 274 + }, + { + "id_sp": "3", + "tag": "u", + "start_sp": 90, + "end_sp": 168, + "text_sp": "you'll want to think about what your family would need if you should die now. ", + "title": "Benefits Planner: Survivors | Planning For Your Survivors", + "parent_titles": { + "id_sp": [], + "text": [], + "level": [] + }, + "id_sec": "1", + "start_sec": 61, + "text_sec": "As you plan for the future , you'll want to think about what your family would need if you should die now. Social Security can help your family if you have earned enough Social Security credits through your work. ", + "end_sec": 274 + } + ], + "doc_html_ts": "

Benefits Planner: Survivors | Planning For Your Survivors

As you plan for the future ,you 'll want to think about what your family would need if you should die now .Social Security can help your family if you have earned enough Social Security credits through your work .
You can earn up to four credits each year .In 2019 ,for example ,you earn one credit for each $ 1,360 of wages or self - employment income .When you have earned $ 5,440 ,you have earned your four credits for the year .
The number of credits needed to provide benefits for your survivors depends on your age when you die .No one needs more than 40 credits 10 years of work to be eligible for any Social Security benefit .But ,the younger a person is ,the fewer credits they must have for family members to receive survivors benefits .
Benefits can be paid to your children and your spouse who is caring for the children even if you do n't have the required number of credits .They can get benefits if you have credit for one and one - half years of work 6 credits in the three years just before your death .

For Your Widow Or Widower

There are about five million widows and widowers receiving monthly Social Security benefits based on their deceased spouse 's earnings record .And ,for many of those survivors , particularly aged women , those benefits are keeping them out of poverty .
Widows and widowers can receive :
  • reduced benefits as early as age 60 or full benefits at full retirement age or older .
  • If widows or widowers qualify for retirement benefits on their own record, they can switch to their own retirement benefit as early as age 62.
  • benefits as early as age 50 if they 're disabled AND their disability started before or within seven years of your death .
  • If a widow or widower who is caring for your children receives Social Security benefits, they're still eligible if their disability starts before those payments end or within seven years after they end.
  • benefits at any age ,if they have not remarried ,and if they take care of your child who is under age 16 or disabled and receives benefits on your record .
  • If a widow or widower remarries after they reach age 60 (age 50 if disabled), the remarriage will not affect their eligibility for survivors benefits.
Widows, widowers, and surviving divorced spouses cannot apply online for survivors benefits. They should contact Social Security at 1-800-772-1213 (TTY 1-800-325-0778) to request an appointment.
If applying for disability benefits on a deceased worker s record ,they can speed up the application process if they complete an Adult Disability Report and have it available at the time of their appointment .
We use the same definition of disability for widows and widowers as we do for workers .

For Your Surviving Divorced Spouse

If you have a surviving divorced spouse ,they could get the same benefits as your widow or widower provided that your marriage lasted 10 years or more .
If your surviving divorced spouse qualifies for retirement benefits on their own record they can switch to their own retirement benefit as early as age 62.
Benefits paid to a surviving divorced spouse wo n't affect the benefit amounts your other survivors will receive based on your earnings record .
If your surviving divorced spouse remarries after they reach age 60 (age 50 if disabled), the remarriage will not affect their eligibility for survivors benefits.
If your former spouse is caring for your child who is under age 16 or disabled and gets benefits on your record ,they will not have to meet the length - of - marriage rule .The child must be your natural or legally adopted child .
However, if they qualify for benefits as a surviving divorced mother or father who is caring for your child, their benefits may affect the amount of benefits your other survivors will receive based on your earnings record.

For Your Children

Your unmarried children who are under 18 up to age 19 if attending elementary or secondary school full time can be eligible to receive Social Security benefits when you die .
And your child can get benefits at any age if they were disabled before age 22 and remain disabled .
Besides your natural children ,your stepchildren , grandchildren , step grandchildren or adopted children may receive benefits under certain circumstances .For further information ,view our publication .

For Your Parents

You must have been providing at least half of your parent s support and your parent must not be eligible to receive a retirement benefit that is higher than the benefit we could pay on your record .Generally , your parent also must not have married after your death ;however , there are some exceptions .
In addition to your natural parent ,your stepparent or adoptive parent may receive benefits if they became your parent before you were age 16 .

How Much Would Your Survivors Receive

How much your family could receive in benefitsdepends on your average lifetime earnings .The higher your earnings were ,the higher their benefits would be .We calculate a basic amount as if you had reached full retirement age at the time you die .
If you are already receiving reduced benefits when you die, survivors benefits are based on that amount.
These are examples of monthly benefit payments :
  • Widow or widower , full retirement age or older 100 percent of your benefit amount ;
  • Widow or widower ,age 60 to full retirement age 71 to 99 percent of your basic amount ;
  • Disabled widow or widower ,age 50 through 59 71 percent ;
  • Widow or widower ,any age , caring for a child under age 16 75 percent ;
  • A child under age 18 19 if still in elementary or secondary school or disabled 75 percent ;and
  • Your dependent parent ,age 62 or older :
    • One surviving parent 82 percent .
    • Two surviving parents 75 percent to each parent .
Percentages for a surviving divorced spouse would be the same as above .
There may also be a special lump - sum death payment .

Maximum Family Amount

There 's a limit to the amount that family members can receive each month .The limit varies ,but it is generally equal to between 150 and 180 percent of the basic benefit rate .
If the sum of the benefits payable to family members is greater than this limit ,the benefits will be reduced proportionately .Any benefits paid to a surviving divorced spouse based on disability or age wo n't count toward this maximum amount .
Get your online or check our Benefit Calculators for an estimate of the benefits your family could receive if you died right now .

Other Things You Need To Know

There are limits on how much survivors may earn while they receive benefits .
Benefits for a widow , widower , or surviving divorced spouse may be affected by several additional factors :
If they remarry
If your widow , widower , or surviving divorced spouse remarries before they reach age 60 age 50 if disabled ,they can not receive benefits as a surviving spouse while they 're married .
If your widow , widower , or surviving divorced spouse remarries after they reach age 60 age 50 if disabled ,they will continue to qualify for benefits on your Social Security record .
However ,if their current spouse is a Social Security beneficiary ,they may want to apply for spouse 's benefits on their record .If that amount is more than the widow 's or widower 's benefit on your record ,they will receive a combination of benefits that equals the higher amount .
If they're eligible for retirement benefits on their own record
If your widow , widower , or surviving divorced spouse receives benefits on your record ,they can switch to their own retirement benefit as early as age 62 .This assumes they 're eligible for retirement benefits and their retirement rate is higher than their rate as a widow , widower , or surviving divorced spouse .
In many cases ,a widow or widower can begin receiving one benefit at a reduced rate and then , at full retirement age , switch to the other benefit at an unreduced rate .
Full retirement age for retirement benefits may not match full retirement age for survivors benefits.
If they will also receive a pension based on work not covered by Social Security
If your widow , widower , or surviving divorced spouse will also receive a pension based on work not covered by Social Security , such as government or foreign work ,their Social Security benefits as a survivor may be affected .
", + "doc_html_raw": "
\n\n
\n\n
\n

Benefits Planner: Survivors | Planning For Your Survivors

\n
\n
\n\n
\n\n
\n\n
\n\n\n
\n\n\n\n
\n
\n

As you plan for the future, you'll want to think about what your family would need if you should die now. Social Security can help your family if you have earned enough Social Security credits through your work.

\n

You can earn up to four credits each year. In 2019, for example, you earn one credit for each $1,360 of wages or self-employment income. When you have earned $5,440, you have earned your four credits for the year.

\n

The number of credits needed to provide benefits for your survivors depends on your age when you die. No one needs more than 40 credits (10 years of work) to be eligible for any Social Security benefit. But, the younger a person is, the fewer credits they must have for family members to receive survivors benefits.

\n

Benefits can be paid to your children and your spouse who is caring for the children even if you don't have the required number of credits. They can get benefits if you have credit for one and one-half years of work (6 credits) in the three years just before your death.

\n
\n
\n

For Your Widow Or Widower

\n

There are about five million widows and widowers receiving monthly Social Security benefits based on their deceased spouse's earnings record. And, for many of those survivors, particularly aged women, those benefits are keeping them out of poverty.

\n

Widows and widowers can receive:

\n
    \n
  • reduced benefits as early as age 60 or full benefits at full retirement age or older.
  • \n
    \n If widows or widowers qualify for retirement benefits on their own record, they can switch to their own retirement benefit as early as age 62.\n
    \n
  • benefits as early as age 50 if they're disabled AND their disability started before or within seven years of your death.
  • \n
    \n If a widow or widower who is caring for your children receives Social Security benefits, they're still eligible if their disability starts before those payments end or within seven years after they end.\n
    \n
  • benefits at any age, if they have not remarried, and if they take care of your child who is under age 16 or disabled and receives benefits on your record.
  • \n
    \n If a widow or widower remarries after they reach age 60 (age 50 if disabled), the remarriage will not affect their eligibility for survivors benefits.\n
    \n
\n
\n Widows, widowers, and surviving divorced spouses cannot apply online for survivors benefits. They should contact Social Security at 1-800-772-1213 (TTY 1-800-325-0778) to request an appointment.\n
\n

If applying for disability benefits on a deceased worker s record, they can speed up the application process if they complete an Adult Disability Report and have it available at the time of their appointment.

\n

We use the same definition of disability for widows and widowers as we do for workers.

\n
\n
\n

For Your Surviving Divorced Spouse

\n

If you have a surviving divorced spouse, they could get the same benefits as your widow or widower provided that your marriage lasted 10 years or more.

\n
\n If your surviving divorced spouse qualifies for retirement benefits on their own record they can switch to their own retirement benefit as early as age 62.\n
\n

Benefits paid to a surviving divorced spouse won't affect the benefit amounts your other survivors will receive based on your earnings record.

\n
\n If your surviving divorced spouse remarries after they reach age 60 (age 50 if disabled), the remarriage will not affect their eligibility for survivors benefits.\n
\n

If your former spouse is caring for your child who is under age 16 or disabled and gets benefits on your record, they will not have to meet the length-of-marriage rule. The child must be your natural or legally adopted child.

\n
\n However, if they qualify for benefits as a surviving divorced mother or father who is caring for your child, their benefits may affect the amount of benefits your other survivors will receive based on your earnings record.\n
\n
\n
\n

For Your Children

\n

Your unmarried children who are under 18 (up to age 19 if attending elementary or secondary school full time) can be eligible to receive Social Security benefits when you die.

\n

And your child can get benefits at any age if they were disabled before age 22 and remain disabled.

\n

Besides your natural children, your stepchildren, grandchildren, step grandchildren or adopted children may receive benefits under certain circumstances. For further information, view our publication.

\n
\n
\n

For Your Parents

\n

You must have been providing at least half of your parent s support and your parent must not be eligible to receive a retirement benefit that is higher than the benefit we could pay on your record. Generally, your parent also must not have married after your death; however, there are some exceptions.

\n

In addition to your natural parent, your stepparent or adoptive parent may receive benefits if they became your parent before you were age 16.

\n
\n
\n

How Much Would Your Survivors Receive

\n

How much your family could receive in benefits depends on your average lifetime earnings. The higher your earnings were, the higher their benefits would be. We calculate a basic amount as if you had reached full retirement age at the time you die.

\n
\n If you are already receiving reduced benefits when you die, survivors benefits are based on that amount.\n
\n

These are examples of monthly benefit payments:

\n
    \n
  • Widow or widower, full retirement age or older 100 percent of your benefit amount;
  • \n
  • Widow or widower, age 60 to full retirement age 71 to 99 percent of your basic amount;
  • \n
  • Disabled widow or widower, age 50 through 59 71 percent;
  • \n
  • Widow or widower, any age, caring for a child under age 16 75 percent;
  • \n
  • A child under age 18 (19 if still in elementary or secondary school) or disabled 75 percent; and
  • \n
  • Your dependent parent(s), age 62 or older:\n
      \n
    • One surviving parent 82 percent.
    • \n
    • Two surviving parents 75 percent to each parent.
    • \n
    \n
  • \n
\n

Percentages for a surviving divorced spouse would be the same as above.

\n

There may also be a special lump-sum death payment.

\n

Maximum Family Amount

\n

There's a limit to the amount that family members can receive each month. The limit varies, but it is generally equal to between 150 and 180 percent of the basic benefit rate.

\n

If the sum of the benefits payable to family members is greater than this limit, the benefits will be reduced proportionately. (Any benefits paid to a surviving divorced spouse based on disability or age won't count toward this maximum amount.)

\n

Get your online or check our Benefit Calculators for an estimate of the benefits your family could receive if you died right now.

\n

Other Things You Need To Know

\n

There are limits on how much survivors may earn while they receive benefits.

\n

Benefits for a widow, widower, or surviving divorced spouse may be affected by several additional factors:

\n
\nIf they remarry\n
\n

If your widow, widower, or surviving divorced spouse remarries before they reach age 60 (age 50 if disabled), they cannot receive benefits as a surviving spouse while they're married.

\n

If your widow, widower, or surviving divorced spouse remarries after they reach age 60 (age 50 if disabled), they will continue to qualify for benefits on your Social Security record.

\n

However, if their current spouse is a Social Security beneficiary, they may want to apply for spouse's benefits on their record. If that amount is more than the widow's or widower's benefit on your record, they will receive a combination of benefits that equals the higher amount.

\n
\n
\n
\nIf they're eligible for retirement benefits on their own record\n
\n

If your widow, widower, or surviving divorced spouse receives benefits on your record, they can switch to their own retirement benefit as early as age 62. This assumes they're eligible for retirement benefits and their retirement rate is higher than their rate as a widow, widower, or surviving divorced spouse.

\n

In many cases, a widow or widower can begin receiving one benefit at a reduced rate and then, at full retirement age, switch to the other benefit at an unreduced rate.

\n
\nFull retirement age for retirement benefits may not match full retirement age for survivors benefits.\n
\n
\n
\n
\nIf they will also receive a pension based on work not covered by Social Security\n
\n

If your widow, widower, or surviving divorced spouse will also receive a pension based on work not covered by Social Security, such as government or foreign work, their Social Security benefits as a survivor may be affected.

\n
\n
\n
\n
\n
\n
\n
" +} +``` + +Sample data instance for `dialogue_domain` : + +``` +{ + "dial_id": "8df07b7a98990db27c395cb1f68a962e", + "domain": "dmv", + "turns": [ + { + "turn_id": 1, + "role": "user", + "da": "query_condition", + "references": [ + { + "id_sp": "4", + "label": "precondition", + "doc_id": "Top 5 DMV Mistakes and How to Avoid Them#3_0" + } + ], + "utterance": "Hello, I forgot o update my address, can you help me with that?" + }, + { + "turn_id": 2, + "role": "agent", + "da": "respond_solution", + "references": [ + { + "id_sp": "6", + "label": "solution", + "doc_id": "Top 5 DMV Mistakes and How to Avoid Them#3_0" + }, + { + "id_sp": "7", + "label": "solution", + "doc_id": "Top 5 DMV Mistakes and How to Avoid Them#3_0" + } + ], + "utterance": "hi, you have to report any change of address to DMV within 10 days after moving. You should do this both for the address associated with your license and all the addresses associated with all your vehicles." + }, + { + "turn_id": 3, + "role": "user", + "da": "query_solution", + "references": [ + { + "id_sp": "56", + "label": "solution", + "doc_id": "Top 5 DMV Mistakes and How to Avoid Them#3_0" + } + ], + "utterance": "Can I do my DMV transactions online?" + } + ] +} +``` + + +### Data Fields + +- `document_domain` contains the documents that are indexed by key `domain` and `doc_id` . Each document instance includes the following, + + - `domain`: the domain of the document; + - `doc_id`: the ID of a document; + - `title`: the title of the document; + - `doc_text`: the text content of the document (without HTML markups); + - `spans`: key-value pairs of all spans in the document, with `id_sp` as key. Each span includes the following, + - `id_sp`: the id of a span as noted by `text_id` in `doc_html_ts`; + - `start_sp`/ `end_sp`: the start/end position of the text span in `doc_text`; + - `text_sp`: the text content of the span. + - `id_sec`: the id of the (sub)section (e.g. `
<p>`) or title (`<h2>
`) that contains the span. + - `start_sec` / `end_sec`: the start/end position of the (sub)section in `doc_text`. + - `text_sec`: the text of the (sub)section. + - `title`: the title of the (sub)section. + - `parent_titles`: the parent titles of the `title`. + - `doc_html_ts`: the document content with HTML markups and the annotated spans that are indicated by `text_id` attribute, which corresponds to `id_sp`. + - `doc_html_raw`: the document content with HTML markups and without span annotations. + + +- `dialogue_domain` + + Each dialogue instance includes the following, + + - `dial_id`: the ID of a dialogue; + - `domain`: the domain of the document; + - `turns`: a list of dialogue turns. Each turn includes, + - `turn_id`: the time order of the turn; + - `role`: either "agent" or "user"; + - `da`: dialogue act; + - `references`: a list of spans with `id_sp` , `label` and `doc_id`. `references` is empty if a turn is for indicating previous user query not answerable or irrelevant to the document. **Note** that labels "*precondition*"/"*solution*" are fuzzy annotations that indicate whether a span is for describing a conditional context or a solution. + - `utterance`: the human-generated utterance based on the dialogue scene. + + +- `multidoc2dial` + + Each dialogue instance includes the following, + + - `id`: the ID of a QA instance + - `title`: the title of the relevant document; + - `context`: the text content of the relevant document (without HTML markups). + - `question`: user query; + - `da`: dialogue act; + - `answers`: the answers that are grounded in the associated document; + - `text`: the text content of the grounding span; + - `answer_start`: the start position of the grounding span in the associated document (context); + - `utterance`: the human-generated utterance based on the dialogue scene. 
+ - `domain`: domain of the relevant document; + +### Data Splits + +- Training, dev and test splits for the default configuration `multidoc2dial`, with 21451, 4201 and 5 examples respectively, +- Training & dev splits for the dialogue domain, with 3474 and 661 examples, +- Training split only for the document domain, with 488 examples. + +## Dataset Creation + +### Curation Rationale + +[More Information Needed] + +### Source Data + +#### Initial Data Collection and Normalization + +[More Information Needed] + +#### Who are the source language producers? + +[More Information Needed] + +### Annotations + +#### Annotation process + +[More Information Needed] + +#### Who are the annotators? + +[More Information Needed] + +### Personal and Sensitive Information + +[More Information Needed] + +## Considerations for Using the Data + +### Social Impact of Dataset + +[More Information Needed] + +### Discussion of Biases + +[More Information Needed] + +### Other Known Limitations + +[More Information Needed] + +## Additional Information + +### Dataset Curators + +Song Feng, Siva Sankalp Patel, Hui Wan, Sachindra Joshi + +### Licensing Information + +Creative Commons Attribution 3.0 Unported + +### Citation Information + +```bibtex +@inproceedings{feng2021multidoc2dial, + title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents}, + author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra}, + booktitle={EMNLP}, + year={2021} +} +``` + +### Contributions + +Thanks to [@songfeng](https://github.com/songfeng) and [@sivasankalpp](https://github.com/sivasankalpp) for adding this dataset. 
\ No newline at end of file diff --git a/datasets/multidoc2dial/dataset_infos.json b/datasets/multidoc2dial/dataset_infos.json new file mode 100644 index 00000000000..bb112e6553c --- /dev/null +++ b/datasets/multidoc2dial/dataset_infos.json @@ -0,0 +1 @@ +{"dialogue_domain": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. \n", "citation": "@inproceedings{feng2021multidoc2dial,\n title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n booktitle={EMNLP},\n year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"dial_id": {"dtype": "string", "id": null, "_type": "Value"}, "domain": {"dtype": "string", "id": null, "_type": "Value"}, "turns": [{"turn_id": {"dtype": "int32", "id": null, "_type": "Value"}, "role": {"dtype": "string", "id": null, "_type": "Value"}, "da": {"dtype": "string", "id": null, "_type": "Value"}, "references": [{"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "string", "id": null, "_type": "Value"}, "doc_id": {"dtype": "string", "id": null, "_type": "Value"}}], "utterance": {"dtype": "string", "id": null, "_type": "Value"}}]}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "dialogue_domain", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 11700598, "num_examples": 3474, "dataset_name": "multi_doc2dial"}, 
"validation": {"name": "validation", "num_bytes": 2210378, "num_examples": 661, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 13910976, "size_in_bytes": 20362120}, "document_domain": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. \n", "citation": "@inproceedings{feng2021multidoc2dial,\n title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n booktitle={EMNLP},\n year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"domain": {"dtype": "string", "id": null, "_type": "Value"}, "doc_id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "doc_text": {"dtype": "string", "id": null, "_type": "Value"}, "spans": [{"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "tag": {"dtype": "string", "id": null, "_type": "Value"}, "start_sp": {"dtype": "int32", "id": null, "_type": "Value"}, "end_sp": {"dtype": "int32", "id": null, "_type": "Value"}, "text_sp": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "parent_titles": {"feature": {"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "level": 
{"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "id_sec": {"dtype": "string", "id": null, "_type": "Value"}, "start_sec": {"dtype": "int32", "id": null, "_type": "Value"}, "text_sec": {"dtype": "string", "id": null, "_type": "Value"}, "end_sec": {"dtype": "int32", "id": null, "_type": "Value"}}], "doc_html_ts": {"dtype": "string", "id": null, "_type": "Value"}, "doc_html_raw": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "document_domain", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29378955, "num_examples": 488, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 29378955, "size_in_bytes": 35830099}, "multidoc2dial": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. 
\n", "citation": "@inproceedings{feng2021multidoc2dial,\n title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n booktitle={EMNLP},\n year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "context": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "da": {"dtype": "string", "id": null, "_type": "Value"}, "answers": {"feature": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "answer_start": {"dtype": "int32", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}, "domain": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "multidoc2dial", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"validation": {"name": "validation", "num_bytes": 24331976, "num_examples": 4201, "dataset_name": "multi_doc2dial"}, "train": {"name": "train", "num_bytes": 126589982, "num_examples": 21451, "dataset_name": "multi_doc2dial"}, "test": {"name": "test", "num_bytes": 33032, "num_examples": 5, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 150954990, "size_in_bytes": 157406134}} \ No newline at end of file diff --git a/datasets/multidoc2dial/dummy/dialogue_domain/1.0.0/dummy_data.zip 
b/datasets/multidoc2dial/dummy/dialogue_domain/1.0.0/dummy_data.zip new file mode 100644 index 00000000000..a93f8828328 Binary files /dev/null and b/datasets/multidoc2dial/dummy/dialogue_domain/1.0.0/dummy_data.zip differ diff --git a/datasets/multidoc2dial/dummy/document_domain/1.0.0/dummy_data.zip b/datasets/multidoc2dial/dummy/document_domain/1.0.0/dummy_data.zip new file mode 100644 index 00000000000..a93f8828328 Binary files /dev/null and b/datasets/multidoc2dial/dummy/document_domain/1.0.0/dummy_data.zip differ diff --git a/datasets/multidoc2dial/dummy/multidoc2dial/1.0.0/dummy_data.zip b/datasets/multidoc2dial/dummy/multidoc2dial/1.0.0/dummy_data.zip new file mode 100644 index 00000000000..a93f8828328 Binary files /dev/null and b/datasets/multidoc2dial/dummy/multidoc2dial/1.0.0/dummy_data.zip differ diff --git a/datasets/multidoc2dial/multidoc2dial.py b/datasets/multidoc2dial/multidoc2dial.py new file mode 100644 index 00000000000..b2aa112b401 --- /dev/null +++ b/datasets/multidoc2dial/multidoc2dial.py @@ -0,0 +1,364 @@ +# coding=utf-8 +# Copyright 2020 The TensorFlow Datasets Authors and the HuggingFace Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Lint as: python3
"""MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents"""


import json
import os

import datasets


logger = datasets.logging.get_logger(__name__)

_CITATION = """\
@inproceedings{feng2021multidoc2dial,
    title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},
    author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},
    booktitle={EMNLP},
    year={2021}
}
"""

_DESCRIPTION = """\
MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. \
Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a \
single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking \
conversation involves multiple topics, and hence is grounded on different documents. \n\
"""

_HOMEPAGE = "https://doc2dial.github.io/multidoc2dial/"


_URL = "https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip"


class MultiDoc2dial(datasets.GeneratorBasedBuilder):
    """MultiDoc2Dial v1.0

    Three configurations are exposed:

    - ``dialogue_domain``: one example per dialogue (id, domain, turns).
    - ``document_domain``: one example per grounding document with its spans.
    - ``multidoc2dial`` (default): one reading-comprehension example per user
      turn that is followed by an answerable agent turn.
    """

    VERSION = datasets.Version("1.0.0")

    BUILDER_CONFIGS = [
        datasets.BuilderConfig(
            name="dialogue_domain",
            version=VERSION,
            description="This part of the dataset covers the dialogue domain that has questions, answers and the associated doc ids",
        ),
        datasets.BuilderConfig(
            name="document_domain",
            version=VERSION,
            description="This part of the dataset covers the document domain which details all the documents in the various domains",
        ),
        datasets.BuilderConfig(
            name="multidoc2dial",
            version=VERSION,
            description="Load MultiDoc2Dial dataset for machine reading comprehension tasks",
        ),
    ]

    DEFAULT_CONFIG_NAME = "multidoc2dial"

    def _info(self):
        """Return the ``DatasetInfo`` (feature schema) for the selected config."""
        if self.config.name == "dialogue_domain":
            features = datasets.Features(
                {
                    "dial_id": datasets.Value("string"),
                    "domain": datasets.Value("string"),
                    "turns": [
                        {
                            "turn_id": datasets.Value("int32"),
                            "role": datasets.Value("string"),
                            "da": datasets.Value("string"),
                            "references": [
                                {
                                    "id_sp": datasets.Value("string"),
                                    "label": datasets.Value("string"),
                                    "doc_id": datasets.Value("string"),
                                }
                            ],
                            "utterance": datasets.Value("string"),
                        }
                    ],
                }
            )

        elif self.config.name == "document_domain":
            features = datasets.Features(
                {
                    "domain": datasets.Value("string"),
                    "doc_id": datasets.Value("string"),
                    "title": datasets.Value("string"),
                    "doc_text": datasets.Value("string"),
                    "spans": [
                        {
                            "id_sp": datasets.Value("string"),
                            "tag": datasets.Value("string"),
                            "start_sp": datasets.Value("int32"),
                            "end_sp": datasets.Value("int32"),
                            "text_sp": datasets.Value("string"),
                            "title": datasets.Value("string"),
                            "parent_titles": datasets.features.Sequence(
                                {
                                    "id_sp": datasets.Value("string"),
                                    "text": datasets.Value("string"),
                                    "level": datasets.Value("string"),
                                }
                            ),
                            "id_sec": datasets.Value("string"),
                            "start_sec": datasets.Value("int32"),
                            "text_sec": datasets.Value("string"),
                            "end_sec": datasets.Value("int32"),
                        }
                    ],
                    "doc_html_ts": datasets.Value("string"),
                    "doc_html_raw": datasets.Value("string"),
                }
            )

        else:
            # Reading-comprehension view (default ``multidoc2dial`` config).
            features = datasets.Features(
                {
                    "id": datasets.Value("string"),
                    "title": datasets.Value("string"),
                    "context": datasets.Value("string"),
                    "question": datasets.Value("string"),
                    "da": datasets.Value("string"),
                    "answers": datasets.features.Sequence(
                        {
                            "text": datasets.Value("string"),
                            "answer_start": datasets.Value("int32"),
                        }
                    ),
                    "utterance": datasets.Value("string"),
                    "domain": datasets.Value("string"),
                }
            )

        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            citation=_CITATION,
        )

    def _split_generators(self, dl_manager):
        """Download the archive and return the split generators for the config.

        Raises:
            ValueError: if the config name matches none of the known layouts
                (previously this case silently returned ``None``).
        """
        data_dir = dl_manager.download_and_extract(_URL)

        def make_split(split_name, *path_parts):
            # Every split only differs by its name and the JSON file it reads.
            return datasets.SplitGenerator(
                name=split_name,
                gen_kwargs={"filepath": os.path.join(data_dir, *path_parts)},
            )

        config_name = self.config.name

        if config_name == "dialogue_domain":
            return [
                make_split(datasets.Split.TRAIN, "multidoc2dial/multidoc2dial_dial_train.json"),
                make_split(datasets.Split.VALIDATION, "multidoc2dial/multidoc2dial_dial_validation.json"),
            ]

        if config_name == "document_domain":
            return [
                make_split(datasets.Split.TRAIN, "multidoc2dial/multidoc2dial_doc.json"),
            ]

        if "multidoc2dial_" in config_name:
            # Per-domain variant: the domain is the suffix after the last "_".
            domain = config_name.split("_")[-1]
            return [
                make_split(
                    datasets.Split.VALIDATION,
                    "multidoc2dial_domain",
                    domain,
                    "multidoc2dial_dial_validation.json",
                ),
                make_split(
                    datasets.Split.TRAIN,
                    "multidoc2dial_domain",
                    domain,
                    "multidoc2dial_dial_train.json",
                ),
                make_split(
                    datasets.Split.TEST,
                    "multidoc2dial_domain",
                    domain,
                    "multidoc2dial_dial_test.json",
                ),
            ]

        if config_name == "multidoc2dial":
            return [
                make_split(datasets.Split.VALIDATION, "multidoc2dial/multidoc2dial_dial_validation.json"),
                make_split(datasets.Split.TRAIN, "multidoc2dial/multidoc2dial_dial_train.json"),
                make_split(datasets.Split.TEST, "multidoc2dial/multidoc2dial_dial_test.json"),
            ]

        raise ValueError("Unknown config name: {}".format(config_name))

    def _load_doc_data_rc(self, filepath):
        """Load ``doc_data`` from ``multidoc2dial_doc.json`` located next to *filepath*."""
        doc_filepath = os.path.join(os.path.dirname(filepath), "multidoc2dial_doc.json")
        with open(doc_filepath, encoding="utf-8") as f:
            return json.load(f)["doc_data"]

    def _get_answers_rc(self, references, spans, doc_text):
        """Obtain the grounding annotation for a given dialogue turn.

        Args:
            references: list of dicts, each carrying an ``id_sp`` key.
            spans: mapping from ``id_sp`` to a dict with ``start_sp``/``end_sp``.
            doc_text: text of the document the span offsets index into.

        Returns:
            ``[]`` when *references* is empty; otherwise a one-element list with
            the text running from the smallest ``start_sp`` to the largest
            ``end_sp`` of the referenced spans, and that start offset.
        """
        if not references:
            return []
        referenced = [spans[ref["id_sp"]] for ref in references]
        # Offsets are assumed non-negative; the answer is the single region
        # enclosing every referenced span.
        start = min(span["start_sp"] for span in referenced)
        end = max(span["end_sp"] for span in referenced)
        return [{"text": doc_text[start:end], "answer_start": start}]

    def _generate_examples(self, filepath):
        """This function returns the examples in the raw (text) form.

        Yields ``(key, example)`` pairs whose shape depends on the config; see
        ``_info`` for the corresponding feature schemas.
        """
        if self.config.name == "dialogue_domain":
            logger.info("generating examples from = %s", filepath)
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
            for domain, dialogues in data["dial_data"].items():
                for dialogue in dialogues:
                    yield dialogue["dial_id"], {
                        "dial_id": dialogue["dial_id"],
                        "turns": dialogue["turns"],
                        "domain": domain,
                    }

        elif self.config.name == "document_domain":
            logger.info("generating examples from = %s", filepath)
            with open(filepath, encoding="utf-8") as f:
                data = json.load(f)
            span_keys = (
                "id_sp",
                "tag",
                "start_sp",
                "end_sp",
                "text_sp",
                "title",
                "parent_titles",
                "id_sec",
                "start_sec",
                "text_sec",
                "end_sec",
            )
            for domain, docs in data["doc_data"].items():
                for doc_id, doc in docs.items():
                    yield doc_id, {
                        "domain": domain,
                        "doc_id": doc_id,
                        "title": doc["title"],
                        "doc_text": doc["doc_text"],
                        # ``spans`` is keyed by span id in the JSON; emit it as
                        # a list restricted to the declared feature keys.
                        "spans": [{key: span[key] for key in span_keys} for span in doc["spans"].values()],
                        "doc_html_ts": doc["doc_html_ts"],
                        "doc_html_raw": doc["doc_html_raw"],
                    }

        elif "multidoc2dial" in self.config.name:
            logger.info("generating examples from = %s", filepath)
            # Flatten {domain: {doc_id: doc}} into {doc_id: doc} for direct lookup.
            d_doc_data = {}
            for d_doc in self._load_doc_data_rc(filepath).values():
                d_doc_data.update(d_doc)
            with open(filepath, encoding="utf-8") as f:
                dial_data = json.load(f)["dial_data"]
            for domain, dialogues in dial_data.items():
                for dial in dialogues:
                    turns = dial["turns"]
                    all_prev_utterances = []
                    for idx, turn in enumerate(turns):
                        utterance_line = turn["utterance"].replace("\n", " ").replace("\t", " ")
                        all_prev_utterances.append("{}: {}".format(turn["role"], utterance_line))
                        if turn["role"] == "agent":
                            continue
                        if idx + 1 >= len(turns):
                            continue
                        turn_to_predict = turns[idx + 1]
                        # Only user turns answered by an agent turn that has a
                        # solution produce an example.
                        if turn_to_predict["role"] != "agent" or turn_to_predict["da"] == "respond_no_solution":
                            continue
                        # ``references`` may be empty; resolve the document only
                        # now, and fall back to the answering turn's references
                        # instead of failing on ``references[0]``.
                        references = turn["references"] or turn_to_predict["references"]
                        if not references:
                            continue
                        doc_id = references[0]["doc_id"]
                        doc = d_doc_data[doc_id]
                        # Question = current utterance, then the dialogue
                        # history from most recent to oldest.
                        question_str = utterance_line + "[SEP]" + "||".join(reversed(all_prev_utterances[:-1]))
                        id_ = "{}_{}".format(dial["dial_id"], turn["turn_id"])
                        qa = {
                            "id": id_,
                            "title": doc_id,
                            "context": doc["doc_text"],
                            "question": question_str,
                            "da": turn["da"],
                            "answers": self._get_answers_rc(
                                turn_to_predict["references"],
                                doc["spans"],
                                doc["doc_text"],
                            ),
                            "utterance": turn_to_predict["utterance"],
                            "domain": domain,
                        }
                        yield id_, qa