Datasets:
gem_id
string
| gem_parent_id
string
| dart_id
int32
0
30.5k
| tripleset
list
| subtree_was_extended
bool
| target_sources
list
| target
string
| references
list
|
---|---|---|---|---|---|---|---|
"dart-train-0" | "dart-train-0" | 0 | [
[
"First Clearing",
"LOCATION",
"On NYS 52 1 Mi. Youngsville"
],
[
"On NYS 52 1 Mi. Youngsville",
"CITY_OR_TOWN",
"Callicoon, New York"
]
] | false | [
"WikiTableQuestions_mturk"
] | "First Clearing based on Callicoon, New York and location at On NYS 52 1 Mi. Youngsville" | [] |
"dart-train-1" | "dart-train-1" | 1 | [
[
"[TABLECONTEXT]",
"MARKER_NAME",
"Old Turnpike"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"List of New York State Historic Markers in Sullivan County, New York"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Old Turnpike is a Historic Marker in Sullivan County, New York." | [] |
"dart-train-2" | "dart-train-2" | 2 | [
[
"Antalya 15, Turkey",
"SURFACE",
"Hard"
],
[
"Antalya 15, Turkey",
"OPPONENT",
"Tereza Martincová"
],
[
"Antalya 15, Turkey",
"SCORE",
"6-4, 6-3"
],
[
"15 April 2013",
"TOURNAMENT",
"Antalya 15, Turkey"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Tereza Martincová" | [] |
"dart-train-3" | "dart-train-3" | 3 | [
[
"2 April 2012",
"TOURNAMENT",
"Ribeirão Preto, Brazil"
],
[
"Ribeirão Preto, Brazil",
"SURFACE",
"Hard"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Beatriz Haddad Maia played on 2 April 2012 in Ribeirão Preto, Brazil on a hard surface." | [] |
"dart-train-4" | "dart-train-4" | 4 | [
[
"5",
"STADIUM",
"shea stadium"
]
] | false | [
"WikiSQL_decl_sents"
] | "The week 5 game is played in Shea Stadium." | [] |
"dart-train-5" | "dart-train-5" | 5 | [
[
"Northwestern College",
"NICKNAME",
"Red Raiders"
],
[
"Northwestern College",
"LOCATION",
"Orange City, Iowa"
]
] | true | [
"WikiSQL_decl_sents"
] | "The team whose nickname is red raiders is located in the orange city, iowa" | [] |
"dart-train-6" | "dart-train-6" | 6 | [
[
"University of Mississippi",
"NEW_CONFERENCE",
"SELC"
]
] | false | [
"WikiSQL_decl_sents"
] | "University of mississippi was in the selc new conference." | [] |
"dart-train-7" | "dart-train-7" | 7 | [
[
"University of Mississippi",
"NEW_CLASSIFICATION",
"MCLA Division I"
],
[
"University of Mississippi",
"YEARS",
"2008-2009"
]
] | true | [
"WikiSQL_decl_sents"
] | "The years that the new classification was MCLA division i are 2008-2009." | [] |
"dart-train-8" | "dart-train-8" | 8 | [
[
"University of Nebraska at Omaha",
"NICKNAME",
"Mavericks"
]
] | false | [
"WikiSQL_decl_sents"
] | "The nickname of the team of University of Nebraska at Omaha is mavericks." | [] |
"dart-train-9" | "dart-train-9" | 9 | [
[
"William Wasmund",
"FIELD_GOALS",
"0"
],
[
"William Wasmund",
"EXTRA_POINTS",
"0"
],
[
"William Wasmund",
"POINTS",
"5"
],
[
"William Wasmund",
"TOUCHDOWNS",
"1"
]
] | false | [
"WikiSQL_decl_sents"
] | "William Wasmund scored 5.0 points" | [] |
"dart-train-10" | "dart-train-10" | 10 | [
[
"We're Already Here",
"ORIGINAL_AIR_DATE",
"June16,2009"
]
] | false | [
"WikiSQL_lily"
] | "We're Already Here aired on June16,2009" | [] |
"dart-train-11" | "dart-train-11" | 11 | [
[
"She's a Lump",
"DIRECTED_BY",
"Rohn Schmidt"
],
[
"She's a Lump",
"ORIGINAL_AIR_DATE",
"June23,2009"
]
] | false | [
"WikiSQL_lily"
] | "She's a Lump directed by Rohn Schmidt aired on June 23, 2009" | [] |
"dart-train-12" | "dart-train-12" | 12 | [
[
"Watch Siggybaby Burn",
"ORIGINAL_AIR_DATE",
"June30,2009"
],
[
"Watch Siggybaby Burn",
"WRITER(S)",
"Denitria Harris-Lawrence Jessica Mecklenburg"
]
] | false | [
"WikiSQL_lily"
] | "Watch Siggybaby Burn wirtten by Denitria Harris-Lawrence Jessica Mecklenburg aired on June 23, 2010" | [] |
"dart-train-13" | "dart-train-13" | 13 | [
[
"What Would You Do?",
"DIRECTED_BY",
"Tricia Brock"
],
[
"What Would You Do?",
"WRITER(S)",
"Randy Walker"
],
[
"What Would You Do?",
"ORIGINAL_AIR_DATE",
"July7,2009"
]
] | false | [
"WikiSQL_lily"
] | "What Would You Do? directed by Tricia Brock and written by Randy Walker aired on June 23, 2011" | [] |
"dart-train-14" | "dart-train-14" | 14 | [
[
"Mooooooooo",
"WRITER(S)",
"Elle Johnson Annie Brunner"
],
[
"Mooooooooo",
"DIRECTED_BY",
"Artie Mandelberg"
]
] | false | [
"WikiSQL_lily"
] | "Mooooooooo is directed by Artie Mandelberg and written by Elle Johnson Annie Brunner" | [] |
"dart-train-15" | "dart-train-15" | 15 | [
[
"hear the birds",
"SEASON_3_EP_#",
"12"
]
] | false | [
"WikiSQL_decl_sents"
] | "The title of season 3 episode 12 is "Hear the Birds."" | [] |
"dart-train-16" | "dart-train-16" | 16 | [
[
"5",
"BRONZE",
"1"
],
[
"5",
"GOLD",
"3"
],
[
"Russia (RUS)",
"TOTAL",
"5"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Biathlon at the 1994 Winter Olympics" | [] |
"dart-train-17" | "dart-train-17" | 17 | [
[
"France (FRA)",
"TOTAL",
"3"
],
[
"France (FRA)",
"RANK",
"4"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Biathlon at the 1994 Winter Olympics rank 4 nations in France (FRA)" | [] |
"dart-train-18" | "dart-train-18" | 18 | [
[
"show 78",
"GARFIELD_EPISODE_1",
"the first annual garfield watchers test"
],
[
"show 78",
"GARFIELD_EPISODE_2",
"the record breaker"
]
] | true | [
"WikiSQL_decl_sents"
] | "The record breaker has Garfield episode 1 as the first annual Garfield watchers test." | [] |
"dart-train-19" | "dart-train-19" | 19 | [
[
"show 86",
"ORIGINAL_AIR_DATE",
"october 31, 1992"
],
[
"show 86",
"U.S._ACRES_EPISODE",
"who done it?"
]
] | true | [
"WikiSQL_decl_sents"
] | "The original air date of the u.s. acres episode who done it is october 31, 1992." | [] |
"dart-train-20" | "dart-train-20" | 20 | [
[
"show 84",
"GARFIELD_EPISODE_1",
"the worst pizza in the history of mankind"
]
] | false | [
"WikiSQL_decl_sents"
] | "In Garfield Episode 1, The worst pizza in the history of mankind is "show 84"." | [] |
"dart-train-21" | "dart-train-21" | 21 | [
[
"1981",
"POPULATION_OF_ENGLAND_AND_WALES_000",
"49634"
],
[
"1981",
"MUSLIM_(%_OF_TOTAL_POPULATION)",
"1.11"
]
] | true | [
"WikiSQL_decl_sents"
] | "The Muslim percentage of the 49634 population of England and Wales is 1.11." | [] |
"dart-train-22" | "dart-train-22" | 22 | [
[
"2001",
"MUSLIM_(%_OF_TOTAL_POPULATION)",
"3.07"
],
[
"2001",
"REGISTERED_MOSQUES",
"614"
]
] | true | [
"WikiSQL_decl_sents"
] | "The percentage of Muslims during a time where there were 614 registered mosques was 3.07." | [] |
"dart-train-23" | "dart-train-23" | 23 | [
[
"USL W-League",
"DIVISION",
"2"
],
[
"2003",
"LEAGUE",
"USL W-League"
],
[
"[TABLECONTEXT]",
"YEAR",
"2003"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
]
] | true | [
"WikiTableQuestions_lily"
] | "The Colorado Rapids Women was a team in the USL W-League 2nd Division in 2003." | [] |
"dart-train-24" | "dart-train-24" | 24 | [
[
"2005",
"REGULAR_SEASON",
"6th, Western"
],
[
"2005",
"LEAGUE",
"USL W-League"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
],
[
"[TABLECONTEXT]",
"YEAR",
"2005"
]
] | true | [
"WikiTableQuestions_lily"
] | "The USL W-League team Colorado Rapids finished the 2005 regular season 6th in the Western conference." | [] |
"dart-train-25" | "dart-train-25" | 25 | [
[
"2008",
"PLAYOFFS",
"Did not qualify"
],
[
"[TABLECONTEXT]",
"YEAR",
"2008"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
],
[
"2008",
"LEAGUE",
"USL W-League"
]
] | true | [
"WikiTableQuestions_lily"
] | "The Colorado Rapids did not qualify the playoffs for USL W-League in 2008." | [] |
"dart-train-26" | "dart-train-26" | 26 | [
[
"2010",
"PLAYOFFS",
"Did not qualify"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
],
[
"2010",
"REGULAR_SEASON",
"6th, Western"
],
[
"[TABLECONTEXT]",
"YEAR",
"2010"
]
] | true | [
"WikiTableQuestions_lily"
] | "Despite finishing 6th in the Western Conference, the Colorado Rapids did not qualify playoffs in 2010." | [] |
"dart-train-27" | "dart-train-27" | 27 | [
[
"2013",
"REGULAR_SEASON",
"4th, Western"
],
[
"2013",
"LEAGUE",
"USL W-League"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
],
[
"[TABLECONTEXT]",
"YEAR",
"2013"
],
[
"USL W-League",
"DIVISION",
"1"
]
] | true | [
"WikiTableQuestions_lily"
] | "The division 1 team Colorado Rapids wrapped up the regular season 4th place in the Western Conference in the USL W-League." | [] |
"dart-train-28" | "dart-train-28" | 28 | [
[
"[TABLECONTEXT]",
"YEAR",
"2004"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Colorado Rapids Women"
],
[
"2004",
"LEAGUE",
"USL W-League"
],
[
"USL W-League",
"DIVISION",
"1"
]
] | true | [
"WikiTableQuestions_mturk"
] | "The Colorado Rapids Women played in division 1 of the USL W-League in 2004." | [] |
"dart-train-29" | "dart-train-29" | 29 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"Athletics at the 2002 Asian Games"
],
[
"[TABLECONTEXT]",
"COUNTRY",
"India (IND)"
],
[
"India (IND)",
"SILVER",
"6"
],
[
"India (IND)",
"GOLD",
"7"
]
] | true | [
"WikiTableQuestions_mturk"
] | "India won 7 gold medals and 6 silver medals at the 2002 Asian Games." | [] |
"dart-train-30" | "dart-train-30" | 30 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"Athletics at the 2002 Asian Games"
],
[
"[TABLECONTEXT]",
"COUNTRY",
"Kazakhstan (KAZ)"
],
[
"Kazakhstan (KAZ)",
"BRONZE",
"5"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Kazakhstan won 5 Bronze medals at the 2002 Asian Games." | [] |
"dart-train-31" | "dart-train-31" | 31 | [
[
"1999",
"WINNINGTEAM",
"Melbourne Storm (1)"
],
[
"1999",
"CLIVE_CHURCHILL_MEDAL",
"Brett Kimmorley"
]
] | true | [
"WikiSQL_decl_sents"
] | "Brett Kimmorley, who was chosen for the Clive Churchill Medal, belonged to Melbourne Storm." | [] |
"dart-train-32" | "dart-train-32" | 32 | [
[
"Nikolett Listár",
"TIME",
"23.87"
],
[
"23.87",
"RANK",
"10"
],
[
"Nikolett Listár",
"HEAT",
"2"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Nikolett Listár has a 10th rank and 2 heat the time was 23.87" | [] |
"dart-train-33" | "dart-train-33" | 33 | [
[
"[TABLECONTEXT]",
"NAME",
"Thandiwe Nyathy"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Athletics at the 2011 All-Africa Games - Women's 5000 metres"
],
[
"Thandiwe Nyathy",
"RANK",
"9"
]
] | true | [
"WikiTableQuestions_mturk"
] | "In the 2011 All-Africa Games, Thandiwe Nyathy was ranked 9 for the women's 5000 metres." | [] |
"dart-train-34" | "dart-train-34" | 34 | [
[
"Albasty Fossae",
"DIAMETER_(KM)",
"500.0"
],
[
"Albasty Fossae",
"LATITUDE",
"9.0S"
]
] | true | [
"WikiSQL_decl_sents"
] | "You can find the diameter (km) of 500.0 at 9.0s." | [] |
"dart-train-35" | "dart-train-35" | 35 | [
[
"Naijok Fossae",
"LATITUDE",
"70.2S"
]
] | false | [
"WikiSQL_decl_sents"
] | "The name origin of naijok fossae can be found at 70.2s." | [] |
"dart-train-36" | "dart-train-36" | 36 | [
[
"Perunitsa Fossae",
"LATITUDE",
"10.0S"
]
] | false | [
"WikiSQL_decl_sents"
] | "Perunitsa fossae is at latitude 10.0s." | [] |
"dart-train-37" | "dart-train-37" | 37 | [
[
"september 23-24, 2008",
"POLL_SOURCE",
"survey usa"
],
[
"september 23-24, 2008",
"DEMOCRAT:_JAY_NIXON",
"54%"
],
[
"september 23-24, 2008",
"LEAD_MARGIN",
"17"
]
] | true | [
"WikiSQL_decl_sents"
] | "Survey usa's the poll source that claimed the Lead Margin was 17 and the Democrat: Jay Nixon had 54% of the votes." | [] |
"dart-train-38" | "dart-train-38" | 38 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"Chinese FA Super Cup"
],
[
"2003",
"FA_CUP_WINNER",
"Beijing Hyundai (now Beijing Guoan)"
],
[
"[TABLECONTEXT]",
"SEASON",
"2003"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Beijing Hyundai won the Chinese FA Super Cup in the 2003 season." | [] |
"dart-train-39" | "dart-train-39" | 39 | [
[
"Jose Panganiban",
"AREA_KM2",
"214.44"
]
] | false | [
"WikiSQL_decl_sents"
] | "Jose Panganiban has an area of exactly 214.44 sq. km." | [] |
"dart-train-40" | "dart-train-40" | 40 | [
[
"Santa Elena",
"AREA_KM2",
"199.35"
]
] | false | [
"WikiSQL_decl_sents"
] | "The municipality Santa Elena has an area of exactly 199.35 sq. km." | [] |
"dart-train-41" | "dart-train-41" | 41 | [
[
"[TABLECONTEXT]",
"DATE",
"18 March 1987"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Marek Leśniak"
],
[
"18 March 1987",
"COMPETITION",
"International Friendly"
],
[
"International Friendly",
"OPPONENT",
"Finland"
]
] | true | [
"WikiTableQuestions_lily"
] | "Marek Leśniak played Finland on March 18, 1987." | [] |
"dart-train-42" | "dart-train-42" | 42 | [
[
"International Friendly",
"OPPONENT",
"Romania"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Marek Leśniak"
],
[
"International Friendly",
"SCORE",
"1-0"
],
[
"[TABLECONTEXT]",
"DATE",
"2 September 1987"
],
[
"2 September 1987",
"COMPETITION",
"International Friendly"
]
] | true | [
"WikiTableQuestions_lily"
] | "In an international friendly, Marek Leśniak defeated Romania with a score of 1-0." | [] |
"dart-train-43" | "dart-train-43" | 43 | [
[
"International Friendly",
"VENUE",
"Zawisza Bydgoszcz Stadium, Bydgoszcz, Poland"
],
[
"International Friendly",
"OPPONENT",
"Romania"
],
[
"2 September 1987",
"COMPETITION",
"International Friendly"
]
] | true | [
"WikiTableQuestions_lily"
] | "The September 2, 1987 match against Romania was held at Zawisza Bydgoszcz Stadium in Bydgoszcz, Poland." | [] |
"dart-train-44" | "dart-train-44" | 44 | [
[
"13 April 1993",
"COMPETITION",
"International Friendly"
],
[
"International Friendly",
"OPPONENT",
"Finland"
],
[
"International Friendly",
"VENUE",
"Stadion Radomiaka Radom, Radom, Poland"
],
[
"[TABLECONTEXT]",
"DATE",
"13 April 1993"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Marek Leśniak"
]
] | true | [
"WikiTableQuestions_lily"
] | "On April 13, 1993, Marek Leśniak played Finland at the Stadion Radomiaka Radom in Radom, Poland." | [] |
"dart-train-45" | "dart-train-45" | 45 | [
[
"1994 FIFA World Cup qualification",
"OPPONENT",
"San Marino"
],
[
"1994 FIFA World Cup qualification",
"SCORE",
"0-1"
],
[
"[TABLECONTEXT]",
"DATE",
"19 May 1993"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Marek Leśniak"
],
[
"19 May 1993",
"COMPETITION",
"1994 FIFA World Cup qualification"
]
] | true | [
"WikiTableQuestions_lily"
] | "Marek Leśniak lost to San Marino in the 1994 FIFA World Cup qualification." | [] |
"dart-train-46" | "dart-train-46" | 46 | [
[
"23 September 1987",
"COMPETITION",
"UEFA Euro 1988 qualifying"
],
[
"UEFA Euro 1988 qualifying",
"SCORE",
"3-1"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Marek Lesniak scored 3-1 on 23 september 1987 in UEFA Euro 1988 qualifying competition." | [] |
"dart-train-47" | "dart-train-47" | 47 | [
[
"73",
"DATE",
"June 17"
],
[
"June 17",
"SITE/STADIUM",
"Rosenblatt Stadium"
],
[
"June 17",
"SCORE",
"5-3"
],
[
"[TABLECONTEXT]",
"NUMBER",
"73"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"2008 Fresno State Bulldogs baseball team"
],
[
"June 17",
"ATTENDANCE",
"23,314"
],
[
"June 17",
"OPPONENT",
"North Carolina"
]
] | true | [
"WikiTableQuestions_lily"
] | "The Bulldogs beat North Carolina 5-3 in front of more than 23,000 fans at the Rosenblatt Stadium." | [] |
"dart-train-48" | "dart-train-48" | 48 | [
[
"75",
"DATE",
"June 22"
],
[
"June 22",
"OPPONENT",
"North Carolina"
]
] | true | [
"WikiTableQuestions_lily"
] | "Their 75th game will be against North Carolina." | [] |
"dart-train-49" | "dart-train-49" | 49 | [
[
"June 23",
"SCORE",
"7-6"
],
[
"June 23",
"OVERALL_RECORD",
"45-31"
],
[
"June 23",
"OPPONENT",
"Georgia"
]
] | false | [
"WikiTableQuestions_lily"
] | "They lost 7-6 to Georgia on 23 June, bringing their overall record to 45-31." | [] |
"dart-train-50" | "dart-train-50" | 50 | [
[
"June 24",
"ATTENDANCE",
"17,223"
],
[
"June 24",
"SITE/STADIUM",
"Rosenblatt Stadium"
]
] | true | [
"WikiTableQuestions_lily"
] | "17,223 spectators filled the seats of Rosenblatt Stadium." | [] |
"dart-train-51" | "dart-train-51" | 51 | [
[
"June 25",
"NCAAT_RECORD",
"10-4"
],
[
"[TABLECONTEXT]",
"NUMBER",
"78"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"2008 Fresno State Bulldogs baseball team"
],
[
"June 25",
"OVERALL_RECORD",
"47-31"
],
[
"78",
"DATE",
"June 25"
]
] | true | [
"WikiTableQuestions_lily"
] | "Fresno State ended the season 47-31, with a 10-4 record in the NCAA tournament" | [] |
"dart-train-52" | "dart-train-52" | 52 | [
[
"August 2",
"OPPONENT",
"Pirates"
],
[
"[TABLECONTEXT]",
"DATE",
"August 2"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1994 Philadelphia Phillies season"
],
[
"August 2",
"SCORE",
"2-3"
]
] | true | [
"WikiTableQuestions_lily"
] | "On August 2, 1994, the Phillies lost 2-3 to the Pirates." | [] |
"dart-train-53" | "dart-train-53" | 53 | [
[
"August 6",
"OPPONENT",
"Expos"
],
[
"[TABLECONTEXT]",
"DATE",
"August 6"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1994 Philadelphia Phillies season"
],
[
"August 6",
"SCORE",
"3-4 (11)"
]
] | true | [
"WikiTableQuestions_lily"
] | "The Expos beat the Phillies 4-3 in 11 innings." | [] |
"dart-train-54" | "dart-train-54" | 54 | [
[
"August 9",
"WIN",
"Curt Schilling (2-8)"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1994 Philadelphia Phillies season"
],
[
"[TABLECONTEXT]",
"DATE",
"August 9"
]
] | true | [
"WikiTableQuestions_lily"
] | "Curt Schilling pitched for the Phillies on August 9." | [] |
"dart-train-55" | "dart-train-55" | 55 | [
[
"August 11",
"OPPONENT",
"Mets"
],
[
"August 11",
"SCORE",
"2-1 (15)"
],
[
"August 11",
"ATTENDANCE",
"37,605"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1994 Philadelphia Phillies season"
],
[
"[TABLECONTEXT]",
"DATE",
"August 11"
]
] | true | [
"WikiTableQuestions_lily"
] | "37, 605 fans attended the August 11, 2004 game when the Philadelphia Phillies beat the Mets 2-1 in 15 innings." | [] |
"dart-train-56" | "dart-train-56" | 56 | [
[
"August 8",
"SCORE",
"2-3"
],
[
"August 8",
"WIN",
"Bobby J. Jones (12-7)"
],
[
"August 8",
"ATTENDANCE",
"35,977"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Bobby J .Jones scored 2-3 on August 8, with 35,977 people in attendance." | [] |
"dart-train-57" | "dart-train-57" | 57 | [
[
"August 5",
"WIN",
"Pedro Martínez (10-5)"
],
[
"August 5",
"ATTENDANCE",
"33,642"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Pedro Martinez (10-5) won the August 5 game in front of 33,642 attendees." | [] |
"dart-train-58" | "dart-train-58" | 58 | [
[
"Goemon Ishikawa XIII/Samurai",
"ORIGINAL_JAPANESE",
"Makio Inoue"
]
] | false | [
"WikiSQL_decl_sents"
] | "The character is goemon ishikawa xiii/samurai with makio inoue in original japanese." | [] |
"dart-train-59" | "dart-train-59" | 59 | [
[
"Dance of the Seven Veils",
"GENRE",
"incidental music"
],
[
"Dance of the Seven Veils",
"COMPOSER",
"Granville Bantock"
],
[
"Dance of the Seven Veils",
"NOTES",
"staged London, 1918"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Granville Bantock wrote an incidental music piece entitled "Dance of the Seven Veils" that was first played on stage in London in 1918." | [] |
"dart-train-60" | "dart-train-60" | 60 | [
[
"6",
"RUNNER-UP",
"sergio garcía"
],
[
"6",
"TOURNAMENT",
"volvo masters andalucia"
]
] | true | [
"WikiSQL_decl_sents"
] | "The runner up at the Volvo Masters Andalucia was sergio garcía." | [] |
"dart-train-61" | "dart-train-61" | 61 | [
[
"5",
"TOURNAMENT",
"nordic open"
],
[
"5",
"WINNING_SCORE",
"68-67-65-66=266"
]
] | true | [
"WikiSQL_decl_sents"
] | "The winning score was 68-67-65-66=266 in nordic open." | [] |
"dart-train-62" | "dart-train-62" | 62 | [
[
"Covenant College",
"JOINED",
"2010"
],
[
"[TABLECONTEXT]",
"INSTITUTION",
"Covenant College"
],
[
"Covenant College",
"NICKNAME",
"Scots (men's) Lady Scots (women's)"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Great South Athletic Conference Former members"
]
] | true | [
"WikiSQL_decl_sents"
] | "The nickname of the students of the school that joined the Conference in 2010's scots (men's) lady scots (women's." | [] |
"dart-train-63" | "dart-train-63" | 63 | [
[
"Maryville College",
"NICKNAME",
"Scots"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Great South Athletic Conference Former members"
],
[
"Maryville College",
"LOCATION",
"Maryville, Tennessee"
],
[
"[TABLECONTEXT]",
"INSTITUTION",
"Maryville College"
]
] | true | [
"WikiSQL_decl_sents"
] | "The nickname of the school in Maryville, Tennessee is scots." | [] |
"dart-train-64" | "dart-train-64" | 64 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"Great South Athletic Conference Former members"
],
[
"Stillman College",
"LOCATION",
"Tuscaloosa, Alabama"
],
[
"Stillman College",
"NICKNAME",
"Tigers"
],
[
"[TABLECONTEXT]",
"INSTITUTION",
"Stillman College"
]
] | true | [
"WikiSQL_decl_sents"
] | "The school with nickname Tigers located is in tuscaloosa, alabama." | [] |
"dart-train-65" | "dart-train-65" | 65 | [
[
"Dave Ostlund",
"NATIONALITY",
"United States"
]
] | false | [
"WikiSQL_decl_sents"
] | "Dave ostlund are all the players from the united states." | [] |
"dart-train-66" | "dart-train-66" | 66 | [
[
"Oct 16",
"OPPONENT",
"vs. Toronto Argonauts"
],
[
"Oct 16",
"SCORE",
"27-11"
],
[
"8",
"DATE",
"Oct 16"
]
] | false | [
"WikiTableQuestions_mturk"
] | "The Rough Riders season Opponent for vs. Toronto Argonauts and second week 8 for the date Oct 16. it scored by27–11." | [] |
"dart-train-67" | "dart-train-67" | 67 | [
[
"2",
"DATE",
"Sept 4"
],
[
"Sept 4",
"SCORE",
"21-2"
]
] | false | [
"WikiTableQuestions_mturk"
] | "The Ottawa Rough Riders of the Canadian Football League ended the 1954 season with a record of 2 wins and 12 losses, finishing fourth in the CFL's Interprovincial Rugby Football Union." | [] |
"dart-train-68" | "dart-train-68" | 68 | [
[
"RBMK Reactors",
"GROSS_CAPACITY_(MW)",
"1000"
],
[
"RBMK Reactors",
"REACTOR_TYPE",
"RBMK-1000"
],
[
"RBMK Reactors",
"LOCATION_CHERNOBYL_1_CHERNOBYL_2_CHERNOBYL_3_CHERNOBYL_4_CHERNOBYL_5_IGNALINA_1_IGNALINA_2_IGNALINA_3_KURSK_1_KURSK_2_KURSK_3_KURSK_4_KURSK_5_KURSK_6_LENINGRAD_1_LENINGRAD_2_LENINGRAD_3_LENINGRAD_4_SMOLENSK_1_SMOLENSK_2_SMOLENSK_3_SMOLENSK_4_DIRECTORATE_FOR_CONSTRUCTION_OF_KOSTOMA_NPP_(FOR_KOSTROMA_1_AND_2)_TABLE_31._TECHNOLOGY_AND_SOVIET_ENERGY_AVAILABILITY_-_NOVEMBER_1981_-_NTIS_ORDER_#PB82-133455_(FOR_IGNALINA_4)",
"Chernobyl-5"
],
[
"RBMK Reactors",
"STATUS",
"construction cancelled in 1988"
],
[
"RBMK Reactors",
"NET_CAPACITY_(MW)",
"950"
]
] | true | [
"WikiSQL_decl_sents"
] | "List all the locations chernobyl - 5 chernobyl- 6 net capacity is 950." | [] |
"dart-train-69" | "dart-train-69" | 69 | [
[
"2006",
"COMPETITION",
"Commonwealth Games"
],
[
"[TABLECONTEXT]",
"YEAR",
"2006"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Olivia McKoy"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Olivia McKoy got 3rd in the Javelin throw at the 2006 Commonwealth Games." | [] |
"dart-train-70" | "dart-train-70" | 70 | [
[
"Manuel Poggiali",
"GRID",
"1"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Rider Manuel Poggiali has a grid value 1." | [] |
"dart-train-71" | "dart-train-71" | 71 | [
[
"[TABLECONTEXT]",
"RIDER",
"Steve Jenkner"
],
[
"Steve Jenkner",
"POSITION",
"3"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"2002 Catalan motorcycle Grand Prix"
],
[
"Steve Jenkner",
"GRID",
"3"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Steve Jenker, at the 2002 Catalan motorcycle Grand Prix, was position 3 and grid 3. " | [] |
"dart-train-72" | "dart-train-72" | 72 | [
[
"Mirko Giansanti",
"GRID",
"16"
],
[
"Mirko Giansanti",
"TIME/RETIRED",
"+22.839"
]
] | false | [
"WikiTableQuestions_mturk"
] | "2002 Catalan motorcycle Grand Prix Rider Mirko Giansanti Time/Retired +22.839 Grid is 16." | [] |
"dart-train-73" | "dart-train-73" | 73 | [
[
"Pioneer Square U",
"TRANSIT_CONNECTIONS",
"Metro , RapidRide , ST Express , Community Transit"
]
] | false | [
"WikiSQL_decl_sents"
] | "The transit connections from Pioneer Square U are metro, rapidride, st express, community transit." | [] |
"dart-train-74" | "dart-train-74" | 74 | [
[
"Columbia City",
"TRANSIT_CONNECTIONS",
"Metro"
],
[
"Columbia City",
"CITY/NEIGHBORHOOD",
"Columbia City, Seattle"
]
] | true | [
"WikiSQL_decl_sents"
] | "The transit connection in Columbia City, Seattle is metro." | [] |
"dart-train-75" | "dart-train-75" | 75 | [
[
"July 1",
"ATTENDANCE",
"21,004"
],
[
"[TABLECONTEXT]",
"DATE",
"July 1"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1982 Toronto Blue Jays season"
],
[
"July 1",
"OPPONENT",
"Mariners"
]
] | true | [
"WikiTableQuestions_lily"
] | "21,004 people attended the July 1 game between the Mariners and the Toronto Blue Jays." | [] |
"dart-train-76" | "dart-train-76" | 76 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"1982 Toronto Blue Jays season"
],
[
"July 6",
"OPPONENT",
"@ Rangers"
],
[
"July 6",
"SCORE",
"4 - 3"
],
[
"[TABLECONTEXT]",
"DATE",
"July 6"
]
] | true | [
"WikiTableQuestions_lily"
] | "The score was 4 - 3 in the July 6 game between the Rangers and Blue Jays." | [] |
"dart-train-77" | "dart-train-77" | 77 | [
[
"[TABLECONTEXT]",
"DATE",
"July 11"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1982 Toronto Blue Jays season"
],
[
"July 11",
"OPPONENT",
"White Sox"
]
] | true | [
"WikiTableQuestions_lily"
] | "The White Sox played the Blue Jays on July 10, 1982." | [] |
"dart-train-78" | "dart-train-78" | 78 | [
[
"July 18",
"ATTENDANCE",
"15,512"
],
[
"July 18",
"OPPONENT",
"Rangers"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"1982 Toronto Blue Jays season"
],
[
"[TABLECONTEXT]",
"DATE",
"July 18"
]
] | true | [
"WikiTableQuestions_lily"
] | "15,512 people attended the July 18, 1982 game between the Rangers and Blue Jays." | [] |
"dart-train-79" | "dart-train-79" | 79 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"1982 Toronto Blue Jays season"
],
[
"[TABLECONTEXT]",
"DATE",
"July 30"
],
[
"July 30",
"OPPONENT",
"Tigers"
]
] | true | [
"WikiTableQuestions_lily"
] | "The July 30, 1982 game was between the Tigers and Blue Jays." | [] |
"dart-train-80" | "dart-train-80" | 80 | [
[
"Clapham",
"STARTED",
"20 August"
],
[
"Clapham",
"ENDED",
"20 November"
],
[
"Clapham",
"LOAN_CLUB",
"Wolverhampton Wanderers"
]
] | false | [
"WikiSQL_lily"
] | "Clapham was loaned by the Wolverhampton Wanderers from 20 August to 20 November" | [] |
"dart-train-81" | "dart-train-81" | 81 | [
[
"Clapham",
"P",
"DF"
]
] | false | [
"WikiSQL_decl_sents"
] | "Clapham played position df." | [] |
"dart-train-82" | "dart-train-82" | 82 | [
[
"Andrews",
"COUNTRY",
"ENG"
],
[
"Andrews",
"STARTED",
"1 October"
],
[
"Andrews",
"LOAN_CLUB",
"Coventry City"
],
[
"Andrews",
"ENDED",
"27 October"
]
] | false | [
"WikiSQL_lily"
] | "British player Andrew was loaned by the Coventry City from 1 October to 27 October" | [] |
"dart-train-83" | "dart-train-83" | 83 | [
[
"de Vries",
"LOAN_CLUB",
"Leicester City"
],
[
"de Vries",
"AGE",
"32"
],
[
"de Vries",
"P",
"FW"
]
] | false | [
"WikiSQL_lily"
] | "32 year-old FW de Vries was loaned by the Leicester City" | [] |
"dart-train-84" | "dart-train-84" | 84 | [
[
"de Vries",
"STARTED",
"1 October"
]
] | false | [
"WikiSQL_decl_sents"
] | "De Vries started on October 1." | [] |
"dart-train-85" | "dart-train-85" | 85 | [
[
"Kishishev",
"ENDED",
"23 January"
],
[
"Kishishev",
"STARTED",
"23 October"
],
[
"Kishishev",
"COUNTRY",
"BUL"
]
] | false | [
"WikiSQL_lily"
] | "Bulgarian player Kishishev's loan started on 23 October and ended on 23 January" | [] |
"dart-train-86" | "dart-train-86" | 86 | [
[
"Ameobi",
"COUNTRY",
"ENG"
],
[
"Ameobi",
"P",
"FW"
],
[
"Ameobi",
"AGE",
"19"
],
[
"Ameobi",
"ENDED",
"4 May"
],
[
"Ameobi",
"STARTED",
"15 November"
],
[
"Ameobi",
"LOAN_CLUB",
"Scunthorpe United"
]
] | false | [
"WikiSQL_lily"
] | "19 year-old British FW Ameobi was loaned by the Scunthorpe United from 15 November to 4 May" | [] |
"dart-train-87" | "dart-train-87" | 87 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"United States Ambassador to Mexico"
],
[
"William Shaler",
"TO",
"1812"
],
[
"[TABLECONTEXT]",
"REPRESENTATIVE",
"William Shaler"
],
[
"William Shaler",
"FROM",
"1810"
]
] | true | [
"WikiTableQuestions_lily"
] | "William Shaler was the United States Ambassador to Mexico from 1810 to 1812." | [] |
"dart-train-88" | "dart-train-88" | 88 | [
[
"John H. Robinson",
"APPOINTED_BY",
"James Madison"
],
[
"John H. Robinson",
"TITLE",
"Special Diplomatic Agent"
]
] | true | [
"WikiTableQuestions_lily"
] | "James Madison appointed John H. Robinson as Special Diplomatic Agent." | [] |
"dart-train-89" | "dart-train-89" | 89 | [
[
"[TABLECONTEXT]",
"REPRESENTATIVE",
"William A. Slacum"
],
[
"William A. Slacum",
"FROM",
"1835"
],
[
"William A. Slacum",
"TO",
"1836"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"United States Ambassador to Mexico"
],
[
"William A. Slacum",
"TITLE",
"Special Diplomatic Agent"
]
] | true | [
"WikiTableQuestions_lily"
] | "From 1835 to 1836, William A. Slacum was the Special Diplomatic Agent serving as the United States Ambassador to Mexico." | [] |
"dart-train-90" | "dart-train-90" | 90 | [
[
"Powhatan Ellis",
"TITLE",
"Minister"
],
[
"Powhatan Ellis",
"APPOINTED_BY",
"Martin Van Buren"
],
[
"Powhatan Ellis",
"TO",
"1842"
],
[
"Powhatan Ellis",
"FROM",
"1839"
]
] | false | [
"WikiTableQuestions_lily"
] | "Powhatan Ellis was a minister who served under Martin Van Burenf rom 1839 to 1842." | [] |
"dart-train-91" | "dart-train-91" | 91 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"United States Ambassador to Mexico"
],
[
"Moses Yale Beach",
"APPOINTED_BY",
"John Tyler"
],
[
"[TABLECONTEXT]",
"REPRESENTATIVE",
"Moses Yale Beach"
]
] | true | [
"WikiTableQuestions_lily"
] | "Moses Yale Beach served as the United States Ambassador to Mexico under John Tyler." | [] |
"dart-train-92" | "dart-train-92" | 92 | [
[
"Anthony Butler",
"FROM",
"1829"
],
[
"Anthony Butler",
"TITLE",
"Special Diplomatic Agent"
]
] | false | [
"WikiTableQuestions_mturk"
] | "Anthony Butler was given the title of Special Diplomatic Agent." | [] |
"dart-train-93" | "dart-train-93" | 93 | [
[
"3yo Maiden",
"GROUP",
"NA"
],
[
"3yo Maiden",
"TIME",
"1:11.65"
]
] | true | [
"WikiSQL_decl_sents"
] | "Na had the time 1:11.65." | [] |
"dart-train-94" | "dart-train-94" | 94 | [
[
"Weekend Hussler 2007-08 Season as a Three Year Old Race Record",
"RESULT",
"10th"
],
[
"Weekend Hussler 2007-08 Season as a Three Year Old Race Record",
"WEIGHT_(KG)",
"51.5"
],
[
"10th",
"RACE",
"Emirates Stakes"
],
[
"Emirates Stakes",
"JOCKEY",
"D. Nikolic"
]
] | true | [
"WikiSQL_decl_sents"
] | "Jockey d. nikolic weighted 51.5 kg" | [] |
"dart-train-95" | "dart-train-95" | 95 | [
[
"Emirates Stakes",
"TIME",
"1:35.98"
],
[
"Emirates Stakes",
"WINNER/2ND",
"1st - Tears I Cry"
]
] | true | [
"WikiSQL_decl_sents"
] | "1st - tears i cry won with a time of 1:35.98." | [] |
"dart-train-96" | "dart-train-96" | 96 | [
[
"[TABLECONTEXT]",
"[TITLE]",
"Athena Chu"
],
[
"To Miss with Love",
"YEAR",
"1992"
],
[
"[TABLECONTEXT]",
"TITLE",
"To Miss with Love"
]
] | true | [
"WikiTableQuestions_lily"
] | "Athena Chu was in the 1992 movie To Miss with Love." | [] |
"dart-train-97" | "dart-train-97" | 97 | [
[
"[TABLECONTEXT]",
"TITLE",
"Shaolin Kung Fu Kids"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Athena Chu"
],
[
"Shaolin Kung Fu Kids",
"ROLE",
"Sister Lam"
]
] | true | [
"WikiTableQuestions_lily"
] | "Sister Lam in Shaolin Kung Fu Kids was played by Athena Chu." | [] |
"dart-train-98" | "dart-train-98" | 98 | [
[
"Step into the Dark",
"YEAR",
"1998"
],
[
"[TABLECONTEXT]",
"TITLE",
"Step into the Dark"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Athena Chu"
],
[
"Step into the Dark",
"ROLE",
"Faith Ching"
]
] | true | [
"WikiTableQuestions_lily"
] | "Athena Chu had the role of Faith Ching in the 1998 movie Step into the Dark." | [] |
"dart-train-99" | "dart-train-99" | 99 | [
[
"Taxi Hunter",
"ROLE",
"Yan"
],
[
"[TABLECONTEXT]",
"[TITLE]",
"Athena Chu"
],
[
"[TABLECONTEXT]",
"TITLE",
"Taxi Hunter"
],
[
"Taxi Hunter",
"YEAR",
"1993"
]
] | true | [
"WikiTableQuestions_mturk"
] | "Athena Chu acted as Yan in Taxi Hunter in 1993" | [] |
Dataset Card for GEM/dart
Link to Main Data Card
You can find the main data card on the GEM Website.
Dataset Summary
DART is an English dataset aggregating multiple other data-to-text datasets in a common triple-based format. The new format is completely flat, thus not requiring a model to learn hierarchical structures, while still retaining the full information.
You can load the dataset via:
```python
import datasets
data = datasets.load_dataset('GEM/dart')
```
The data loader can be found here.
website
n/a
paper
authors
Linyong Nan, Dragomir Radev, Rui Zhang, Amrit Rau, Abhinand Sivaprasad, Chiachun Hsieh, Xiangru Tang, Aadit Vyas, Neha Verma, Pranav Krishna, Yangxiaokang Liu, Nadia Irwanto, Jessica Pan, Faiaz Rahman, Ahmad Zaidi, Mutethia Mutuma, Yasin Tarabar, Ankit Gupta, Tao Yu, Yi Chern Tan, Xi Victoria Lin, Caiming Xiong, Richard Socher, Nazneen Fatema Rajani
Dataset Overview
Where to find the Data and its Documentation
Download
Paper
BibTex
@inproceedings{nan-etal-2021-dart,
title = "{DART}: Open-Domain Structured Data Record to Text Generation",
author = "Nan, Linyong and
Radev, Dragomir and
Zhang, Rui and
Rau, Amrit and
Sivaprasad, Abhinand and
Hsieh, Chiachun and
Tang, Xiangru and
Vyas, Aadit and
Verma, Neha and
Krishna, Pranav and
Liu, Yangxiaokang and
Irwanto, Nadia and
Pan, Jessica and
Rahman, Faiaz and
Zaidi, Ahmad and
Mutuma, Mutethia and
Tarabar, Yasin and
Gupta, Ankit and
Yu, Tao and
Tan, Yi Chern and
Lin, Xi Victoria and
Xiong, Caiming and
Socher, Richard and
Rajani, Nazneen Fatema",
booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
month = jun,
year = "2021",
address = "Online",
publisher = "Association for Computational Linguistics",
url = "https://aclanthology.org/2021.naacl-main.37",
doi = "10.18653/v1/2021.naacl-main.37",
pages = "432--447",
abstract = "We present DART, an open domain structured DAta Record to Text generation dataset with over 82k instances (DARTs). Data-to-text annotations can be a costly process, especially when dealing with tables which are the major source of structured data and contain nontrivial structures. To this end, we propose a procedure of extracting semantic triples from tables that encodes their structures by exploiting the semantic dependencies among table headers and the table title. Our dataset construction framework effectively merged heterogeneous sources from open domain semantic parsing and spoken dialogue systems by utilizing techniques including tree ontology annotation, question-answer pair to declarative sentence conversion, and predicate unification, all with minimum post-editing. We present systematic evaluation on DART as well as new state-of-the-art results on WebNLG 2017 to show that DART (1) poses new challenges to existing data-to-text datasets and (2) facilitates out-of-domain generalization. Our data and code can be found at https://github.com/Yale-LILY/dart.",
}
Contact Name
Dragomir Radev, Rui Zhang, Nazneen Rajani
Contact Email
{dragomir.radev, r.zhang}@yale.edu, {nazneen.rajani}@salesforce.com
Has a Leaderboard?
yes
Leaderboard Link
Leaderboard Details
Several state-of-the-art table-to-text models were evaluated on DART, such as BART (Lewis et al., 2020), Seq2Seq-Att (MELBOURNE) and End-to-End Transformer (Castro Ferreira et al., 2019). The leaderboard reports BLEU, METEOR, TER, MoverScore, BERTScore and BLEURT scores.
Languages and Intended Use
Multilingual?
no
Covered Dialects
It is aggregated from multiple other datasets that use general US-American or British English without differentiation between dialects.
Covered Languages
English
Whose Language?
The dataset is aggregated from multiple others that were crowdsourced on different platforms.
License
mit: MIT License
Intended Use
The dataset is aimed to further research in natural language generation from semantic data.
Primary Task
Data-to-Text
Communicative Goal
The speaker is required to produce coherent sentences and construct a tree-structured ontology of the column headers.
Credit
Curation Organization Type(s)
academic, industry
Curation Organization(s)
Yale University, Salesforce Research, Penn State University, The University of Hong Kong, MIT
Dataset Creators
Linyong Nan, Dragomir Radev, Rui Zhang, Amrit Rau, Abhinand Sivaprasad, Chiachun Hsieh, Xiangru Tang, Aadit Vyas, Neha Verma, Pranav Krishna, Yangxiaokang Liu, Nadia Irwanto, Jessica Pan, Faiaz Rahman, Ahmad Zaidi, Mutethia Mutuma, Yasin Tarabar, Ankit Gupta, Tao Yu, Yi Chern Tan, Xi Victoria Lin, Caiming Xiong, Richard Socher, Nazneen Fatema Rajani
Who added the Dataset to GEM?
Miruna Clinciu contributed the original data card and Yacine Jernite wrote the initial data loader. Sebastian Gehrmann migrated the data card and the loader to the new format.
Dataset Structure
Data Fields
- tripleset: a list of tuples, each tuple has 3 items
- subtree_was_extended: a boolean variable (true or false)
- annotations: a list of dict, each with source and text keys.
  - source: a string mentioning the name of the source table.
  - text: a sentence string.
Reason for Structure
The structure is supposed to be able to represent more complex structures beyond "flat" attribute-value pairs, instead encoding hierarchical relationships.
How were labels chosen?
They are a combination of those from existing datasets and new annotations that take advantage of the hierarchical structure
Example Instance
{
"tripleset": [
[
"Ben Mauk",
"High school",
"Kenton"
],
[
"Ben Mauk",
"College",
"Wake Forest Cincinnati"
]
],
"subtree_was_extended": false,
"annotations": [
{
"source": "WikiTableQuestions_lily",
"text": "Ben Mauk, who attended Kenton High School, attended Wake Forest Cincinnati for college."
}
]
}
Data Splits
| Input Unit | Examples | Vocab Size | Words per SR | Sents per SR | Tables |
| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
| Triple Set | 82,191 | 33.2K | 21.6 | 1.5 | 5,623 |
| Train | Dev | Test |
| ------------- | ------------- | ------------- |
| 62,659 | 6,980 | 12,552 |
Statistics of DART decomposed by different collection methods. DART exhibits a great deal of topical variety in terms of the number of unique predicates, the number of unique triples, and the vocabulary size. These statistics are computed from DART v1.1.1; the number of unique predicates reported is post-unification (see Section 3.4). SR: Surface Realization. (details in Table 1 and 2).
Splitting Criteria
For WebNLG 2017 and Cleaned E2E, DART uses the original data splits. For the new annotation on WikiTableQuestions and WikiSQL, random splitting would make train, dev, and test splits contain similar tables and similar <triple-set, sentence> examples. They are thus split based on Jaccard similarity such that no training example has a similarity with a test example of over 0.5.
Dataset in GEM
Rationale for Inclusion in GEM
Why is the Dataset in GEM?
DART is a large and open-domain structured DAta Record to Text generation corpus with high-quality sentence annotations with each input being a set of entity-relation triples following a tree-structured ontology.
Similar Datasets
yes
Unique Language Coverage
no
Difference from other GEM datasets
The tree structure is unique among GEM datasets
Ability that the Dataset measures
Reasoning, surface realization
GEM-Specific Curation
Modified for GEM?
no
Additional Splits?
no
Getting Started with the Task
Pointers to Resources
Experimental results on DART show that the BART model has the highest performance among the three models, with a BLEU score of 37.06. This is attributed to BART’s generalization ability due to pretraining (Table 4).
Previous Results
Previous Results
Measured Model Abilities
Reasoning, surface realization
Metrics
BLEU, MoverScore, BERT-Score, BLEURT
Proposed Evaluation
The leaderboard uses the combination of BLEU, METEOR, TER, MoverScore, BERTScore, PARENT and BLEURT to overcome the limitations of the n-gram overlap metrics.
A small scale human annotation of 100 data points was conducted along the dimensions of (1) fluency - a sentence is natural and grammatical, and (2) semantic faithfulness - a sentence is supported by the input triples.
Previous results available?
yes
Other Evaluation Approaches
n/a
Relevant Previous Results
BART currently achieves the best performance according to the leaderboard.
Dataset Curation
Original Curation
Original Curation Rationale
Through DART, the dataset creators encourage further research in natural language generation from semantic data. DART provides high-quality sentence annotations with each input being a set of entity-relation triples in a tree structure.
Communicative Goal
The speaker is required to produce coherent sentences and construct a tree-structured ontology of the column headers.
Sourced from Different Sources
yes
Source Details
- human annotation on open-domain Wikipedia tables from WikiTableQuestions (Pasupat and Liang, 2015) and WikiSQL (Zhong et al., 2017)
- automatic conversion of questions in WikiSQL to declarative sentences
- incorporation of existing datasets including WebNLG 2017 (Gardent et al., 2017a,b; Shimorina and Gardent, 2018) and Cleaned E2E (Novikova et al., 2017b; Dušek et al., 2018, 2019)
Language Data
How was Language Data Obtained?
Found, Created for the dataset
Where was it found?
Offline media collection
Creation Process
Creators proposed a two-stage annotation process for constructing triple set sentence pairs based on a tree-structured ontology of each table. First, internal skilled annotators denote the parent column for each column header. Then, a larger number of annotators provide a sentential description of an automatically-chosen subset of table cells in a row. To form a triple set sentence pair, the highlighted cells can be converted to a connected triple set automatically according to the column ontology for the given table.
Language Producers
No further information about the MTurk workers has been provided.
Topics Covered
The sub-datasets are from Wikipedia, DBPedia, and artificially created restaurant data.
Data Validation
validated by crowdworker
Was Data Filtered?
not filtered
Structured Annotations
Additional Annotations?
none
Annotation Service?
no
Consent
Any Consent Policy?
no
Justification for Using the Data
The new annotations are based on Wikipedia which is in the public domain and the other two datasets permit reuse (with attribution)
Private Identifying Information (PII)
Contains PII?
no PII
Justification for no PII
None of the datasets talk about individuals
Maintenance
Any Maintenance Plan?
no
Broader Social Context
Previous Work on the Social Impact of the Dataset
Usage of Models based on the Data
no
Impact on Under-Served Communities
Addresses needs of underserved Communities?
no
Discussion of Biases
Any Documented Social Biases?
no
Are the Language Producers Representative of the Language?
No, the annotators are raters on crowdworking platforms and thus only represent their demographics.
Considerations for Using the Data
PII Risks and Liability
Licenses
Copyright Restrictions on the Dataset
open license - commercial use allowed
Copyright Restrictions on the Language Data
open license - commercial use allowed
Known Technical Limitations
Technical Limitations
The dataset may contain some social biases, as the input sentences are based on Wikipedia (WikiTableQuestions, WikiSQL, WebNLG). Studies have shown that the English Wikipedia contains gender biases (Dinan et al., 2020), racial biases ([Papakyriakopoulos et al., 2020](https://dl.acm.org/doi/pdf/10.1145/3351095.3372843)) and geographical bias (Livingstone et al., 2010). More info.
Unsuited Applications
The end-to-end transformer has the lowest performance since the transformer model needs intermediate pipeline planning steps to have higher performance. Similar findings can be found in Castro Ferreira et al., 2019.
- Downloads last month
- 809