gem_id
string
gem_parent_id
string
dart_id
int32
0
30.5k
tripleset
list
subtree_was_extended
bool
target_sources
list
target
string
references
list
"dart-train-0"
"dart-train-0"
0
[ [ "First Clearing", "LOCATION", "On NYS 52 1 Mi. Youngsville" ], [ "On NYS 52 1 Mi. Youngsville", "CITY_OR_TOWN", "Callicoon, New York" ] ]
false
[ "WikiTableQuestions_mturk" ]
"First Clearing based on Callicoon, New York and location at On NYS 52 1 Mi. Youngsville"
[]
"dart-train-1"
"dart-train-1"
1
[ [ "[TABLECONTEXT]", "MARKER_NAME", "Old Turnpike" ], [ "[TABLECONTEXT]", "[TITLE]", "List of New York State Historic Markers in Sullivan County, New York" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Old Turnpike is a Historic Marker in Sullivan County, New York."
[]
"dart-train-2"
"dart-train-2"
2
[ [ "Antalya 15, Turkey", "SURFACE", "Hard" ], [ "Antalya 15, Turkey", "OPPONENT", "Tereza Martincová" ], [ "Antalya 15, Turkey", "SCORE", "6-4, 6-3" ], [ "15 April 2013", "TOURNAMENT", "Antalya 15, Turkey" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Tereza Martincová"
[]
"dart-train-3"
"dart-train-3"
3
[ [ "2 April 2012", "TOURNAMENT", "Ribeirão Preto, Brazil" ], [ "Ribeirão Preto, Brazil", "SURFACE", "Hard" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Beatriz Haddad Maia played on 2 April 2012 in Ribeirão Preto, Brazil on a hard surface."
[]
"dart-train-4"
"dart-train-4"
4
[ [ "5", "STADIUM", "shea stadium" ] ]
false
[ "WikiSQL_decl_sents" ]
"The week 5 game is played in Shea Stadium."
[]
"dart-train-5"
"dart-train-5"
5
[ [ "Northwestern College", "NICKNAME", "Red Raiders" ], [ "Northwestern College", "LOCATION", "Orange City, Iowa" ] ]
true
[ "WikiSQL_decl_sents" ]
"The team whose nickname is red raiders is located in the orange city, iowa"
[]
"dart-train-6"
"dart-train-6"
6
[ [ "University of Mississippi", "NEW_CONFERENCE", "SELC" ] ]
false
[ "WikiSQL_decl_sents" ]
"University of mississippi was in the selc new conference."
[]
"dart-train-7"
"dart-train-7"
7
[ [ "University of Mississippi", "NEW_CLASSIFICATION", "MCLA Division I" ], [ "University of Mississippi", "YEARS", "2008-2009" ] ]
true
[ "WikiSQL_decl_sents" ]
"The years that the new classification was MCLA division i are 2008-2009."
[]
"dart-train-8"
"dart-train-8"
8
[ [ "University of Nebraska at Omaha", "NICKNAME", "Mavericks" ] ]
false
[ "WikiSQL_decl_sents" ]
"The nickname of the team of University of Nebraska at Omaha is mavericks."
[]
"dart-train-9"
"dart-train-9"
9
[ [ "William Wasmund", "FIELD_GOALS", "0" ], [ "William Wasmund", "EXTRA_POINTS", "0" ], [ "William Wasmund", "POINTS", "5" ], [ "William Wasmund", "TOUCHDOWNS", "1" ] ]
false
[ "WikiSQL_decl_sents" ]
"William Wasmund scored 5.0 points"
[]
"dart-train-10"
"dart-train-10"
10
[ [ "We're Already Here", "ORIGINAL_AIR_DATE", "June16,2009" ] ]
false
[ "WikiSQL_lily" ]
"We're Already Here aired on June16,2009"
[]
"dart-train-11"
"dart-train-11"
11
[ [ "She's a Lump", "DIRECTED_BY", "Rohn Schmidt" ], [ "She's a Lump", "ORIGINAL_AIR_DATE", "June23,2009" ] ]
false
[ "WikiSQL_lily" ]
"She's a Lump directed by Rohn Schmidt aired on June 23, 2009"
[]
"dart-train-12"
"dart-train-12"
12
[ [ "Watch Siggybaby Burn", "ORIGINAL_AIR_DATE", "June30,2009" ], [ "Watch Siggybaby Burn", "WRITER(S)", "Denitria Harris-Lawrence Jessica Mecklenburg" ] ]
false
[ "WikiSQL_lily" ]
"Watch Siggybaby Burn wirtten by Denitria Harris-Lawrence Jessica Mecklenburg aired on June 23, 2010"
[]
"dart-train-13"
"dart-train-13"
13
[ [ "What Would You Do?", "DIRECTED_BY", "Tricia Brock" ], [ "What Would You Do?", "WRITER(S)", "Randy Walker" ], [ "What Would You Do?", "ORIGINAL_AIR_DATE", "July7,2009" ] ]
false
[ "WikiSQL_lily" ]
"What Would You Do? directed by Tricia Brock and written by Randy Walker aired on June 23, 2011"
[]
"dart-train-14"
"dart-train-14"
14
[ [ "Mooooooooo", "WRITER(S)", "Elle Johnson Annie Brunner" ], [ "Mooooooooo", "DIRECTED_BY", "Artie Mandelberg" ] ]
false
[ "WikiSQL_lily" ]
"Mooooooooo is directed by Artie Mandelberg and written by Elle Johnson Annie Brunner"
[]
"dart-train-15"
"dart-train-15"
15
[ [ "hear the birds", "SEASON_3_EP_#", "12" ] ]
false
[ "WikiSQL_decl_sents" ]
"The title of season 3 episode 12 is "Hear the Birds.""
[]
"dart-train-16"
"dart-train-16"
16
[ [ "5", "BRONZE", "1" ], [ "5", "GOLD", "3" ], [ "Russia (RUS)", "TOTAL", "5" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Biathlon at the 1994 Winter Olympics"
[]
"dart-train-17"
"dart-train-17"
17
[ [ "France (FRA)", "TOTAL", "3" ], [ "France (FRA)", "RANK", "4" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Biathlon at the 1994 Winter Olympics rank 4 nations in France (FRA)"
[]
"dart-train-18"
"dart-train-18"
18
[ [ "show 78", "GARFIELD_EPISODE_1", "the first annual garfield watchers test" ], [ "show 78", "GARFIELD_EPISODE_2", "the record breaker" ] ]
true
[ "WikiSQL_decl_sents" ]
"The record breaker has Garfield episode 1 as the first annual Garfield watchers test."
[]
"dart-train-19"
"dart-train-19"
19
[ [ "show 86", "ORIGINAL_AIR_DATE", "october 31, 1992" ], [ "show 86", "U.S._ACRES_EPISODE", "who done it?" ] ]
true
[ "WikiSQL_decl_sents" ]
"The original air date of the u.s. acres episode who done it is october 31, 1992."
[]
"dart-train-20"
"dart-train-20"
20
[ [ "show 84", "GARFIELD_EPISODE_1", "the worst pizza in the history of mankind" ] ]
false
[ "WikiSQL_decl_sents" ]
"In Garfield Episode 1, The worst pizza in the history of mankind is "show 84"."
[]
"dart-train-21"
"dart-train-21"
21
[ [ "1981", "POPULATION_OF_ENGLAND_AND_WALES_000", "49634" ], [ "1981", "MUSLIM_(%_OF_TOTAL_POPULATION)", "1.11" ] ]
true
[ "WikiSQL_decl_sents" ]
"The Muslim percentage of the 49634 population of England and Wales is 1.11."
[]
"dart-train-22"
"dart-train-22"
22
[ [ "2001", "MUSLIM_(%_OF_TOTAL_POPULATION)", "3.07" ], [ "2001", "REGISTERED_MOSQUES", "614" ] ]
true
[ "WikiSQL_decl_sents" ]
"The percentage of Muslims during a time where there were 614 registered mosques was 3.07."
[]
"dart-train-23"
"dart-train-23"
23
[ [ "USL W-League", "DIVISION", "2" ], [ "2003", "LEAGUE", "USL W-League" ], [ "[TABLECONTEXT]", "YEAR", "2003" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ] ]
true
[ "WikiTableQuestions_lily" ]
"The Colorado Rapids Women was a team in the USL W-League 2nd Division in 2003."
[]
"dart-train-24"
"dart-train-24"
24
[ [ "2005", "REGULAR_SEASON", "6th, Western" ], [ "2005", "LEAGUE", "USL W-League" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ], [ "[TABLECONTEXT]", "YEAR", "2005" ] ]
true
[ "WikiTableQuestions_lily" ]
"The USL W-League team Colorado Rapids finished the 2005 regular season 6th in the Western conference."
[]
"dart-train-25"
"dart-train-25"
25
[ [ "2008", "PLAYOFFS", "Did not qualify" ], [ "[TABLECONTEXT]", "YEAR", "2008" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ], [ "2008", "LEAGUE", "USL W-League" ] ]
true
[ "WikiTableQuestions_lily" ]
"The Colorado Rapids did not qualify the playoffs for USL W-League in 2008."
[]
"dart-train-26"
"dart-train-26"
26
[ [ "2010", "PLAYOFFS", "Did not qualify" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ], [ "2010", "REGULAR_SEASON", "6th, Western" ], [ "[TABLECONTEXT]", "YEAR", "2010" ] ]
true
[ "WikiTableQuestions_lily" ]
"Despite finishing 6th in the Western Conference, the Colorado Rapids did not qualify playoffs in 2010."
[]
"dart-train-27"
"dart-train-27"
27
[ [ "2013", "REGULAR_SEASON", "4th, Western" ], [ "2013", "LEAGUE", "USL W-League" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ], [ "[TABLECONTEXT]", "YEAR", "2013" ], [ "USL W-League", "DIVISION", "1" ] ]
true
[ "WikiTableQuestions_lily" ]
"The division 1 team Colorado Rapids wrapped up the regular season 4th place in the Western Conference in the USL W-League."
[]
"dart-train-28"
"dart-train-28"
28
[ [ "[TABLECONTEXT]", "YEAR", "2004" ], [ "[TABLECONTEXT]", "[TITLE]", "Colorado Rapids Women" ], [ "2004", "LEAGUE", "USL W-League" ], [ "USL W-League", "DIVISION", "1" ] ]
true
[ "WikiTableQuestions_mturk" ]
"The Colorado Rapids Women played in division 1 of the USL W-League in 2004."
[]
"dart-train-29"
"dart-train-29"
29
[ [ "[TABLECONTEXT]", "[TITLE]", "Athletics at the 2002 Asian Games" ], [ "[TABLECONTEXT]", "COUNTRY", "India (IND)" ], [ "India (IND)", "SILVER", "6" ], [ "India (IND)", "GOLD", "7" ] ]
true
[ "WikiTableQuestions_mturk" ]
"India won 7 gold medals and 6 silver medals at the 2002 Asian Games."
[]
"dart-train-30"
"dart-train-30"
30
[ [ "[TABLECONTEXT]", "[TITLE]", "Athletics at the 2002 Asian Games" ], [ "[TABLECONTEXT]", "COUNTRY", "Kazakhstan (KAZ)" ], [ "Kazakhstan (KAZ)", "BRONZE", "5" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Kazakhstan won 5 Bronze medals at the 2002 Asian Games."
[]
"dart-train-31"
"dart-train-31"
31
[ [ "1999", "WINNINGTEAM", "Melbourne Storm (1)" ], [ "1999", "CLIVE_CHURCHILL_MEDAL", "Brett Kimmorley" ] ]
true
[ "WikiSQL_decl_sents" ]
"Brett Kimmorley, who was chosen for the Clive Churchill Medal, belonged to Melbourne Storm."
[]
"dart-train-32"
"dart-train-32"
32
[ [ "Nikolett Listár", "TIME", "23.87" ], [ "23.87", "RANK", "10" ], [ "Nikolett Listár", "HEAT", "2" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Nikolett Listár has a 10th rank and 2 heat the time was 23.87"
[]
"dart-train-33"
"dart-train-33"
33
[ [ "[TABLECONTEXT]", "NAME", "Thandiwe Nyathy" ], [ "[TABLECONTEXT]", "[TITLE]", "Athletics at the 2011 All-Africa Games - Women's 5000 metres" ], [ "Thandiwe Nyathy", "RANK", "9" ] ]
true
[ "WikiTableQuestions_mturk" ]
"In the 2011 All-Africa Games, Thandiwe Nyathy was ranked 9 for the women's 5000 metres."
[]
"dart-train-34"
"dart-train-34"
34
[ [ "Albasty Fossae", "DIAMETER_(KM)", "500.0" ], [ "Albasty Fossae", "LATITUDE", "9.0S" ] ]
true
[ "WikiSQL_decl_sents" ]
"You can find the diameter (km) of 500.0 at 9.0s."
[]
"dart-train-35"
"dart-train-35"
35
[ [ "Naijok Fossae", "LATITUDE", "70.2S" ] ]
false
[ "WikiSQL_decl_sents" ]
"The name origin of naijok fossae can be found at 70.2s."
[]
"dart-train-36"
"dart-train-36"
36
[ [ "Perunitsa Fossae", "LATITUDE", "10.0S" ] ]
false
[ "WikiSQL_decl_sents" ]
"Perunitsa fossae is at latitude 10.0s."
[]
"dart-train-37"
"dart-train-37"
37
[ [ "september 23-24, 2008", "POLL_SOURCE", "survey usa" ], [ "september 23-24, 2008", "DEMOCRAT:_JAY_NIXON", "54%" ], [ "september 23-24, 2008", "LEAD_MARGIN", "17" ] ]
true
[ "WikiSQL_decl_sents" ]
"Survey usa's the poll source that claimed the Lead Margin was 17 and the Democrat: Jay Nixon had 54% of the votes."
[]
"dart-train-38"
"dart-train-38"
38
[ [ "[TABLECONTEXT]", "[TITLE]", "Chinese FA Super Cup" ], [ "2003", "FA_CUP_WINNER", "Beijing Hyundai (now Beijing Guoan)" ], [ "[TABLECONTEXT]", "SEASON", "2003" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Beijing Hyundai won the Chinese FA Super Cup in the 2003 season."
[]
"dart-train-39"
"dart-train-39"
39
[ [ "Jose Panganiban", "AREA_KM2", "214.44" ] ]
false
[ "WikiSQL_decl_sents" ]
"Jose Panganiban has an area of exactly 214.44 sq. km."
[]
"dart-train-40"
"dart-train-40"
40
[ [ "Santa Elena", "AREA_KM2", "199.35" ] ]
false
[ "WikiSQL_decl_sents" ]
"The municipality Santa Elena has an area of exactly 199.35 sq. km."
[]
"dart-train-41"
"dart-train-41"
41
[ [ "[TABLECONTEXT]", "DATE", "18 March 1987" ], [ "[TABLECONTEXT]", "[TITLE]", "Marek Leśniak" ], [ "18 March 1987", "COMPETITION", "International Friendly" ], [ "International Friendly", "OPPONENT", "Finland" ] ]
true
[ "WikiTableQuestions_lily" ]
"Marek Leśniak played Finland on March 18, 1987."
[]
"dart-train-42"
"dart-train-42"
42
[ [ "International Friendly", "OPPONENT", "Romania" ], [ "[TABLECONTEXT]", "[TITLE]", "Marek Leśniak" ], [ "International Friendly", "SCORE", "1-0" ], [ "[TABLECONTEXT]", "DATE", "2 September 1987" ], [ "2 September 1987", "COMPETITION", "International Friendly" ] ]
true
[ "WikiTableQuestions_lily" ]
"In an international friendly, Marek Leśniak defeated Romania with a score of 1-0."
[]
"dart-train-43"
"dart-train-43"
43
[ [ "International Friendly", "VENUE", "Zawisza Bydgoszcz Stadium, Bydgoszcz, Poland" ], [ "International Friendly", "OPPONENT", "Romania" ], [ "2 September 1987", "COMPETITION", "International Friendly" ] ]
true
[ "WikiTableQuestions_lily" ]
"The September 2, 1987 match against Romania was held at Zawisza Bydgoszcz Stadium in Bydgoszcz, Poland."
[]
"dart-train-44"
"dart-train-44"
44
[ [ "13 April 1993", "COMPETITION", "International Friendly" ], [ "International Friendly", "OPPONENT", "Finland" ], [ "International Friendly", "VENUE", "Stadion Radomiaka Radom, Radom, Poland" ], [ "[TABLECONTEXT]", "DATE", "13 April 1993" ], [ "[TABLECONTEXT]", "[TITLE]", "Marek Leśniak" ] ]
true
[ "WikiTableQuestions_lily" ]
"On April 13, 1993, Marek Leśniak played Finland at the Stadion Radomiaka Radom in Radom, Poland."
[]
"dart-train-45"
"dart-train-45"
45
[ [ "1994 FIFA World Cup qualification", "OPPONENT", "San Marino" ], [ "1994 FIFA World Cup qualification", "SCORE", "0-1" ], [ "[TABLECONTEXT]", "DATE", "19 May 1993" ], [ "[TABLECONTEXT]", "[TITLE]", "Marek Leśniak" ], [ "19 May 1993", "COMPETITION", "1994 FIFA World Cup qualification" ] ]
true
[ "WikiTableQuestions_lily" ]
"Marek Leśniak lost to San Marino in the 1994 FIFA World Cup qualification."
[]
"dart-train-46"
"dart-train-46"
46
[ [ "23 September 1987", "COMPETITION", "UEFA Euro 1988 qualifying" ], [ "UEFA Euro 1988 qualifying", "SCORE", "3-1" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Marek Lesniak scored 3-1 on 23 september 1987 in UEFA Euro 1988 qualifying competition."
[]
"dart-train-47"
"dart-train-47"
47
[ [ "73", "DATE", "June 17" ], [ "June 17", "SITE/STADIUM", "Rosenblatt Stadium" ], [ "June 17", "SCORE", "5-3" ], [ "[TABLECONTEXT]", "NUMBER", "73" ], [ "[TABLECONTEXT]", "[TITLE]", "2008 Fresno State Bulldogs baseball team" ], [ "June 17", "ATTENDANCE", "23,314" ], [ "June 17", "OPPONENT", "North Carolina" ] ]
true
[ "WikiTableQuestions_lily" ]
"The Bulldogs beat North Carolina 5-3 in front of more than 23,000 fans at the Rosenblatt Stadium."
[]
"dart-train-48"
"dart-train-48"
48
[ [ "75", "DATE", "June 22" ], [ "June 22", "OPPONENT", "North Carolina" ] ]
true
[ "WikiTableQuestions_lily" ]
"Their 75th game will be against North Carolina."
[]
"dart-train-49"
"dart-train-49"
49
[ [ "June 23", "SCORE", "7-6" ], [ "June 23", "OVERALL_RECORD", "45-31" ], [ "June 23", "OPPONENT", "Georgia" ] ]
false
[ "WikiTableQuestions_lily" ]
"They lost 7-6 to Georgia on 23 June, bringing their overall record to 45-31."
[]
"dart-train-50"
"dart-train-50"
50
[ [ "June 24", "ATTENDANCE", "17,223" ], [ "June 24", "SITE/STADIUM", "Rosenblatt Stadium" ] ]
true
[ "WikiTableQuestions_lily" ]
"17,223 spectators filled the seats of Rosenblatt Stadium."
[]
"dart-train-51"
"dart-train-51"
51
[ [ "June 25", "NCAAT_RECORD", "10-4" ], [ "[TABLECONTEXT]", "NUMBER", "78" ], [ "[TABLECONTEXT]", "[TITLE]", "2008 Fresno State Bulldogs baseball team" ], [ "June 25", "OVERALL_RECORD", "47-31" ], [ "78", "DATE", "June 25" ] ]
true
[ "WikiTableQuestions_lily" ]
"Fresno State ended the season 47-31, with a 10-4 record in the NCAA tournament"
[]
"dart-train-52"
"dart-train-52"
52
[ [ "August 2", "OPPONENT", "Pirates" ], [ "[TABLECONTEXT]", "DATE", "August 2" ], [ "[TABLECONTEXT]", "[TITLE]", "1994 Philadelphia Phillies season" ], [ "August 2", "SCORE", "2-3" ] ]
true
[ "WikiTableQuestions_lily" ]
"On August 2, 1994, the Phillies lost 2-3 to the Pirates."
[]
"dart-train-53"
"dart-train-53"
53
[ [ "August 6", "OPPONENT", "Expos" ], [ "[TABLECONTEXT]", "DATE", "August 6" ], [ "[TABLECONTEXT]", "[TITLE]", "1994 Philadelphia Phillies season" ], [ "August 6", "SCORE", "3-4 (11)" ] ]
true
[ "WikiTableQuestions_lily" ]
"The Expos beat the Phillies 4-3 in 11 innings."
[]
"dart-train-54"
"dart-train-54"
54
[ [ "August 9", "WIN", "Curt Schilling (2-8)" ], [ "[TABLECONTEXT]", "[TITLE]", "1994 Philadelphia Phillies season" ], [ "[TABLECONTEXT]", "DATE", "August 9" ] ]
true
[ "WikiTableQuestions_lily" ]
"Curt Schilling pitched for the Phillies on August 9."
[]
"dart-train-55"
"dart-train-55"
55
[ [ "August 11", "OPPONENT", "Mets" ], [ "August 11", "SCORE", "2-1 (15)" ], [ "August 11", "ATTENDANCE", "37,605" ], [ "[TABLECONTEXT]", "[TITLE]", "1994 Philadelphia Phillies season" ], [ "[TABLECONTEXT]", "DATE", "August 11" ] ]
true
[ "WikiTableQuestions_lily" ]
"37, 605 fans attended the August 11, 2004 game when the Philadelphia Phillies beat the Mets 2-1 in 15 innings."
[]
"dart-train-56"
"dart-train-56"
56
[ [ "August 8", "SCORE", "2-3" ], [ "August 8", "WIN", "Bobby J. Jones (12-7)" ], [ "August 8", "ATTENDANCE", "35,977" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Bobby J .Jones scored 2-3 on August 8, with 35,977 people in attendance."
[]
"dart-train-57"
"dart-train-57"
57
[ [ "August 5", "WIN", "Pedro Martínez (10-5)" ], [ "August 5", "ATTENDANCE", "33,642" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Pedro Martinez (10-5) won the August 5 game in front of 33,642 attendees."
[]
"dart-train-58"
"dart-train-58"
58
[ [ "Goemon Ishikawa XIII/Samurai", "ORIGINAL_JAPANESE", "Makio Inoue" ] ]
false
[ "WikiSQL_decl_sents" ]
"The character is goemon ishikawa xiii/samurai with makio inoue in original japanese."
[]
"dart-train-59"
"dart-train-59"
59
[ [ "Dance of the Seven Veils", "GENRE", "incidental music" ], [ "Dance of the Seven Veils", "COMPOSER", "Granville Bantock" ], [ "Dance of the Seven Veils", "NOTES", "staged London, 1918" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Granville Bantock wrote an incidental music piece entitled "Dance of the Seven Veils" that was first played on stage in London in 1918."
[]
"dart-train-60"
"dart-train-60"
60
[ [ "6", "RUNNER-UP", "sergio garcía" ], [ "6", "TOURNAMENT", "volvo masters andalucia" ] ]
true
[ "WikiSQL_decl_sents" ]
"The runner up at the Volvo Masters Andalucia was sergio garcía."
[]
"dart-train-61"
"dart-train-61"
61
[ [ "5", "TOURNAMENT", "nordic open" ], [ "5", "WINNING_SCORE", "68-67-65-66=266" ] ]
true
[ "WikiSQL_decl_sents" ]
"The winning score was 68-67-65-66=266 in nordic open."
[]
"dart-train-62"
"dart-train-62"
62
[ [ "Covenant College", "JOINED", "2010" ], [ "[TABLECONTEXT]", "INSTITUTION", "Covenant College" ], [ "Covenant College", "NICKNAME", "Scots (men's) Lady Scots (women's)" ], [ "[TABLECONTEXT]", "[TITLE]", "Great South Athletic Conference Former members" ] ]
true
[ "WikiSQL_decl_sents" ]
"The nickname of the students of the school that joined the Conference in 2010's scots (men's) lady scots (women's."
[]
"dart-train-63"
"dart-train-63"
63
[ [ "Maryville College", "NICKNAME", "Scots" ], [ "[TABLECONTEXT]", "[TITLE]", "Great South Athletic Conference Former members" ], [ "Maryville College", "LOCATION", "Maryville, Tennessee" ], [ "[TABLECONTEXT]", "INSTITUTION", "Maryville College" ] ]
true
[ "WikiSQL_decl_sents" ]
"The nickname of the school in Maryville, Tennessee is scots."
[]
"dart-train-64"
"dart-train-64"
64
[ [ "[TABLECONTEXT]", "[TITLE]", "Great South Athletic Conference Former members" ], [ "Stillman College", "LOCATION", "Tuscaloosa, Alabama" ], [ "Stillman College", "NICKNAME", "Tigers" ], [ "[TABLECONTEXT]", "INSTITUTION", "Stillman College" ] ]
true
[ "WikiSQL_decl_sents" ]
"The school with nickname Tigers located is in tuscaloosa, alabama."
[]
"dart-train-65"
"dart-train-65"
65
[ [ "Dave Ostlund", "NATIONALITY", "United States" ] ]
false
[ "WikiSQL_decl_sents" ]
"Dave ostlund are all the players from the united states."
[]
"dart-train-66"
"dart-train-66"
66
[ [ "Oct 16", "OPPONENT", "vs. Toronto Argonauts" ], [ "Oct 16", "SCORE", "27-11" ], [ "8", "DATE", "Oct 16" ] ]
false
[ "WikiTableQuestions_mturk" ]
"The Rough Riders season Opponent for vs. Toronto Argonauts and second week 8 for the date Oct 16. it scored by27–11."
[]
"dart-train-67"
"dart-train-67"
67
[ [ "2", "DATE", "Sept 4" ], [ "Sept 4", "SCORE", "21-2" ] ]
false
[ "WikiTableQuestions_mturk" ]
"The Ottawa Rough Riders of the Canadian Football League ended the 1954 season with a record of 2 wins and 12 losses, finishing fourth in the CFL's Interprovincial Rugby Football Union."
[]
"dart-train-68"
"dart-train-68"
68
[ [ "RBMK Reactors", "GROSS_CAPACITY_(MW)", "1000" ], [ "RBMK Reactors", "REACTOR_TYPE", "RBMK-1000" ], [ "RBMK Reactors", "LOCATION_CHERNOBYL_1_CHERNOBYL_2_CHERNOBYL_3_CHERNOBYL_4_CHERNOBYL_5_IGNALINA_1_IGNALINA_2_IGNALINA_3_KURSK_1_KURSK_2_KURSK_3_KURSK_4_KURSK_5_KURSK_6_LENINGRAD_1_LENINGRAD_2_LENINGRAD_3_LENINGRAD_4_SMOLENSK_1_SMOLENSK_2_SMOLENSK_3_SMOLENSK_4_DIRECTORATE_FOR_CONSTRUCTION_OF_KOSTOMA_NPP_(FOR_KOSTROMA_1_AND_2)_TABLE_31._TECHNOLOGY_AND_SOVIET_ENERGY_AVAILABILITY_-_NOVEMBER_1981_-_NTIS_ORDER_#PB82-133455_(FOR_IGNALINA_4)", "Chernobyl-5" ], [ "RBMK Reactors", "STATUS", "construction cancelled in 1988" ], [ "RBMK Reactors", "NET_CAPACITY_(MW)", "950" ] ]
true
[ "WikiSQL_decl_sents" ]
"List all the locations chernobyl - 5 chernobyl- 6 net capacity is 950."
[]
"dart-train-69"
"dart-train-69"
69
[ [ "2006", "COMPETITION", "Commonwealth Games" ], [ "[TABLECONTEXT]", "YEAR", "2006" ], [ "[TABLECONTEXT]", "[TITLE]", "Olivia McKoy" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Olivia McKoy got 3rd in the Javelin throw at the 2006 Commonwealth Games."
[]
"dart-train-70"
"dart-train-70"
70
[ [ "Manuel Poggiali", "GRID", "1" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Rider Manuel Poggiali has a grid value 1."
[]
"dart-train-71"
"dart-train-71"
71
[ [ "[TABLECONTEXT]", "RIDER", "Steve Jenkner" ], [ "Steve Jenkner", "POSITION", "3" ], [ "[TABLECONTEXT]", "[TITLE]", "2002 Catalan motorcycle Grand Prix" ], [ "Steve Jenkner", "GRID", "3" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Steve Jenker, at the 2002 Catalan motorcycle Grand Prix, was position 3 and grid 3. "
[]
"dart-train-72"
"dart-train-72"
72
[ [ "Mirko Giansanti", "GRID", "16" ], [ "Mirko Giansanti", "TIME/RETIRED", "+22.839" ] ]
false
[ "WikiTableQuestions_mturk" ]
"2002 Catalan motorcycle Grand Prix Rider Mirko Giansanti Time/Retired +22.839 Grid is 16."
[]
"dart-train-73"
"dart-train-73"
73
[ [ "Pioneer Square U", "TRANSIT_CONNECTIONS", "Metro , RapidRide , ST Express , Community Transit" ] ]
false
[ "WikiSQL_decl_sents" ]
"The transit connections from Pioneer Square U are metro, rapidride, st express, community transit."
[]
"dart-train-74"
"dart-train-74"
74
[ [ "Columbia City", "TRANSIT_CONNECTIONS", "Metro" ], [ "Columbia City", "CITY/NEIGHBORHOOD", "Columbia City, Seattle" ] ]
true
[ "WikiSQL_decl_sents" ]
"The transit connection in Columbia City, Seattle is metro."
[]
"dart-train-75"
"dart-train-75"
75
[ [ "July 1", "ATTENDANCE", "21,004" ], [ "[TABLECONTEXT]", "DATE", "July 1" ], [ "[TABLECONTEXT]", "[TITLE]", "1982 Toronto Blue Jays season" ], [ "July 1", "OPPONENT", "Mariners" ] ]
true
[ "WikiTableQuestions_lily" ]
"21,004 people attended the July 1 game between the Mariners and the Toronto Blue Jays."
[]
"dart-train-76"
"dart-train-76"
76
[ [ "[TABLECONTEXT]", "[TITLE]", "1982 Toronto Blue Jays season" ], [ "July 6", "OPPONENT", "@ Rangers" ], [ "July 6", "SCORE", "4 - 3" ], [ "[TABLECONTEXT]", "DATE", "July 6" ] ]
true
[ "WikiTableQuestions_lily" ]
"The score was 4 - 3 in the July 6 game between the Rangers and Blue Jays."
[]
"dart-train-77"
"dart-train-77"
77
[ [ "[TABLECONTEXT]", "DATE", "July 11" ], [ "[TABLECONTEXT]", "[TITLE]", "1982 Toronto Blue Jays season" ], [ "July 11", "OPPONENT", "White Sox" ] ]
true
[ "WikiTableQuestions_lily" ]
"The White Sox played the Blue Jays on July 10, 1982."
[]
"dart-train-78"
"dart-train-78"
78
[ [ "July 18", "ATTENDANCE", "15,512" ], [ "July 18", "OPPONENT", "Rangers" ], [ "[TABLECONTEXT]", "[TITLE]", "1982 Toronto Blue Jays season" ], [ "[TABLECONTEXT]", "DATE", "July 18" ] ]
true
[ "WikiTableQuestions_lily" ]
"15,512 people attended the July 18, 1982 game between the Rangers and Blue Jays."
[]
"dart-train-79"
"dart-train-79"
79
[ [ "[TABLECONTEXT]", "[TITLE]", "1982 Toronto Blue Jays season" ], [ "[TABLECONTEXT]", "DATE", "July 30" ], [ "July 30", "OPPONENT", "Tigers" ] ]
true
[ "WikiTableQuestions_lily" ]
"The July 30, 1982 game was between the Tigers and Blue Jays."
[]
"dart-train-80"
"dart-train-80"
80
[ [ "Clapham", "STARTED", "20 August" ], [ "Clapham", "ENDED", "20 November" ], [ "Clapham", "LOAN_CLUB", "Wolverhampton Wanderers" ] ]
false
[ "WikiSQL_lily" ]
"Clapham was loaned by the Wolverhampton Wanderers from 20 August to 20 November"
[]
"dart-train-81"
"dart-train-81"
81
[ [ "Clapham", "P", "DF" ] ]
false
[ "WikiSQL_decl_sents" ]
"Clapham played position df."
[]
"dart-train-82"
"dart-train-82"
82
[ [ "Andrews", "COUNTRY", "ENG" ], [ "Andrews", "STARTED", "1 October" ], [ "Andrews", "LOAN_CLUB", "Coventry City" ], [ "Andrews", "ENDED", "27 October" ] ]
false
[ "WikiSQL_lily" ]
"British player Andrew was loaned by the Coventry City from 1 October to 27 October"
[]
"dart-train-83"
"dart-train-83"
83
[ [ "de Vries", "LOAN_CLUB", "Leicester City" ], [ "de Vries", "AGE", "32" ], [ "de Vries", "P", "FW" ] ]
false
[ "WikiSQL_lily" ]
"32 year-old FW de Vries was loaned by the Leicester City"
[]
"dart-train-84"
"dart-train-84"
84
[ [ "de Vries", "STARTED", "1 October" ] ]
false
[ "WikiSQL_decl_sents" ]
"De Vries started on October 1."
[]
"dart-train-85"
"dart-train-85"
85
[ [ "Kishishev", "ENDED", "23 January" ], [ "Kishishev", "STARTED", "23 October" ], [ "Kishishev", "COUNTRY", "BUL" ] ]
false
[ "WikiSQL_lily" ]
"Bulgarian player Kishishev's loan started on 23 October and ended on 23 January"
[]
"dart-train-86"
"dart-train-86"
86
[ [ "Ameobi", "COUNTRY", "ENG" ], [ "Ameobi", "P", "FW" ], [ "Ameobi", "AGE", "19" ], [ "Ameobi", "ENDED", "4 May" ], [ "Ameobi", "STARTED", "15 November" ], [ "Ameobi", "LOAN_CLUB", "Scunthorpe United" ] ]
false
[ "WikiSQL_lily" ]
"19 year-old British FW Ameobi was loaned by the Scunthorpe United from 15 November to 4 May"
[]
"dart-train-87"
"dart-train-87"
87
[ [ "[TABLECONTEXT]", "[TITLE]", "United States Ambassador to Mexico" ], [ "William Shaler", "TO", "1812" ], [ "[TABLECONTEXT]", "REPRESENTATIVE", "William Shaler" ], [ "William Shaler", "FROM", "1810" ] ]
true
[ "WikiTableQuestions_lily" ]
"William Shaler was the United States Ambassador to Mexico from 1810 to 1812."
[]
"dart-train-88"
"dart-train-88"
88
[ [ "John H. Robinson", "APPOINTED_BY", "James Madison" ], [ "John H. Robinson", "TITLE", "Special Diplomatic Agent" ] ]
true
[ "WikiTableQuestions_lily" ]
"James Madison appointed John H. Robinson as Special Diplomatic Agent."
[]
"dart-train-89"
"dart-train-89"
89
[ [ "[TABLECONTEXT]", "REPRESENTATIVE", "William A. Slacum" ], [ "William A. Slacum", "FROM", "1835" ], [ "William A. Slacum", "TO", "1836" ], [ "[TABLECONTEXT]", "[TITLE]", "United States Ambassador to Mexico" ], [ "William A. Slacum", "TITLE", "Special Diplomatic Agent" ] ]
true
[ "WikiTableQuestions_lily" ]
"From 1835 to 1836, William A. Slacum was the Special Diplomatic Agent serving as the United States Ambassador to Mexico."
[]
"dart-train-90"
"dart-train-90"
90
[ [ "Powhatan Ellis", "TITLE", "Minister" ], [ "Powhatan Ellis", "APPOINTED_BY", "Martin Van Buren" ], [ "Powhatan Ellis", "TO", "1842" ], [ "Powhatan Ellis", "FROM", "1839" ] ]
false
[ "WikiTableQuestions_lily" ]
"Powhatan Ellis was a minister who served under Martin Van Burenf rom 1839 to 1842."
[]
"dart-train-91"
"dart-train-91"
91
[ [ "[TABLECONTEXT]", "[TITLE]", "United States Ambassador to Mexico" ], [ "Moses Yale Beach", "APPOINTED_BY", "John Tyler" ], [ "[TABLECONTEXT]", "REPRESENTATIVE", "Moses Yale Beach" ] ]
true
[ "WikiTableQuestions_lily" ]
"Moses Yale Beach served as the United States Ambassador to Mexico under John Tyler."
[]
"dart-train-92"
"dart-train-92"
92
[ [ "Anthony Butler", "FROM", "1829" ], [ "Anthony Butler", "TITLE", "Special Diplomatic Agent" ] ]
false
[ "WikiTableQuestions_mturk" ]
"Anthony Butler was given the title of Special Diplomatic Agent."
[]
"dart-train-93"
"dart-train-93"
93
[ [ "3yo Maiden", "GROUP", "NA" ], [ "3yo Maiden", "TIME", "1:11.65" ] ]
true
[ "WikiSQL_decl_sents" ]
"Na had the time 1:11.65."
[]
"dart-train-94"
"dart-train-94"
94
[ [ "Weekend Hussler 2007-08 Season as a Three Year Old Race Record", "RESULT", "10th" ], [ "Weekend Hussler 2007-08 Season as a Three Year Old Race Record", "WEIGHT_(KG)", "51.5" ], [ "10th", "RACE", "Emirates Stakes" ], [ "Emirates Stakes", "JOCKEY", "D. Nikolic" ] ]
true
[ "WikiSQL_decl_sents" ]
"Jockey d. nikolic weighted 51.5 kg"
[]
"dart-train-95"
"dart-train-95"
95
[ [ "Emirates Stakes", "TIME", "1:35.98" ], [ "Emirates Stakes", "WINNER/2ND", "1st - Tears I Cry" ] ]
true
[ "WikiSQL_decl_sents" ]
"1st - tears i cry won with a time of 1:35.98."
[]
"dart-train-96"
"dart-train-96"
96
[ [ "[TABLECONTEXT]", "[TITLE]", "Athena Chu" ], [ "To Miss with Love", "YEAR", "1992" ], [ "[TABLECONTEXT]", "TITLE", "To Miss with Love" ] ]
true
[ "WikiTableQuestions_lily" ]
"Athena Chu was in the 1992 movie To Miss with Love."
[]
"dart-train-97"
"dart-train-97"
97
[ [ "[TABLECONTEXT]", "TITLE", "Shaolin Kung Fu Kids" ], [ "[TABLECONTEXT]", "[TITLE]", "Athena Chu" ], [ "Shaolin Kung Fu Kids", "ROLE", "Sister Lam" ] ]
true
[ "WikiTableQuestions_lily" ]
"Sister Lam in Shaolin Kung Fu Kids was played by Athena Chu."
[]
"dart-train-98"
"dart-train-98"
98
[ [ "Step into the Dark", "YEAR", "1998" ], [ "[TABLECONTEXT]", "TITLE", "Step into the Dark" ], [ "[TABLECONTEXT]", "[TITLE]", "Athena Chu" ], [ "Step into the Dark", "ROLE", "Faith Ching" ] ]
true
[ "WikiTableQuestions_lily" ]
"Athena Chu had the role of Faith Ching in the 1998 movie Step into the Dark."
[]
"dart-train-99"
"dart-train-99"
99
[ [ "Taxi Hunter", "ROLE", "Yan" ], [ "[TABLECONTEXT]", "[TITLE]", "Athena Chu" ], [ "[TABLECONTEXT]", "TITLE", "Taxi Hunter" ], [ "Taxi Hunter", "YEAR", "1993" ] ]
true
[ "WikiTableQuestions_mturk" ]
"Athena Chu acted as Yan in Taxi Hunter in 1993"
[]

Dataset Card for GEM/dart

Link to Main Data Card

You can find the main data card on the GEM Website.

Dataset Summary

DART is an English dataset aggregating multiple other data-to-text datasets in a common triple-based format. The new format is completely flat, thus not requiring a model to learn hierarchical structures, while still retaining the full information.

You can load the dataset via:

import datasets
data = datasets.load_dataset('GEM/dart')

The data loader can be found here.

website

n/a

paper

ACL Anthology

authors

Linyong Nan, Dragomir Radev, Rui Zhang, Amrit Rau, Abhinand Sivaprasad, Chiachun Hsieh, Xiangru Tang, Aadit Vyas, Neha Verma, Pranav Krishna, Yangxiaokang Liu, Nadia Irwanto, Jessica Pan, Faiaz Rahman, Ahmad Zaidi, Mutethia Mutuma, Yasin Tarabar, Ankit Gupta, Tao Yu, Yi Chern Tan, Xi Victoria Lin, Caiming Xiong, Richard Socher, Nazneen Fatema Rajani

Dataset Overview

Where to find the Data and its Documentation

Download

Github

Paper

ACL Anthology

BibTex

@inproceedings{nan-etal-2021-dart,
    title = "{DART}: Open-Domain Structured Data Record to Text Generation",
    author = "Nan, Linyong  and
      Radev, Dragomir  and
      Zhang, Rui  and
      Rau, Amrit  and
      Sivaprasad, Abhinand  and
      Hsieh, Chiachun  and
      Tang, Xiangru  and
      Vyas, Aadit  and
      Verma, Neha  and
      Krishna, Pranav  and
      Liu, Yangxiaokang  and
      Irwanto, Nadia  and
      Pan, Jessica  and
      Rahman, Faiaz  and
      Zaidi, Ahmad  and
      Mutuma, Mutethia  and
      Tarabar, Yasin  and
      Gupta, Ankit  and
      Yu, Tao  and
      Tan, Yi Chern  and
      Lin, Xi Victoria  and
      Xiong, Caiming  and
      Socher, Richard  and
      Rajani, Nazneen Fatema",
    booktitle = "Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies",
    month = jun,
    year = "2021",
    address = "Online",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/2021.naacl-main.37",
    doi = "10.18653/v1/2021.naacl-main.37",
    pages = "432--447",
    abstract = "We present DART, an open domain structured DAta Record to Text generation dataset with over 82k instances (DARTs). Data-to-text annotations can be a costly process, especially when dealing with tables which are the major source of structured data and contain nontrivial structures. To this end, we propose a procedure of extracting semantic triples from tables that encodes their structures by exploiting the semantic dependencies among table headers and the table title. Our dataset construction framework effectively merged heterogeneous sources from open domain semantic parsing and spoken dialogue systems by utilizing techniques including tree ontology annotation, question-answer pair to declarative sentence conversion, and predicate unification, all with minimum post-editing. We present systematic evaluation on DART as well as new state-of-the-art results on WebNLG 2017 to show that DART (1) poses new challenges to existing data-to-text datasets and (2) facilitates out-of-domain generalization. Our data and code can be found at https://github.com/Yale-LILY/dart.",
}

Contact Name

Dragomir Radev, Rui Zhang, Nazneen Rajani

Contact Email

{dragomir.radev, r.zhang}@yale.edu, {nazneen.rajani}@salesforce.com

Has a Leaderboard?

yes

Leaderboard Link

Leaderboard

Leaderboard Details

Several state-of-the-art table-to-text models were evaluated on DART, such as BART (Lewis et al., 2020), Seq2Seq-Att (MELBOURNE) and End-to-End Transformer (Castro Ferreira et al., 2019). The leaderboard reports BLEU, METEOR, TER, MoverScore, BERTScore and BLEURT scores.

Languages and Intended Use

Multilingual?

no

Covered Dialects

It is aggregated from multiple other datasets that use general US-American or British English without differentiation between dialects.

Covered Languages

English

Whose Language?

The dataset is aggregated from multiple others that were crowdsourced on different platforms.

License

mit: MIT License

Intended Use

The dataset is aimed to further research in natural language generation from semantic data.

Primary Task

Data-to-Text

Communicative Goal

The speaker is required to produce coherent sentences and construct a tree-structured ontology of the column headers.

Credit

Curation Organization Type(s)

academic, industry

Curation Organization(s)

Yale University, Salesforce Research, Penn State University, The University of Hong Kong, MIT

Dataset Creators

Linyong Nan, Dragomir Radev, Rui Zhang, Amrit Rau, Abhinand Sivaprasad, Chiachun Hsieh, Xiangru Tang, Aadit Vyas, Neha Verma, Pranav Krishna, Yangxiaokang Liu, Nadia Irwanto, Jessica Pan, Faiaz Rahman, Ahmad Zaidi, Mutethia Mutuma, Yasin Tarabar, Ankit Gupta, Tao Yu, Yi Chern Tan, Xi Victoria Lin, Caiming Xiong, Richard Socher, Nazneen Fatema Rajani

Who added the Dataset to GEM?

Miruna Clinciu contributed the original data card and Yacine Jernite wrote the initial data loader. Sebastian Gehrmann migrated the data card and the loader to the new format.

Dataset Structure

Data Fields

- tripleset: a list of tuples; each tuple has 3 items.
- subtree_was_extended: a boolean variable (true or false).
- annotations: a list of dicts, each with source and text keys.
  - source: a string mentioning the name of the source table.
  - text: a sentence string.

Reason for Structure

The structure is supposed to be able to represent more complex structures beyond "flat" attribute-value pairs, instead encoding hierarchical relationships.

How were labels chosen?

They are a combination of those from existing datasets and new annotations that take advantage of the hierarchical structure

Example Instance

 {
    "tripleset": [
      [
        "Ben Mauk",
        "High school",
        "Kenton"
      ],
      [
        "Ben Mauk",
        "College",
        "Wake Forest Cincinnati"
      ]
    ],
    "subtree_was_extended": false,
    "annotations": [
      {
        "source": "WikiTableQuestions_lily",
        "text": "Ben Mauk, who attended Kenton High School, attended Wake Forest Cincinnati for college."
      }
    ]
  }

Data Splits

| Input Unit | Examples | Vocab Size | Words per SR | Sents per SR | Tables |
| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- |
| Triple Set | 82,191 | 33.2K | 21.6 | 1.5 | 5,623 |

| Train | Dev | Test |
| ------------- | ------------- | ------------- |
| 62,659 | 6,980 | 12,552 |

Statistics of DART decomposed by different collection methods. DART exhibits a great deal of topical variety in terms of the number of unique predicates, the number of unique triples, and the vocabulary size. These statistics are computed from DART v1.1.1; the number of unique predicates reported is post-unification (see Section 3.4). SR: Surface Realization. (details in Table 1 and 2).

Splitting Criteria

For WebNLG 2017 and Cleaned E2E, DART uses the original data splits. For the new annotation on WikiTableQuestions and WikiSQL, random splitting would make train, dev, and test splits contain similar tables and similar <triple-set, sentence> examples. They are thus split based on Jaccard similarity such that no training example has a similarity with a test example of over 0.5.

Dataset in GEM

Rationale for Inclusion in GEM

Why is the Dataset in GEM?

DART is a large and open-domain structured DAta Record to Text generation corpus with high-quality sentence annotations with each input being a set of entity-relation triples following a tree-structured ontology.

Similar Datasets

yes

Unique Language Coverage

no

Difference from other GEM datasets

The tree structure is unique among GEM datasets

Ability that the Dataset measures

Reasoning, surface realization

GEM-Specific Curation

Modified for GEM?

no

Additional Splits?

no

Getting Started with the Task

Pointers to Resources

Experimental results on DART show that the BART model has the highest performance among the three models, with a BLEU score of 37.06. This is attributed to BART’s generalization ability due to pretraining (Table 4).

Previous Results

Previous Results

Measured Model Abilities

Reasoning, surface realization

Metrics

BLEU, MoverScore, BERT-Score, BLEURT

Proposed Evaluation

The leaderboard uses the combination of BLEU, METEOR, TER, MoverScore, BERTScore, PARENT and BLEURT to overcome the limitations of the n-gram overlap metrics.
A small scale human annotation of 100 data points was conducted along the dimensions of (1) fluency - a sentence is natural and grammatical, and (2) semantic faithfulness - a sentence is supported by the input triples.

Previous results available?

yes

Other Evaluation Approaches

n/a

Relevant Previous Results

BART currently achieves the best performance according to the leaderboard.

Dataset Curation

Original Curation

Original Curation Rationale

Through DART, the dataset creators encourage further research in natural language generation from semantic data. DART provides high-quality sentence annotations with each input being a set of entity-relation triples in a tree structure.

Communicative Goal

The speaker is required to produce coherent sentences and construct a tree-structured ontology of the column headers.

Sourced from Different Sources

yes

Source Details

Language Data

How was Language Data Obtained?

Found, Created for the dataset

Where was it found?

Offline media collection

Creation Process

Creators proposed a two-stage annotation process for constructing triple set sentence pairs based on a tree-structured ontology of each table. First, internal skilled annotators denote the parent column for each column header. Then, a larger number of annotators provide a sentential description of an automatically-chosen subset of table cells in a row. To form a triple set sentence pair, the highlighted cells can be converted to a connected triple set automatically according to the column ontology for the given table.

Language Producers

No further information about the MTurk workers has been provided.

Topics Covered

The sub-datasets are from Wikipedia, DBPedia, and artificially created restaurant data.

Data Validation

validated by crowdworker

Was Data Filtered?

not filtered

Structured Annotations

Additional Annotations?

none

Annotation Service?

no

Consent

Any Consent Policy?

no

Justification for Using the Data

The new annotations are based on Wikipedia which is in the public domain and the other two datasets permit reuse (with attribution)

Private Identifying Information (PII)

Contains PII?

no PII

Justification for no PII

None of the datasets talk about individuals

Maintenance

Any Maintenance Plan?

no

Broader Social Context

Previous Work on the Social Impact of the Dataset

Usage of Models based on the Data

no

Impact on Under-Served Communities

Addresses needs of underserved Communities?

no

Discussion of Biases

Any Documented Social Biases?

no

Are the Language Producers Representative of the Language?

No, the annotators are raters on crowdworking platforms and thus only represent their demographics.

Considerations for Using the Data

PII Risks and Liability

Licenses

Copyright Restrictions on the Dataset

open license - commercial use allowed

Copyright Restrictions on the Language Data

open license - commercial use allowed

Known Technical Limitations

Technical Limitations

The dataset may contain some social biases, as the input sentences are based on Wikipedia (WikiTableQuestions, WikiSQL, WebNLG). Studies have shown that the English Wikipedia contains gender biases (Dinan et al., 2020), racial biases ([Papakyriakopoulos et al., 2020](https://dl.acm.org/doi/pdf/10.1145/3351095.3372843)) and geographical bias (Livingstone et al., 2010). More info.

Unsuited Applications

The end-to-end transformer has the lowest performance since the transformer model needs intermediate pipeline planning steps to have higher performance. Similar findings can be found in Castro Ferreira et al., 2019.

Downloads last month
809
Edit dataset card
Evaluate models HF Leaderboard