UNPKG

vega-datasets

Version:

Common repository for example datasets used by Vega related projects.

1,516 lines (1,515 loc) 136 kB
{ "name": "vega-datasets", "description": "Common repository for example datasets used by Vega related projects. \nBSD-3-Clause license applies only to package code and infrastructure. Users should verify their use of datasets \ncomplies with the license terms of the original sources. Dataset license information, where included, \nis a reference starting point only and is provided without any warranty of accuracy or completeness.\n", "homepage": "git+http://github.com/vega/vega-datasets.git", "licenses": [ { "name": "BSD-3-Clause", "path": "https://opensource.org/license/bsd-3-clause", "title": "The 3-Clause BSD License" } ], "contributors": [ { "title": "Vega", "path": "https://vega.github.io" }, { "title": "vega-datasets contributors", "path": "https://github.com/vega/vega-datasets/graphs/contributors" } ], "version": "3.1.0", "created": "2025-03-31T17:24:58.659124+00:00", "resources": [ { "name": "7zip.png", "type": "file", "description": "Application icon from open-source software project. Used in [Image-based Scatter Plot example](https://vega.github.io/vega-lite/examples/scatter_image.html).", "licenses": [ { "title": "GNU Lesser General Public License", "path": "https://www.7-zip.org/license.txt" } ], "sources": [ { "title": "7-Zip", "path": "https://www.7-zip.org/" } ], "path": "7zip.png", "scheme": "file", "format": "png", "mediatype": "image/png", "encoding": "utf-8", "hash": "sha1:6586d6c00887cd48850099c174a42bb1677ade0c", "bytes": 3969 }, { "name": "airports.csv", "type": "table", "description": "Airports in the United States and its territories, including major commercial, regional, \nand municipal airports. 
Contains information about each airport's location (latitude/longitude \ncoordinates), identification codes, name, city, state, and country.", "licenses": [ { "name": "other-open", "title": "https://www.usa.gov/government-works" } ], "sources": [ { "title": "Airports (Data.gov)", "path": "https://catalog.data.gov/dataset/airports-5e97a" } ], "path": "airports.csv", "scheme": "file", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", "hash": "sha1:608ba6d51fa70584c3fa1d31eb94533302553838", "bytes": 210365, "schema": { "fields": [ { "name": "iata", "type": "string" }, { "name": "name", "type": "string" }, { "name": "city", "type": "string" }, { "name": "state", "type": "string" }, { "name": "country", "type": "string" }, { "name": "latitude", "type": "number" }, { "name": "longitude", "type": "number" } ] } }, { "name": "annual-precip.json", "type": "json", "description": "A raster grid of global annual precipitation for the year 2016 at a resolution 1 degree of lon/lat per cell.", "licenses": [ { "title": "Public Domain", "path": "https://www.weather.gov/disclaimer/" } ], "sources": [ { "title": "Climate Forecast System Version 2", "path": "https://www.cpc.ncep.noaa.gov/products/people/wwang/cfsv2_fcst_history/" } ], "path": "annual-precip.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:719e73406cfc08f16dda651513ae1113edd75845", "bytes": 266265 }, { "name": "anscombe.json", "type": "table", "description": "Eleven (x,y) pairs of numbers, with means x̄=9.0 and ȳ=7.5, and identical linear regression \nlines (same slope and intercept) and correlation coefficients (approximately 0.816). 
When plotted, reveals starkly \ndifferent patterns: one shows a linear relationship, another a non-linear curve, the third a near-perfect linear \nrelationship disrupted by a single outlier, and the fourth a near-vertical line of points where a single outlier \nentirely dictates the regression.\n\nIn his 1973 paper \"Graphs in Statistical Analysis\" Yale Professor [Francis Anscombe](https://archives.yale.edu/repositories/12/resources/3711) uses these four datasets \nto argue that visualization is essential to good statistical work, not merely an optional supplement. This was a radical position at a \ntime when most statistical analysis was done through batch processing on mainframes with no graphical output. Serves \nas a powerful demonstration that identical summary statistics can mask radically different patterns in data, making the case that \nstatistical analysis should combine both numerical calculations and graphical examination. \n", "sources": [ { "title": "Anscombe's quartet (Wikipedia)", "path": "https://en.wikipedia.org/wiki/Anscombe%27s_quartet#Data" }, { "title": "Anscombe, F. J. (1973). Graphs in Statistical Analysis. The American Statistician, 27(1):17-21.", "path": "https://www.jstor.org/stable/2682899" } ], "path": "anscombe.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:11ae97090b6263bdf0c8661156a44a5b782e0787", "bytes": 1703, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "Series", "type": "string" }, { "name": "X", "type": "integer" }, { "name": "Y", "type": "number" } ] } }, { "name": "barley.json", "type": "table", "description": "Yields of barley varieties from experiments conducted by the Minnesota Agricultural\nExperiment Station (MAES) across six sites in Minnesota. The USDA Technical Bulletin No. 735\n(December 1940) republished these yields data with explicit credit to MAES as the source.\n\nIt was analyzed by agronomists F.R. Immer, H.K. Hayes, and L. 
Powers in the 1934 paper \"Statistical Determination of Barley Varietal Adaptation\".\n\nR.A. Fisher popularized its use in the field of statistics when he included it in his book \"The Design of Experiments\".\n\nSince then it has been used to demonstrate new visualization techniques, including the trellis charts developed by Richard Becker, William Cleveland and others in the 1990s.\n", "licenses": [ { "name": "Dataset collected by Minnesota Agricultural Experiment Station - license status unspecified" } ], "sources": [ { "title": "The Design of Experiments Reference", "path": "https://en.wikipedia.org/wiki/The_Design_of_Experiments" }, { "title": "Wiebe, G. A., Reinbach-Welch, L., Cowan, P. R. (1940). Yields of Barley Varieties in the United States and Canada, 1932-36. United States: U.S. Department of Agriculture.", "path": "https://books.google.com/books?id=OUfxLocnpKkC&pg=PA19" } ], "path": "barley.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:8dc50de2509b6e197ce95c24c98f90d9d1ab138c", "bytes": 8487, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "yield", "type": "number" }, { "name": "variety", "type": "string" }, { "name": "year", "type": "integer" }, { "name": "site", "type": "string" } ] } }, { "name": "birdstrikes.csv", "type": "table", "description": "Records of reported wildlife strikes received by the U.S. FAA", "licenses": [ { "title": "U.S. 
Government Dataset", "path": "https://resources.data.gov/open-licenses/" } ], "sources": [ { "title": "FAA Wildlife Strike Database", "path": "http://wildlife.faa.gov" } ], "path": "birdstrikes.csv", "scheme": "file", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", "hash": "sha1:1b8b190c9bc02ef7bcbfe5a8a70f61b1616d3f6c", "bytes": 1223329, "schema": { "fields": [ { "name": "Airport Name", "type": "string" }, { "name": "Aircraft Make Model", "type": "string" }, { "name": "Effect Amount of damage", "type": "string" }, { "name": "Flight Date", "type": "date" }, { "name": "Aircraft Airline Operator", "type": "string" }, { "name": "Origin State", "type": "string" }, { "name": "Phase of flight", "type": "string" }, { "name": "Wildlife Size", "type": "string" }, { "name": "Wildlife Species", "type": "string" }, { "name": "Time of day", "type": "string" }, { "name": "Cost Other", "type": "integer" }, { "name": "Cost Repair", "type": "integer" }, { "name": "Cost Total $", "type": "integer" }, { "name": "Speed IAS in knots", "type": "integer" } ] } }, { "name": "budget.json", "type": "table", "description": "Historical and forecasted federal revenue/receipts produced in 2016 by the U.S. Office of Management and Budget.", "licenses": [ { "title": "U.S. 
Government Dataset", "path": "https://resources.data.gov/open-licenses/" } ], "sources": [ { "title": "Office of Management and Budget - Budget FY 2016 - Receipts", "path": "https://www.govinfo.gov/app/details/BUDGET-2016-DB/BUDGET-2016-DB-3" } ], "path": "budget.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:5b18c08b28fb782f54ca98ce6a1dd220f269adf1", "bytes": 391353, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "Source Category Code", "type": "integer" }, { "name": "Source category name", "type": "string" }, { "name": "Source subcategory", "type": "integer" }, { "name": "Source subcategory name", "type": "string" }, { "name": "Agency code", "type": "integer" }, { "name": "Agency name", "type": "string" }, { "name": "Bureau code", "type": "integer" }, { "name": "Bureau name", "type": "string" }, { "name": "Account code", "type": "integer" }, { "name": "Account name", "type": "string" }, { "name": "Treasury Agency code", "type": "integer" }, { "name": "On- or off-budget", "type": "string" }, { "name": "1962", "type": "string" }, { "name": "1963", "type": "string" }, { "name": "1964", "type": "string" }, { "name": "1965", "type": "string" }, { "name": "1966", "type": "string" }, { "name": "1967", "type": "string" }, { "name": "1968", "type": "string" }, { "name": "1969", "type": "string" }, { "name": "1970", "type": "string" }, { "name": "1971", "type": "string" }, { "name": "1972", "type": "string" }, { "name": "1973", "type": "string" }, { "name": "1974", "type": "string" }, { "name": "1975", "type": "string" }, { "name": "1976", "type": "string" }, { "name": "TQ", "type": "string" }, { "name": "1977", "type": "string" }, { "name": "1978", "type": "string" }, { "name": "1979", "type": "string" }, { "name": "1980", "type": "string" }, { "name": "1981", "type": "string" }, { "name": "1982", "type": "string" }, { "name": "1983", "type": "string" }, { "name": "1984", "type": 
"string" }, { "name": "1985", "type": "string" }, { "name": "1986", "type": "string" }, { "name": "1987", "type": "string" }, { "name": "1988", "type": "string" }, { "name": "1989", "type": "string" }, { "name": "1990", "type": "string" }, { "name": "1991", "type": "string" }, { "name": "1992", "type": "string" }, { "name": "1993", "type": "string" }, { "name": "1994", "type": "string" }, { "name": "1995", "type": "string" }, { "name": "1996", "type": "string" }, { "name": "1997", "type": "string" }, { "name": "1998", "type": "string" }, { "name": "1999", "type": "string" }, { "name": "2000", "type": "string" }, { "name": "2001", "type": "string" }, { "name": "2002", "type": "string" }, { "name": "2003", "type": "string" }, { "name": "2004", "type": "string" }, { "name": "2005", "type": "string" }, { "name": "2006", "type": "string" }, { "name": "2007", "type": "string" }, { "name": "2008", "type": "string" }, { "name": "2009", "type": "string" }, { "name": "2010", "type": "string" }, { "name": "2011", "type": "string" }, { "name": "2012", "type": "string" }, { "name": "2013", "type": "string" }, { "name": "2014", "type": "string" }, { "name": "2015", "type": "string" }, { "name": "2016", "type": "string" }, { "name": "2017", "type": "string" }, { "name": "2018", "type": "string" }, { "name": "2019", "type": "string" }, { "name": "2020", "type": "string" } ] } }, { "name": "budgets.json", "type": "table", "description": "U.S. federal budget projections and actual outcomes from 1980 through 2010. Originally [analyzed](https://archive.nytimes.com/www.nytimes.com/interactive/2010/02/02/us/politics/20100201-budget-porcupine-graphic.html) by The New York Times in 2010. 
\nReveals how budget forecasts made in any given year compared to what actually happened, \nwith positive values indicating surpluses (briefly seen around 2000) and negative values \nrepresenting deficits (reaching a particularly large value of -$1.78 trillion during the 2008-2009 financial crisis).", "licenses": [ { "title": "U.S. Government Dataset", "path": "https://resources.data.gov/open-licenses/" } ], "sources": [ { "title": "Office of Management and Budget", "path": "https://www.whitehouse.gov/omb/" } ], "path": "budgets.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:8a909e24f698a3b0f6c637c30ec95e7e17df7ef6", "bytes": 18079, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "budgetYear", "type": "integer", "description": "The year for which the budget outcome is being reported" }, { "name": "forecastYear", "type": "integer", "description": "The year for which the budget was forecast" }, { "name": "value", "type": "number", "description": "The budget outcome or projection value (in trillions of 2010 dollars)" } ] } }, { "name": "burtin.json", "type": "table", "description": "Compares the performance of three antibiotics against 16 different bacteria. 
Based on graphic designer \nWill Burtin's 1951 visualization of antibiotic effectiveness, originally published in Scope Magazine and\nfeatured as an example in the Protovis project, a precursor to D3.js.\n\nNumerical values represent the minimum inhibitory concentration (MIC) of each antibiotic, \nmeasured in units per milliliter, with lower values indicating higher antibiotic\neffectiveness.\n\nAs noted in the Protovis example, \"Recreating this display revealed some minor errors in the original: a missing grid line at 0.01 μg/ml, and an exaggeration of some values for penicillin\".\n\nThe vega-datasets version is largely consistent with the Protovis version, with one correction (changing 'Brucella antracis' to the correct 'Bacillus anthracis') and the addition of a new column, 'Genus', to group related bacterial species together.\n\nThe caption of the original 1951 [visualization](https://graphicdesignarchives.org/wp-content/uploads/wmgda_8616c.jpg) \nreads as follows:\n\n> #### Antibacterial ranges of Neomycin, Penicillin and Streptomycin\n>\n>\n> The chart compares the in vitro sensitivities to neomycin of some of the common pathogens (gram+ in red and gram- in blue) with their sensitivities to penicillin, and streptomycin.\n>\n> The effectiveness of the antibiotics is expressed as the highest dilution in μ/ml. which inhibits the test organism.\n>\n> High dilutions are toward the periphery; consequently the length of the colored bar is proportional to the effectiveness.\n>\n> It is apparent that neomycin is especially effective against Staph. albus and aureus, Streph. fecalis, A. aerogenes, S. typhosa, E. coli, Ps. aeruginosa, Br. abortus, K. pneumoniae, Pr. vulgaris, S. schottmuelleri and M. tuberculosis.\n>\n> Unfortunately, some strains of proteus, pseudomonas and hemolytic streptococcus are resistant to neomycin, although the majority of these are sensitive to neomycin.\n>\n> It also inhibits actinomycetes, but is inactive against viruses and fungi. 
Its mode of action is not understood.\n", "licenses": [ { "title": "BSD License (via Protovis)", "path": "https://mbostock.github.io/protovis/" } ], "sources": [ { "title": "Scope Magazine", "path": "https://graphicdesignarchives.org/projects/scope-magazine-vol-iii-5/" }, { "title": "Protovis Antibiotics Example", "path": "https://mbostock.github.io/protovis/ex/antibiotics-burtin.html" } ], "path": "burtin.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:d8a82abaad7dba4f9cd8cee402ba3bf07e70d0e4", "bytes": 2743, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "Bacteria", "type": "string" }, { "name": "Penicillin", "type": "number" }, { "name": "Streptomycin", "type": "number" }, { "name": "Neomycin", "type": "number" }, { "name": "Gram_Staining", "type": "string" }, { "name": "Genus", "type": "string" } ] } }, { "name": "cars.json", "type": "table", "description": "Collection of car specifications and performance metrics from various automobile manufacturers.", "licenses": [ { "title": "The original was distributed in 1982 for educational and scientific purposes.", "path": "http://lib.stat.cmu.edu/datasets/cars.desc" } ], "sources": [ { "title": "StatLib Datasets Archive", "path": "http://lib.stat.cmu.edu/datasets/" } ], "path": "cars.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:1d56d3fa6da01af9ece2d6397892fe5bb6f47c3d", "bytes": 100492, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "Name", "type": "string" }, { "name": "Miles_per_Gallon", "type": "integer" }, { "name": "Cylinders", "type": "integer" }, { "name": "Displacement", "type": "number" }, { "name": "Horsepower", "type": "integer" }, { "name": "Weight_in_lbs", "type": "integer" }, { "name": "Acceleration", "type": "number" }, { "name": "Year", "type": "date" }, { "name": "Origin", "type": "string" } ] } }, { "name": 
"co2-concentration.csv", "type": "table", "description": "Atmospheric CO2 concentration measurements from Mauna Loa Observatory, Hawaii. \nContains monthly readings from 1958-2020 with two key measurements:\n1. CO2 concentrations in millionths of a [mole](https://en.wikipedia.org/wiki/Mole_(unit)) of CO2 \nper mole of air (parts per million), reported on the 2012 \nSIO manometric mole fraction scale\n2. Seasonally adjusted values where a [4-harmonic fit](https://en.wikipedia.org/wiki/Harmonic_analysis) with linear gain factor \nhas been subtracted to remove the quasi-regular seasonal cycle\nValues are adjusted to 24:00 hours on the 15th of each month. \nOnly includes rows with valid data.\n", "licenses": [ { "title": "Creative Commons Attribution 4.0", "path": "https://creativecommons.org/licenses/by/4.0/" } ], "sources": [ { "title": "Scripps CO2 Program", "path": "https://scrippsco2.ucsd.edu/data/atmospheric_co2/primary_mlo_co2_record" }, { "title": "In-situ CO2 Data", "path": "https://scrippsco2.ucsd.edu/assets/data/atmospheric/stations/in_situ_co2/monthly/monthly_in_situ_co2_mlo.csv" } ], "path": "co2-concentration.csv", "scheme": "file", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", "hash": "sha1:b8715cbd2a8d0c139020a73fdb4d231f8bde193a", "bytes": 18547, "schema": { "fields": [ { "name": "Date", "type": "date" }, { "name": "CO2", "type": "number" }, { "name": "adjusted CO2", "type": "number" } ] } }, { "name": "countries.json", "type": "table", "description": "Key demographic indicators (life expectancy at birth and fertility rate measured \nas babies per woman) for various countries from 1955 to 2000 at 5-year intervals. Includes both \ncurrent values and adjacent time period values (previous and next) for each indicator. Gapminder's \n[data documentation](https://www.gapminder.org/data/documentation/) notes that its philosophy is to fill data gaps with \nestimates and use current geographic boundaries for historical data. 
Gapminder states that it \naims to \"show people the big picture\" rather than support detailed numeric analysis.", "licenses": [ { "title": "Creative Commons Attribution 4.0 International", "path": "https://www.gapminder.org/free-material/" } ], "sources": [ { "title": "Gapminder Foundation - Life Expectancy", "path": "https://docs.google.com/spreadsheets/d/1RehxZjXd7_rG8v2pJYV6aY0J3LAsgUPDQnbY4dRdiSs/edit?gid=176703676#gid=176703676", "version": "14" }, { "title": "Gapminder Foundation - Fertility", "path": "https://docs.google.com/spreadsheets/d/1aLtIpAWvDGGa9k2XXEz6hZugWn0wCd5nmzaRPPjbYNA/edit?gid=176703676#gid=176703676", "version": "14" } ], "path": "countries.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:0070959b7f1a09475baa5099098240ae81026e72", "bytes": 99457, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "_comment", "type": "string" }, { "name": "year", "type": "integer", "description": "Years from 1955 to 2000 at 5-year intervals" }, { "name": "fertility", "type": "number", "description": "Fertility rate (average number of children per woman) for the given year" }, { "name": "life_expect", "type": "number", "description": "Life expectancy in years for the given year" }, { "name": "n_fertility", "type": "number", "description": "Fertility rate for the next 5-year interval" }, { "name": "n_life_expect", "type": "number", "description": "Life expectancy for the next 5-year interval" }, { "name": "country", "type": "string", "description": "Name of the country" } ] } }, { "name": "crimea.json", "type": "table", "description": "Monthly mortality rates from British military hospitals during the Crimean War (1854-1856), which informed \nFlorence Nightingale's groundbreaking work in public health. Nightingale credits Dr. William Farr for \ncompiling the data from the 1858 [Medical and Surgical History of the British Army](http://resource.nlm.nih.gov/62510370R). 
Categorizes \ndeaths into \"zymotic\" diseases (preventable infectious diseases), wounds/injuries, and other causes. \nCovering the period from April 1854 to March 1856, it includes monthly army strength \nalongside mortality figures. Transformed by Nightingale into her now-famous [polar area \ndiagrams](https://iiif.lib.harvard.edu/manifests/view/drs:7420433$25i). \n\nThe annual mortality rates plotted in the chart can be calculated using the formula \n> (Deaths × 1000 × 12) ÷ Army Size. \n\nAs [The Lancet](https://pmc.ncbi.nlm.nih.gov/articles/PMC7252134/) argued in 2020, Nightingale's \ninnovative visualizations proved that \"far more men died of disease, infection, and exposure \nthan in battle—a fact that shocked the British nation.\" Her work also vividly illustrated \nthe dramatic impact of sanitary reforms, particularly in reducing preventable deaths.", "licenses": [ { "title": "Harvard Library - Digitized Content Copyright & Viewer Terms of Use", "path": "https://library.harvard.edu/privacy-terms-use-copyright-information#digitizedcontent" } ], "sources": [ { "title": "Nightingale, Florence. A contribution to the sanitary history of the British army during the late war with Russia. London : John W. Parker and Son, 1859. Table II. 
Table showing the Estimated Average Monthly Strength of the Army; and the Deaths and Annual Rate of Mortality per 1,000 in each month, from April 1854, to March 1856 (inclusive), in the Hospitals of the Army in the East.\n", "path": "https://nrs.lib.harvard.edu/urn-3:hms.count:1177146?n=21" } ], "path": "crimea.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:d2df500c612051a21fe324237a465a62d5fe01b6", "bytes": 2183, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "date", "type": "date", "description": "First day of each month during the observation period, in ISO 8601 format (YYYY-MM-DD)" }, { "name": "wounds", "type": "integer", "description": "Deaths from \"Wounds and Injuries\" which comprised: Luxatio (dislocation), Sub-Luxatio (partial dislocation), Vulnus Sclopitorum (gunshot wounds), Vulnus Incisum (incised wounds), Contusio (bruising), Fractura (fractures), Ambustio (burns) and Concussio-Cerebri (brain concussion)\n" }, { "name": "other", "type": "integer", "description": "Deaths from All Other Causes" }, { "name": "disease", "type": "integer", "description": "Deaths from Zymotic Diseases (preventable infectious diseases)" }, { "name": "army_size", "type": "integer", "description": "Estimated Average Monthly Strength of the Army" } ] } }, { "name": "disasters.csv", "type": "table", "description": "Annual number of deaths from disasters, sourced from EM-DAT (Emergency Events Database) \nmaintained by the Centre for Research on the Epidemiology of Disasters (CRED) at UCLouvain, Belgium. \nProcessed by Our World in Data to standardize country names and world region definitions, converting units,\ncalculating derived indicators, and adapting metadata. 
Deaths are reported as absolute numbers.", "licenses": [ { "title": "EM-DAT terms of use", "path": "https://doc.emdat.be/docs/legal/terms-of-use/" }, { "title": "Creative Commons BY license (Our World in Data)", "path": "https://creativecommons.org/licenses/by/4.0/" } ], "sources": [ { "title": "EM-DAT: The Emergency Events Database", "path": "https://www.emdat.be" }, { "title": "Hannah Ritchie, Pablo Rosado and Max Roser (2022) - Natural Disasters", "path": "https://ourworldindata.org/natural-catastrophes" } ], "path": "disasters.csv", "scheme": "file", "format": "csv", "mediatype": "text/csv", "encoding": "utf-8", "hash": "sha1:0584ed86190870b0089d9ea67c94f3dd3feb0ec8", "bytes": 18840, "schema": { "fields": [ { "name": "Entity", "type": "string" }, { "name": "Year", "type": "integer" }, { "name": "Deaths", "type": "integer" } ] } }, { "name": "driving.json", "type": "table", "description": "Tracks the relationship between driving habits and gasoline prices \nin the United States from 1956 to 2010, a period spanning multiple significant events, including \nthe cheap gas era, Arab oil embargo, energy crisis, record low prices, and the \n\"swing backward\".\n", "sources": [ { "title": "New York Times (citing U.S. 
Energy Information Administration, Federal Highway Administration, and Brookings Institution)", "path": "https://archive.nytimes.com/www.nytimes.com/imagepages/2010/05/02/business/02metrics.html" } ], "path": "driving.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:33d0afc57fb1005e69cd3e8a6c77a26670d91979", "bytes": 3461, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "side", "type": "string", "description": "Label positioning indicator used in the original visualization to optimize \nreadability and prevent overlap", "categories": [ "left", "right", "top", "bottom" ] }, { "name": "year", "type": "integer", "description": "Year of observation from 1956 to 2010" }, { "name": "miles", "type": "integer", "description": "Miles driven per capita per year, ranging from approximately 4,000 to 10,000 miles" }, { "name": "gas", "type": "number", "description": "Price of a gallon of regular grade gasoline, adjusted for inflation" } ] } }, { "name": "earthquakes.json", "type": "json", "description": "Represents approximately one week of continuous monitoring from USGS's \"all earthquakes\" \nreal-time feed, which includes 1,703 seismic events of all magnitudes recorded by the \nUSGS Earthquake Hazards Program from January 31 to February 7, 2018 (UTC). ", "licenses": [ { "title": "U.S. Public Domain", "path": "https://www.usgs.gov/information-policies-and-instructions/copyrights-and-credits" } ], "sources": [ { "title": "USGS Earthquake Feed", "path": "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/all_week.geojson" } ], "path": "earthquakes.json", "scheme": "file", "format": "geojson", "mediatype": "text/geojson", "encoding": "utf-8", "hash": "sha1:ed4c47436c09d5cc5f428c233fbd8074c0346fd0", "bytes": 1219853 }, { "name": "ffox.png", "type": "file", "description": "Application icon from open-source software project. 
Used in [Image-based Scatter Plot example](https://vega.github.io/vega-lite/examples/scatter_image.html).", "licenses": [ { "title": "Mozilla Trademark License", "path": "https://www.mozilla.org/en-US/foundation/trademarks/policy/" } ], "sources": [ { "title": "Mozilla Firefox", "path": "https://www.mozilla.org/firefox/" } ], "path": "ffox.png", "scheme": "file", "format": "png", "mediatype": "image/png", "encoding": "utf-8", "hash": "sha1:0691709484a75e9d8ee55a22b1980d67d239c2c4", "bytes": 17628 }, { "name": "flare-dependencies.json", "type": "table", "description": "Indicates, with `flare.json`, relationships among classes in a software hierarchy.", "path": "flare-dependencies.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:10bbe538daaa34014cd5173b331f7d3c10bfda49", "bytes": 34600, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "source", "type": "integer" }, { "name": "target", "type": "integer" } ] } }, { "name": "flare.json", "type": "table", "description": "Indicates, with `flare-dependencies.json`, relationships among classes in a software hierarchy.", "path": "flare.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:d232ea60f875de87a7d8fc414876e19356a98b6b", "bytes": 20638, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "id", "type": "integer" }, { "name": "name", "type": "string" } ] } }, { "name": "flights-10k.json", "type": "table", "description": "Flight delay statistics (10,000 rows) from U.S. Bureau of Transportation Statistics. \nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. 
DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-10k.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:769a34f3d0442be8f356651463fe925ad8b3759d", "bytes": 892400, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "date", "type": "string" }, { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "origin", "type": "string" }, { "name": "destination", "type": "string" } ] } }, { "name": "flights-200k.arrow", "type": "table", "description": "Flight delay statistics (200,000 rows) from U.S. Bureau of Transportation Statistics. \nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-200k.arrow", "scheme": "file", "format": ".arrow", "mediatype": "application/vnd.apache.arrow.file", "hash": "sha1:74f6b3cf8b779e3ff204be2f5a9762763d50a095", "bytes": 1600864, "schema": { "fields": [ { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "time", "type": "number" } ] } }, { "name": "flights-200k.json", "type": "table", "description": "Flight delay statistics (200,000 rows) from U.S. Bureau of Transportation Statistics. 
\nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-200k.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:4722e02637cf5f38ad9ea5d1f48cae7872dce22d", "bytes": 9863892, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "time", "type": "number" } ] } }, { "name": "flights-20k.json", "type": "table", "description": "Flight delay statistics (20,000 rows) from U.S. Bureau of Transportation Statistics. \nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. 
Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-20k.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:20c920b46db4f664bed3e1420b8348527cd7c41e", "bytes": 1784867, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "date", "type": "string" }, { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "origin", "type": "string" }, { "name": "destination", "type": "string" } ] } }, { "name": "flights-2k.json", "type": "table", "description": "Flight delay statistics (2,000 rows) from U.S. Bureau of Transportation Statistics. \nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-2k.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:d9221dc7cd477209bf87e680be3c881d8fee53cd", "bytes": 178495, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [ { "name": "date", "type": "string" }, { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "origin", "type": "string" }, { "name": "destination", "type": "string" } ] } }, { "name": "flights-3m.parquet", "type": "table", "description": "Flight delay statistics (3 million rows) from U.S. Bureau of Transportation Statistics. 
\nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-3m.parquet", "scheme": "file", "format": "parquet", "mediatype": "application/parquet", "hash": "sha1:9c4e0b480a1a60954a7e5c6bcc43e1c91a73caaa", "bytes": 13493022, "schema": { "fields": [ { "name": "date", "type": "datetime" }, { "name": "delay", "type": "integer" }, { "name": "distance", "type": "integer" }, { "name": "origin", "type": "string" }, { "name": "destination", "type": "string" } ] } }, { "name": "flights-5k.json", "type": "table", "description": "Flight delay statistics (5,000 rows) from U.S. Bureau of Transportation Statistics. \nCollected under regulatory reporting requirements (14 CFR Part 234), which mandate \nthat qualifying airlines report on-time performance data to BTS. Transformed using \n`/scripts/flights.py`", "licenses": [ { "name": "other-open", "path": "https://www.ecfr.gov/current/title-14/chapter-II/subchapter-A/part-234", "title": "Data Collected Under U.S. DOT Regulatory Requirements - License Terms Not Explicitly Specified" } ], "sources": [ { "title": "U.S. 
Bureau of Transportation Statistics", "path": "https://www.transtats.bts.gov/DL_SelectFields.asp?gnoyr_VQ=FGJ&QO_fu146_anzr=b0-gvzr" } ], "path": "flights-5k.json", "scheme": "file", "format": "json", "mediatype": "text/json", "encoding": "utf-8", "hash": "sha1:8459fa09e3ba8197928b5dba0b9f5cc380629758", "bytes": 446167, "dialect": { "json": { "keyed": true } }, "schema": { "fields": [