% Workshop presentation by Nicole Taverna on data-centric AI and OEDI.
% The event name, dates, and location are stored in howpublished because
% misc-type entries do not render a series field.
@misc{4320a064a5864344b5089cdc8c8665e4,
  author       = {Taverna, Nicole},
  title        = {Data-Centric {AI} and the {Open Energy Data Initiative} ({OEDI})},
  howpublished = {Presented at the Solar Applications of Artificial Intelligence and Machine Learning Workshop, 31 October -- 1 November 2023, Alexandria, Virginia},
  type         = {Presentation},
  year         = {2023},
  language     = {American English},
  keywords     = {AI, artificial intelligence, best practices, data, data curation, data-centric, machine learning, OEDI, open energy data initiative, solar},
  abstract     = {This presentation emphasizes the critical importance of data-centric AI. The limitations of model-centric AI when dealing with poor or insufficient data are highlighted, and it is illustrated how training models on inaccurate or noisy data leads to suboptimal results. This talk advocates for a hybrid approach that combines a focus on data quality and model parameters to achieve optimal results. The Open Energy Data Initiative (OEDI) is introduced as a valuable resource for obtaining high-quality energy-related datasets, hosting nearly 2,000 publicly accessible datasets, including 99 solar-related datasets, totaling over 2.7 petabytes of data. OEDI's data lakes enable users to query and work with data without extensive transfers. In conclusion, the significance of data-centric AI and adherence to data curation best practices is emphasized, positioning OEDI as a prime source of high-quality data for AI and machine learning in the renewable energy sector.},
}