@comment{
  Conference presentation record (Geothermal Rising Conference 2023).
  The venue and dates live in `note` because `series` is not rendered for
  misc entries; the exporter's placeholder `type` value was dropped so it
  is not printed by styles that show `type`.
}
@misc{aa1b52f1d60e413f81b1e3d559d17416,
  author   = {Taverna, Nicole and Weers, Jon and Porse, Sean and Anderson, Arlene and Frone, Zachary and Holt, Emily and Mello, Scott},
  title    = {An Update on the {Geothermal Data Repository's} Data Standards and Pipelines: Geospatial Data and Distributed Acoustic Sensing Data},
  year     = {2023},
  month    = oct,
  note     = {Presented at the 2023 Geothermal Rising Conference, 1--5 October 2023, Reno, Nevada},
  language = {american},
  langid   = {american},
  keywords = {cloud-optimized, DAS data, data curation, data lake, data pipeline, data science, data standard, data-centric, GDR, geospatial data, geothermal data, Geothermal Data Repository, machine learning},
  abstract = {The Department of Energy's (DOE) Geothermal Data Repository (GDR) team has implemented data standards and automated data pipelines for the following data types: 1) drilling data, 2) geospatial datasets, and 3) DAS data. An additional data pipeline is proposed for stimulation data. These data standards and pipelines are intended to improve the real-world applicability of geothermal machine learning outputs through improving the quality of data. More specifically, through standardizing high-value datasets, the GDR is reducing project-specific data curation requirements, allowing more time to be spent on actual research. By automating this process, the burden of standardization is taken off of the user, overall increasing the availability of standardized data. This paper provides an update on the GDR's transition toward data standardization through automated data pipelines and calls for feedback from the community on how we can improve this process.},
}