- Toward data lakes as central building blocks for data management and analysis
(Philipp Wieder, Hendrik Nolte),
2022-01-01
DOI
BIBTEX
@article{2_114449,
  abstract = {Data lakes are a fundamental building block for many industrial data analysis solutions and becoming increasingly popular in research. Often associated with big data use cases, data lakes are, for example, used as central data management systems of research institutions or as the core entity of machine learning pipelines. The basic underlying idea of retaining data in its native format within a data lake facilitates a large range of use cases and improves data reusability, especially when compared to the schema-on-write approach applied in data warehouses, where data is transformed prior to the actual storage to fit a predefined schema. Storing such massive amounts of raw data, however, has its very own challenges, spanning from the general data modeling, and indexing for concise querying to the integration of suitable and scalable compute capabilities. In this contribution, influential papers of the last decade have been selected to provide a comprehensive overview of developments and obtained results. The papers are analyzed with regard to the applicability of their input to data lakes that serve as central data management systems of research institutions. To achieve this, contributions to data lake architectures, metadata models, data provenance, workflow support, and FAIR principles are investigated. Last, but not least, these capabilities are mapped onto the requirements of two common research personae to identify open challenges. With that, potential research topics are determined, which have to be tackled toward the applicability of data lakes as central building blocks for research data management.},
  author   = {Philipp Wieder and Hendrik Nolte},
  doi      = {10.3389/fdata.2022.945720},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/114449},
  title    = {Toward data lakes as central building blocks for data management and analysis},
  year     = {2022},
  month    = jan,
}
- Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes
(Hendrik Nolte, Philipp Wieder),
2022-01-01
DOI
BIBTEX
@article{2_121151,
  author  = {Hendrik Nolte and Philipp Wieder},
  doi     = {10.1162/dint_a_00141},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121151},
  title   = {Realising Data-Centric Scientific Workflows with Provenance-Capturing on Data Lakes},
  year    = {2022},
  month   = jan,
}
- Canonical Workflow for Experimental Research
(Dirk Betz, Claudia Biniossek, Christophe Blanchi, Felix Henninger, Thomas Lauer, Philipp Wieder, Peter Wittenburg, Martin Zünkeler),
2022-01-01
DOI
BIBTEX
@article{2_121152,
  author  = {Dirk Betz and Claudia Biniossek and Christophe Blanchi and Felix Henninger and Thomas Lauer and Philipp Wieder and Peter Wittenburg and Martin Zünkeler},
  doi     = {10.1162/dint_a_00123},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121152},
  title   = {Canonical Workflow for Experimental Research},
  year    = {2022},
  month   = jan,
}
- Certification Schemes for Research Infrastructures
(Felix Helfer, Stefan Buddenbohm, Thomas Eckart, Philipp Wieder),
2021-01-01
BIBTEX
@misc{2_108259,
  abstract = {This working paper discusses the use and importance of various certification systems for the field of modern research infrastructures. For infrastructures such as CLARIAH-DE, reliable storage, management and dissemination of research data is an essential task. The certification of various areas, such as the technical architecture used, the work processes used or the qualification level of the staff, is an established procedure to ensure compliance with a variety of standards and quality criteria and to demonstrate the quality and reliability of an infrastructure to researchers, funders and comparable consortia. The working paper conducts this discussion based on an overview of selected certification systems that are of particular importance for CLARIAH-DE, but also for other research infrastructures. In addition to formalised certifications, the paper also addresses the areas of software-specific and self-assessment-based procedures and the different roles of the actors involved.},
  address  = {Göttingen},
  author   = {Felix Helfer and Stefan Buddenbohm and Thomas Eckart and Philipp Wieder},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/108259},
  title    = {Certification Schemes for Research Infrastructures},
  year     = {2021},
  month    = jan,
}
- Sekundäre Nutzung von hausärztlichen Routinedaten ist machbar – Bericht vom RADAR Projekt
(Johannes Hauswaldt, Thomas Bahls, Arne Blumentritt, Iris Demmer, Johannes Drepper, Roland Groh, Stephanie Heinemann, Wolfgang Hoffmann, Valérie Kempter, Johannes Pung, Otto Rienhoff, Falk Schlegelmilch, Philipp Wieder, Ramin Yahyapour, Eva Hummers),
2021-01-01
DOI
BIBTEX
@article{2_97749,
  abstract = {Zusammenfassung Ziel der Studie „Real world“-Daten aus der ambulanten Gesundheitsversorgung sind in Deutschland nur schwer systematisch und longitudinal zu erlangen. Unsere Vision ist eine permanente Datenablage mit repräsentativen, de-identifizierten Patienten- und Versorgungsdaten, längsschnittlich, fortwährend aktualisiert und von verschiedenen Versorgern, mit der Möglichkeit zur Verknüpfung mit weiteren Daten, etwa aus Patientenbefragungen oder biologischer Forschung, zugänglich für andere Forscher. Wir berichten methodische Vorgehensweisen und Ergebnisse aus dem RADAR Projekt.Methodik Untersuchung des Rechtsrahmens, Entwicklung prototypischer technischer Abläufe und Lösungen, mit Machbarkeitsstudie zur Evaluation von technischer und inhaltlicher Funktionalität sowie Eignung für Fragestellungen der Versorgungsforschung.Ergebnisse Ab 2016 entwickelte ein interdisziplinäres Wissenschaftlerteam ein Datenschutzkonzept für Exporte von Versorgungsdaten aus elektronischen Praxisverwaltungssystemen. Eine technische und organisatorische Forschungsinfrastruktur im ambulanten Sektor wurden entwickelt und im Anwendungsfall „Orale Antikoagulation“ (OAK) umgesetzt. In 7 niedersächsischen Hausarztpraxen wurden 100 Patienten gewonnen und nach informierter Einwilligung ihre ausgewählten Behandlungsdaten, reduziert auf 40 relevante Datenfelder, über die Behandlungsdatentransfer-Schnittstelle extrahiert, unmittelbar vor Ort in identifizierende bzw. medizinische Daten getrennt und verschlüsselt zur Treuhandstelle (THS) bzw. an den Datenhalter übertragen. 75 Patienten, die die Einschlusskriterien erfüllten (mind. 1 Jahr Behandlung mit OAK), erhielten einen Lebensqualitäts-Fragebogen über die THS per Post. Von 66 Rücksendungen wurden 63 Fragebogenergebnisse mit den Behandlungsdaten in der Datenablage verknüpft.Schlussfolgerung Die rechtskonforme Machbarkeit der Gewinnung von pseudonymisierten hausärztlichen Routinedaten mit expliziter informierter Patienteneinwilligung und deren wissenschaftliche Nutzung einschließlich Re-Kontaktierung und Einbindung von Fragebogendaten konnte nachgewiesen werden. Die Schutzkonzepte Privacy by design und Datenminimierung (Artikel 25 mit Erwägungsgrund 78 DSGVO) wurden systematisch in das RADAR Projekt integriert und begründen wesentlich, dass der Machbarkeitsnachweis rechtskonformer Primärdatengewinnung und sekundärer Nutzung für Forschungszwecke gelang. Eine Nutzung hinreichend anonymisierter, aber noch sinnvoller hausärztlicher Gesundheitsdaten ohne individuelle Einwilligung ist im bestehenden Rechtsrahmen in Deutschland schwerlich umsetzbar.},
  author   = {Johannes Hauswaldt and Thomas Bahls and Arne Blumentritt and Iris Demmer and Johannes Drepper and Roland Groh and Stephanie Heinemann and Wolfgang Hoffmann and Valérie Kempter and Johannes Pung and Otto Rienhoff and Falk Schlegelmilch and Philipp Wieder and Ramin Yahyapour and Eva Hummers},
  doi      = {10.1055/a-1676-4020},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/97749},
  title    = {Sekundäre Nutzung von hausärztlichen Routinedaten ist machbar – Bericht vom {RADAR} Projekt},
  year     = {2021},
  month    = jan,
}
- An Optimized Single Sign-On Schema for Reliable Multi-Level Security Management in Clouds
(Aytaj Badirova, Shirin Dabbaghi, Faraz Fatemi-Moghaddam, Philipp Wieder, Ramin Yahyapour),
In Proceedings of FiCloud 2021 – 8th International Conference on Future Internet of Things and Cloud,
2021-01-01
DOI
BIBTEX
@inproceedings{2_121153,
  author    = {Aytaj Badirova and Shirin Dabbaghi and Faraz Fatemi-Moghaddam and Philipp Wieder and Ramin Yahyapour},
  booktitle = {Proceedings of {FiCloud} 2021 – 8th International Conference on Future Internet of Things and Cloud},
  doi       = {10.1109/FiCloud49777.2021.00014},
  grolink   = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121153},
  title     = {An Optimized Single Sign-On Schema for Reliable Multi-Level Security Management in Clouds},
  year      = {2021},
  month     = jan,
}
- OLA-HD – Ein OCR-D-Langzeitarchiv für historische Drucke
(Triet Ho Anh Doan, Zeki Mustafa Doğan, Jörg-Holger Panzer, Kristine Schima-Voigt, Philipp Wieder),
2020-01-01
DOI
BIBTEX
@article{2_116509,
  author  = {Triet Ho Anh Doan and Zeki Mustafa Doğan and Jörg-Holger Panzer and Kristine Schima-Voigt and Philipp Wieder},
  doi     = {10.18452/21548},
  grolink = {https://resolver.sub.uni-goettingen.de/purl?gro-2/116509},
  title   = {{OLA-HD} – Ein {OCR-D}-Langzeitarchiv für historische Drucke},
  year    = {2020},
  month   = jan,
}
- menoci: Lightweight Extensible Web Portal enabling FAIR Data Management for Biomedical Research Projects
(Markus Suhr, Christoph Lehmann, Christian Robert Bauer, Theresa Bender, Cornelius Knopp, Luca Freckmann, Björn Öst Hansen, Christian Henke, Georg Aschenbrandt, Lea Katharina Kühlborn, Sophia Rheinländer, Linus Weber, Bartlomiej Marzec, Marcel Hellkamp, Philipp Wieder, Harald Kusch, Ulrich Sax, Sara Yasemin Nussbeck),
2020-01-01
URL
BIBTEX
@misc{2_63412,
  abstract = {Background: Biomedical research projects deal with data management requirements from multiple sources like funding agencies' guidelines, publisher policies, discipline best practices, and their own users' needs. We describe functional and quality requirements based on many years of experience implementing data management for the CRC 1002 and CRC 1190. A fully equipped data management software should improve documentation of experiments and materials, enable data storage and sharing according to the FAIR Guiding Principles while maximizing usability, information security, as well as software sustainability and reusability. Results: We introduce the modular web portal software menoci for data collection, experiment documentation, data publication, sharing, and preservation in biomedical research projects. Menoci modules are based on the Drupal content management system which enables lightweight deployment and setup, and creates the possibility to combine research data management with a customisable project home page or collaboration platform. Conclusions: Management of research data and digital research artefacts is transforming from individual researcher or groups best practices towards project- or organisation-wide service infrastructures. To enable and support this structural transformation process, a vital ecosystem of open source software tools is needed. Menoci is a contribution to this ecosystem of research data management tools that is specifically designed to support biomedical research projects.},
  author   = {Markus Suhr and Christoph Lehmann and Christian Robert Bauer and Theresa Bender and Cornelius Knopp and Luca Freckmann and Björn Öst Hansen and Christian Henke and Georg Aschenbrandt and Lea Katharina Kühlborn and Sophia Rheinländer and Linus Weber and Bartlomiej Marzec and Marcel Hellkamp and Philipp Wieder and Harald Kusch and Ulrich Sax and Sara Yasemin Nussbeck},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/63412},
  title    = {menoci: Lightweight Extensible Web Portal enabling {FAIR} Data Management for Biomedical Research Projects},
  url      = {https://sfb1190.med.uni-goettingen.de/production/literature/publications/106},
  year     = {2020},
  month    = jan,
}
- Designing and piloting a generic research architecture and workflows to unlock German primary care data for secondary use
(Thomas Bahls, Johannes Pung, Stephanie Heinemann, Johannes Hauswaldt, Iris Demmer, Arne Blumentritt, Henriette Rau, Johannes Drepper, Philipp Wieder, Roland Groh, Eva Hummers, Falk Schlegelmilch),
2020-01-01
DOI
BIBTEX
@article{2_68099,
  abstract = {Medical data from family doctors are of great importance to health care researchers but seem to be locked in German practices and, thus, are underused in research. The RADAR project (Routine Anonymized Data for Advanced Health Services Research) aims at designing, implementing and piloting a generic research architecture, technical software solutions as well as procedures and workflows to unlock data from family doctor's practices. A long-term medical data repository for research taking legal requirements into account is established. Thereby, RADAR helps closing the gap between the European countries and to contribute data from primary care in Germany.},
  author   = {Thomas Bahls and Johannes Pung and Stephanie Heinemann and Johannes Hauswaldt and Iris Demmer and Arne Blumentritt and Henriette Rau and Johannes Drepper and Philipp Wieder and Roland Groh and Eva Hummers and Falk Schlegelmilch},
  doi      = {10.1186/s12967-020-02547-x},
  grolink  = {https://resolver.sub.uni-goettingen.de/purl?gro-2/68099},
  title    = {Designing and piloting a generic research architecture and workflows to unlock {German} primary care data for secondary use},
  year     = {2020},
  month    = jan,
}
- OCR-D kompakt: Ergebnisse und Stand der Forschung in der Förderinitiative
(Konstantin Baierer, Matthias Boenig, Elisabeth Engl, Clemens Neudecker, Reinhard Altenhöner, Alexander Geyken, Johannes Mangei, Rainer Stotzka, Andreas Dengel, Martin Jenckel, Alexander Gehrke, Frank Puppe, Stefan Weil, Robert Sachunsky, Lena K. Schiffer, Maciej Janicki, Gerhard Heyer, Florian Fink, Klaus U. Schulz, Nikolaus Weichselbaumer, Saskia Limbach, Mathias Seuret, Rui Dong, Manuel Burghardt, Vincent Christlein, Triet Ho Anh Doan, Zeki Mustafa Dogan, Jörg-Holger Panzer, Kristine Schima-Voigt, Philipp Wieder),
2020-01-01
URL
DOI
BIBTEX
@misc{2_121682,
  abstract      = {Bereits seit einigen Jahren werden große Anstrengungen unternommen, um die im deutschen Sprachraum erschienenen Drucke des 16.-18. Jahrhunderts zu erfassen und zu digitalisieren. Deren Volltexttransformation konzeptionell und technisch vorzubereiten, ist das übergeordnete Ziel des DFG-Projekts OCR-D, das sich mit der Weiterentwicklung von Verfahren der Optical Character Recognition befasst. Der Beitrag beschreibt den aktuellen Entwicklungsstand der OCR-D-Software und analysiert deren erste Teststellung in ausgewählten Bibliotheken.},
  author        = {Konstantin Baierer and Matthias Boenig and Elisabeth Engl and Clemens Neudecker and Reinhard Altenhöner and Alexander Geyken and Johannes Mangei and Rainer Stotzka and Andreas Dengel and Martin Jenckel and Alexander Gehrke and Frank Puppe and Stefan Weil and Robert Sachunsky and Lena K. Schiffer and Maciej Janicki and Gerhard Heyer and Florian Fink and Klaus U. Schulz and Nikolaus Weichselbaumer and Saskia Limbach and Mathias Seuret and Rui Dong and Manuel Burghardt and Vincent Christlein and Triet Ho Anh Doan and Zeki Mustafa Dogan and Jörg-Holger Panzer and Kristine Schima-Voigt and Philipp Wieder},
  doi           = {10.18452/21548},
  grolink       = {https://resolver.sub.uni-goettingen.de/purl?gro-2/121682},
  title         = {{OCR-D} kompakt: Ergebnisse und Stand der Forschung in der Förderinitiative},
  url           = {https://publications.goettingen-research-online.de/handle/2/116509},
  year          = {2020},
  month         = jan,
  internal-note = {NOTE(review): doi is identical to that of entry 2_116509 and url points at handle 2/116509, not 2/121682 -- possibly copied from that record; verify against the publisher record before citing},
}