Provenance Demo
Contents
Provenance Demo#
Python PROV library:
https://nbviewer.ipython.org/github/trungdong/notebooks/blob/master/PROV%20Tutorial.ipynb
https://github.com/trungdong/prov/blob/master/src/prov/tests/examples.py
ESMValTool Provenance:
from prov.model import ProvDocument
# Create a new, empty provenance document. Every namespace, record and
# relation in this demo is added to this single `doc` object.
doc = ProvDocument()
Namespaces#
# Declare one namespace per kind of thing described in this example.
# Each prefix is bound to ROOCS_URI_PREFIX followed by the prefix name itself.
# NOTE(review): the resulting URIs carry no trailing '/' or '#' separator
# (e.g. https://roocs.org/software) -- confirm this is intended, since a
# qualified name is normally expanded by appending its local part to the URI.
ROOCS_URI_PREFIX = 'https://roocs.org/'
doc.add_namespace('software', uri=ROOCS_URI_PREFIX + 'software') # software that runs the operators (daops)
doc.add_namespace('project', ROOCS_URI_PREFIX + 'project') # projects: copernicus, roocs
doc.add_namespace('workflow', ROOCS_URI_PREFIX + 'workflow') # workflow descriptions
doc.add_namespace('operator', ROOCS_URI_PREFIX + 'operator') # operators: task, job, calculation, algorithm
doc.add_namespace('parameter', ROOCS_URI_PREFIX + 'parameter') # operator parameters and options
doc.add_namespace('file', ROOCS_URI_PREFIX + 'file') # files: netcdf, plots
doc.add_namespace('attribute', ROOCS_URI_PREFIX + 'attribute') # netcdf attributes, header variables
<Namespace: attribute {https://roocs.org/attribute}>
Software#
# Record the daops software as a PROV activity; the identifier carries the
# version string (v0.2.0). It is passed as the starter of each operator run.
daops = doc.activity('software:daops==v0.2.0')
Project#
# Record the project as a PROV agent; the workflow is attributed to it.
project_cds = doc.agent('project:Copernicus Climate Data Store')
Datasets#
# Attributes attached to every file entity in this demo: the variable name 'tas'.
attributes = {'attribute:variable': 'tas'}
# The two source files (MPI and IPSL) that the subset outputs are derived from.
ds_mpi = doc.entity('file:/data/cmip6/mpi_tas_2000-2010.nc', attributes)
ds_ipsl = doc.entity('file:/data/cmip6/ipsl_tas_2000-2010.nc', attributes)
Operators#
# The two operators, each recorded as a PROV activity.
# subset carries its time parameter ('2005') as an extra attribute.
op_subset = doc.activity('operator:subset', other_attributes={'parameter:time': '2005'})
# diff is recorded without any parameters.
op_diff = doc.activity('operator:diff')
Workflow#
# Create the workflow description (diff.json) as an entity
wf_diff = doc.entity('workflow:diff.json')
# Relate the workflow to the project: the workflow entity is attributed to
# the Copernicus Climate Data Store agent
doc.wasAttributedTo(wf_diff, project_cds)
<ProvAttribution: (workflow:diff.json, project:Copernicus Climate Data Store)>
Run Subset Operator#
# Record the start of the subset operator: daops is the starter and the
# workflow entity is the trigger.
doc.start(op_subset, starter=daops, trigger=wf_diff)
<ProvStart: (operator:subset, workflow:diff.json)>
# Generated output file for the MPI dataset
output1 = doc.entity('file:mpi_tas_2005.nc', attributes)
# The output is derived from the MPI source file through the subset operator
doc.wasDerivedFrom(output1, ds_mpi, activity=op_subset)
<ProvDerivation: (file:mpi_tas_2005.nc, file:/data/cmip6/mpi_tas_2000-2010.nc)>
# Generated output file for the IPSL dataset
output2 = doc.entity('file:ipsl_tas_2005.nc', attributes)
# The output is derived from the IPSL source file through the subset operator
doc.wasDerivedFrom(output2, ds_ipsl, activity=op_subset)
<ProvDerivation: (file:ipsl_tas_2005.nc, file:/data/cmip6/ipsl_tas_2000-2010.nc)>
Run Diff Operator#
# Record the start of the diff operator: daops is the starter and the
# workflow entity is the trigger.
doc.start(op_diff, starter=daops, trigger=wf_diff)
<ProvStart: (operator:diff, workflow:diff.json)>
# Generated output of the diff operator: a single file entity
output_diff = doc.entity('file:diff_tas_2005.nc', attributes)
# The diff output depends on both subset outputs, so one derivation is
# recorded per input, each through the diff operator
doc.wasDerivedFrom(output_diff, output1, activity=op_diff)
doc.wasDerivedFrom(output_diff, output2, activity=op_diff)
<ProvDerivation: (file:diff_tas_2005.nc, file:ipsl_tas_2005.nc)>
Show Provenance#
# Print the whole document as text in PROV-N notation: prefix declarations
# first, then the records.
print(doc.get_provn())
document
prefix software <https://roocs.org/software>
prefix project <https://roocs.org/project>
prefix workflow <https://roocs.org/workflow>
prefix operator <https://roocs.org/operator>
prefix parameter <https://roocs.org/parameter>
prefix file <https://roocs.org/file>
prefix attribute <https://roocs.org/attribute>
activity(software:daops==v0.2.0, -, -)
agent(project:Copernicus Climate Data Store)
entity(file:/data/cmip6/mpi_tas_2000-2010.nc, [attribute:variable="tas"])
entity(file:/data/cmip6/ipsl_tas_2000-2010.nc, [attribute:variable="tas"])
activity(operator:subset, -, -, [parameter:time="2005"])
activity(operator:diff, -, -)
entity(workflow:diff.json)
wasAttributedTo(workflow:diff.json, project:Copernicus Climate Data Store)
wasStartedBy(operator:subset, workflow:diff.json, software:daops==v0.2.0, -)
entity(file:mpi_tas_2005.nc, [attribute:variable="tas"])
wasDerivedFrom(file:mpi_tas_2005.nc, file:/data/cmip6/mpi_tas_2000-2010.nc, operator:subset, -, -)
entity(file:ipsl_tas_2005.nc, [attribute:variable="tas"])
wasDerivedFrom(file:ipsl_tas_2005.nc, file:/data/cmip6/ipsl_tas_2000-2010.nc, operator:subset, -, -)
wasStartedBy(operator:diff, workflow:diff.json, software:daops==v0.2.0, -)
entity(file:diff_tas_2005.nc, [attribute:variable="tas"])
wasDerivedFrom(file:diff_tas_2005.nc, file:mpi_tas_2005.nc, operator:diff, -, -)
wasDerivedFrom(file:diff_tas_2005.nc, file:ipsl_tas_2005.nc, operator:diff, -, -)
endDocument