|
@@ -5,6 +5,7 @@ Task definitions for doit.
|
|
|
Run the steps to generate and query our datasets using RDFox.
|
|
|
"""
|
|
|
|
|
|
+from pathlib import Path
|
|
|
from os import path
|
|
|
from doit.tools import LongRunning
|
|
|
|
|
@@ -29,41 +30,15 @@ data_csv = [
|
|
|
'data/unfcc_processed_USA_ghg.csv'
|
|
|
]
|
|
|
|
|
|
-ontology_ffs = [
|
|
|
- 'data/probs.fss'
|
|
|
+PRODCOM_DATA_CSV = [
|
|
|
+ 'data/PRODCOM2016DATA.csv',
|
|
|
+ 'data/PRODCOM2017DATA.csv',
|
|
|
+ 'data/PRODCOM2018DATA.csv',
|
|
|
]
|
|
|
|
|
|
probs_original_data = [
|
|
|
'data/probs_original_data.nt.gz'
|
|
|
]
|
|
|
-probs_enhanced_data = [
|
|
|
- 'data/probs_enhanced_data.nt.gz'
|
|
|
-]
|
|
|
-
|
|
|
-reasoning_input = ontology_ffs + probs_enhanced_data + [
|
|
|
- 'scripts/shared/setup-RDFox.rdfox',
|
|
|
- 'scripts/shared/init-reasoning.rdfox',
|
|
|
- 'scripts/reasoning/input.rdfox',
|
|
|
- 'scripts/reasoning/process.rdfox',
|
|
|
- 'scripts/reasoning/rules.dlog'
|
|
|
-]
|
|
|
-
|
|
|
-
|
|
|
-def task_ontology_conversion():
|
|
|
- """Converts the Turtle ontology into Functional-Style OWL."""
|
|
|
- return {
|
|
|
- 'file_dep': [
|
|
|
- 'ontology/probs.ttl',
|
|
|
- 'scripts/ontology-conversion/master.rdfox',
|
|
|
- 'scripts/shared/setup-RDFox.rdfox',
|
|
|
- 'scripts/ontology-conversion/init.rdfox',
|
|
|
- 'scripts/ontology-conversion/convert.rdfox'
|
|
|
- ],
|
|
|
- 'targets': ontology_ffs,
|
|
|
- 'actions': [
|
|
|
- f'{rdfox_path} -sandbox-directory {dir_path} sandbox {dir_path} scripts/ontology-conversion/master'
|
|
|
- ],
|
|
|
- }
|
|
|
|
|
|
|
|
|
def task_preprocess():
|
|
@@ -71,10 +46,11 @@ def task_preprocess():
|
|
|
return {
|
|
|
'file_dep': [
|
|
|
'scripts/preprocess.py',
|
|
|
+
|
|
|
+ # TODO: generate this list by replacing "data/" with "raw_data/" in
|
|
|
+ # the `data_csv` list
|
|
|
'raw_data/ct-2018-exports.csv',
|
|
|
'raw_data/ct-2018-imports.csv',
|
|
|
- 'raw_data/EHS_Houseing_stock_2018_byAge.csv',
|
|
|
- 'raw_data/EHS_Houseing_stock_2018_byType.csv',
|
|
|
'raw_data/HSCodeandDescription_2017.csv',
|
|
|
'raw_data/Object_table_for_ontoloy.csv',
|
|
|
'raw_data/PRD_2016_20200617_185122.csv',
|
|
@@ -89,119 +65,59 @@ def task_preprocess():
|
|
|
],
|
|
|
}
|
|
|
|
|
|
-
|
|
|
-def task_data_conversion():
|
|
|
- """Reads CSV files, runs all the rules, and converts all of them into RDF."""
|
|
|
- return {
|
|
|
- 'file_dep': ontology_ffs + data_csv + [
|
|
|
- 'scripts/shared/setup-RDFox.rdfox',
|
|
|
- 'scripts/shared/init-conversion.rdfox',
|
|
|
- 'scripts/data-conversion/input.rdfox',
|
|
|
- 'scripts/data-conversion/load_data.rdfox',
|
|
|
- 'scripts/data-conversion/map.dlog',
|
|
|
- 'scripts/data-conversion/master.rdfox',
|
|
|
- 'scripts/data-conversion/master-pipeline.rdfox',
|
|
|
- 'scripts/data-conversion/output.rdfox',
|
|
|
- 'scripts/data-conversion/save_data.rdfox',
|
|
|
- 'scripts/data-conversion/unit_conversion.dlog',
|
|
|
- ],
|
|
|
- 'targets': probs_original_data,
|
|
|
- 'actions':
|
|
|
- [
|
|
|
- f'{rdfox_path} -sandbox-directory {dir_path} sandbox {dir_path} scripts/data-conversion/master'
|
|
|
- ],
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-def task_data_enhancement():
|
|
|
+def task_data_conversion_prodcom():
|
|
|
"""Reads CSV files, runs all the rules, and converts all of them into RDF."""
|
|
|
- return {
|
|
|
- 'file_dep': ontology_ffs + data_csv + probs_original_data + [
|
|
|
- 'scripts/shared/setup-RDFox.rdfox',
|
|
|
- 'scripts/shared/init-conversion.rdfox',
|
|
|
- 'scripts/data-enhancement/input.rdfox',
|
|
|
- 'scripts/data-enhancement/master.rdfox',
|
|
|
- 'scripts/data-enhancement/master-pipeline.rdfox',
|
|
|
- 'scripts/data-enhancement/process.rdfox',
|
|
|
- 'scripts/data-enhancement/rules.dlog',
|
|
|
- 'scripts/data-enhancement/save_data.rdfox',
|
|
|
- 'scripts/data-enhancement/export_enhanced_data.rq',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_compatibility_signature.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_composition_hierarchy.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_connected_bounds.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_connected_members.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_connected_wdf.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_empty_observations.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_equivalence.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_f_.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_f_stats.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_i_.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_i_stats.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_m_.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_m_stats.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_main.rdfox',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_missing_measurement.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_ordering.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_parallel_world.dlog',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_query_.rdfox',
|
|
|
- 'scripts/data-enhancement/equivalence_composition/y_query_stats.rdfox',
|
|
|
- ],
|
|
|
- 'targets': probs_enhanced_data,
|
|
|
- 'actions':
|
|
|
- [
|
|
|
- f'{rdfox_path} -sandbox-directory {dir_path} sandbox {dir_path} scripts/data-enhancement/master'
|
|
|
- ],
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-def task_test_queries():
|
|
|
- """Reads the RDF file with the data, answers some queries."""
|
|
|
- return {
|
|
|
- 'file_dep': reasoning_input + [
|
|
|
- 'scripts/test-queries/master.rdfox',
|
|
|
- 'scripts/test-queries/master-pipeline.rdfox',
|
|
|
- 'scripts/test-queries/run_queries.rdfox',
|
|
|
- 'scripts/test-queries/queries/query18.rq',
|
|
|
- 'scripts/test-queries/queries/query18g.rq',
|
|
|
- 'scripts/test-queries/queries/query26.rq',
|
|
|
- 'scripts/test-queries/queries/query32.rq',
|
|
|
- 'scripts/test-queries/queries/query33.rq',
|
|
|
- 'scripts/test-queries/queries/queryAllObs.rq',
|
|
|
- 'scripts/test-queries/queries/queryHMC.rq',
|
|
|
- 'scripts/test-queries/queries/queryHMO.rq',
|
|
|
- 'scripts/test-queries/queries/queryHMObs.rq',
|
|
|
- 'scripts/test-queries/queries/queryROC.rq',
|
|
|
- 'scripts/test-queries/queries/queryWDF.rq',
|
|
|
- ],
|
|
|
- 'targets': [
|
|
|
- 'output/query18.csv',
|
|
|
- 'output/query18g.csv',
|
|
|
- 'output/query26.csv',
|
|
|
- 'output/query32.csv',
|
|
|
- 'output/query33.csv',
|
|
|
- 'output/queryAllObs.csv',
|
|
|
- 'output/queryHMC.csv',
|
|
|
- 'output/queryHMO.csv',
|
|
|
- 'output/queryHMObs.csv',
|
|
|
- 'output/queryROC.csv',
|
|
|
- 'output/queryWDF.csv',
|
|
|
- ],
|
|
|
- 'actions': [
|
|
|
- [rdfox_path, "-sandbox-directory", dir_path, "sandbox", dir_path, "scripts/test-queries/master"]
|
|
|
- ],
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-def task_reasoning():
|
|
|
- """Reads the RDF file with the data and starts the RDFox endpoint."""
|
|
|
- cmd = [rdfox_path, "-sandbox-directory", dir_path, "sandbox", dir_path, "scripts/reasoning/master"]
|
|
|
- return {
|
|
|
- 'file_dep': reasoning_input + [
|
|
|
- 'scripts/reasoning/master.rdfox',
|
|
|
- 'scripts/reasoning/master-pipeline.rdfox',
|
|
|
- ],
|
|
|
- "uptodate": [False],
|
|
|
- 'actions': [
|
|
|
- LongRunning(cmd, shell=False)
|
|
|
- ],
|
|
|
- }
|
|
|
+ for data_file in PRODCOM_DATA_CSV:
|
|
|
+ data_file = Path(data_file)
|
|
|
+ target_file = Path("outputs") / (data_file.stem + ".nt.gz")
|
|
|
+ yield {
|
|
|
+ 'name': data_file.stem,
|
|
|
+ 'file_dep': [
|
|
|
+ data_file,
|
|
|
+ "scripts/load_data_prodcom.rdfox",
|
|
|
+ "scripts/map_prodcom.dlog",
|
|
|
+ ],
|
|
|
+ 'targets': [target_file],
|
|
|
+ 'actions': [
|
|
|
+ [
|
|
|
+ "python",
|
|
|
+ "scripts/convert_data.py",
|
|
|
+ "prodcom",
|
|
|
+ data_file,
|
|
|
+ target_file,
|
|
|
+ ]
|
|
|
+ ],
|
|
|
+ }
|
|
|
+
|
|
|
+# TODO: repeat this type of task for the other data files:
|
|
|
+#
|
|
|
+# (1) COMTRADE data ("ct-2018-exports" etc.)
|
|
|
+#
|
|
|
+# (2) Classification code definitions "PRD_20XX..."
|
|
|
+#
|
|
|
+# (3) Correspondence tables "PRC_2017_2016.csv"
|
|
|
+#
|
|
|
+# Each of these should be converted separately, so that they can all run in parallel.
|
|
|
+
|
|
|
+
|
|
|
+# def task_data_conversion():
|
|
|
+# """Reads CSV files, runs all the rules, and converts all of them into RDF."""
|
|
|
+# return {
|
|
|
+# 'file_dep': ontology_ffs + data_csv + [
|
|
|
+# 'scripts/shared/setup-RDFox.rdfox',
|
|
|
+# 'scripts/shared/init-conversion.rdfox',
|
|
|
+# 'scripts/data-conversion/input.rdfox',
|
|
|
+# 'scripts/data-conversion/load_data.rdfox',
|
|
|
+# 'scripts/data-conversion/map.dlog',
|
|
|
+# 'scripts/data-conversion/master.rdfox',
|
|
|
+# 'scripts/data-conversion/master-pipeline.rdfox',
|
|
|
+# 'scripts/data-conversion/output.rdfox',
|
|
|
+# 'scripts/data-conversion/save_data.rdfox',
|
|
|
+# 'scripts/data-conversion/unit_conversion.dlog',
|
|
|
+# ],
|
|
|
+# 'targets': probs_original_data,
|
|
|
+# 'actions':
|
|
|
+# [
|
|
|
+# f'{rdfox_path} -sandbox-directory {dir_path} sandbox {dir_path} scripts/data-conversion/master'
|
|
|
+# ],
|
|
|
+# }
|