瀏覽代碼

Add RDFox scripts/rules to load PRODCOM bulk data

Only the first 5000 lines of the data file are included in this commit.
Rick Lupton 10 月之前
父節點
當前提交
d5c3f42535

File diff suppressed because it is too large
+ 5001 - 0
raw_data/ds-056120_linear.csv


+ 0 - 0
raw_data/ds-056120_linear_defs.dlog


+ 1 - 1
scripts/convert_data.py

@@ -53,7 +53,7 @@ def convert_data_prodcom(data_type, data_csv, output_path):
         map_file : CODE_DIR / map_file,
         defs_file : data_csv.parent / defs_file,
     }
-    if data_type == "prodcom":
+    if data_type in ("prodcom", "prodcom_new"):
        load_data.append(load_units)
        input_files[units_file] = CODE_DIR / units_file
     datasource = Datasource.from_files(

+ 29 - 0
scripts/load_data_prodcom_new.rdfox

@@ -0,0 +1,29 @@
+PREFIX ufpc:                <http://w3id.org/probs-lab/data/prodcom/>
+PREFIX ufpcd:               <http://w3id.org/probs-lab/data/prodcom_data/>
+PREFIX gnd:                 <https://sws.geonames.org/>
+
+######################################################
+###                      PRODCOM Data              ###
+######################################################
+
+# Register an RDFox data source named "PRODCOM_DATA_NEW" of type
+# delimitedFile. The file is data.csv under the directory held in the
+# dir.datasource shell variable; it is declared to have a header row and to
+# use the double quote as its quoting character.
+dsource register "PRODCOM_DATA_NEW"                             \
+    type    delimitedFile                                       \
+    file    "$(dir.datasource)/data.csv"                        \
+    header  true                                                \
+    quote   '"'
+
+# Expose the "PRODCOM_DATA_NEW" data source as a 6-column tuple table.
+# Each column is bound by name (the {...} templates) to a column of the
+# source file and read with datatype "string":
+#   1 decl, 2 TIME_PERIOD, 3 prccode, 4 indicators, 5 OBS_VALUE, 6 OBS_FLAG
+# NOTE(review): the ufrd: prefix is not among the PREFIX declarations of this
+# script; presumably it is declared by the environment that runs it — confirm.
+tupletable create ufrd:PRODCOM_DATA_NEW                         \
+    dataSourceName  "PRODCOM_DATA_NEW"                          \
+    "columns"       6                                           \
+    "1"             "{decl}"                                    \
+    "1.datatype"    "string"                                    \
+    "2"             "{TIME_PERIOD}"                             \
+    "2.datatype"    "string"                                    \
+    "3"             "{prccode}"                                 \
+    "3.datatype"    "string"                                    \
+    "4"             "{indicators}"                              \
+    "4.datatype"    "string"                                    \
+    "5"             "{OBS_VALUE}"                               \
+    "5.datatype"    "string"     	                            \
+    "6"             "{OBS_FLAG}"                                \
+    "6.datatype"    "string"

+ 53 - 0
scripts/map_prodcom_new.dlog

@@ -0,0 +1,53 @@
+# Basic info from "PRODQNT"
+
+# For every row of ufrd:PRODCOM_DATA_NEW whose indicator is "PRODQNT", derive
+# a :DirectObservation together with its object, region, time period, role
+# (:SoldProduction), dataset (ufpc:PRODCOM) and bound (:ExactBound).
+#
+# The observation IRI is "<ufpcd:><Year>/Observation-<hash>", where <hash> is
+# the SHA256 of the concatenated declarant, year, product code and indicator.
+# The other rules in this file rebuild the same IRI, so the expression must
+# stay in sync with them.
+# NOTE(review): the fields are concatenated without a delimiter before
+# hashing — confirm that distinct (Decl, Year, PRCCODE) combinations cannot
+# produce the same concatenated string.
+:DirectObservation[?ID] ,
+[?ID, :objectDirectlyDefinedBy, ?Object] ,
+[?ID, :hasRegion, ?Region] ,
+[?ID, :hasTimePeriod, ?TimePeriod] ,
+[?ID, :hasRole, :SoldProduction] ,
+[?ID, :partOfDataset, ufpc:PRODCOM] ,
+[?ID, :bound, :ExactBound]
+        :-
+        ufrd:PRODCOM_DATA_NEW(?Decl, ?Year, ?PRCCODE, ?Indicators, ?ObsValue, ?ObsFlag),
+        FILTER(?Indicators = "PRODQNT"),
+        BIND(IRI(CONCAT(STR(ufpcd:), ?Year, "/Observation-", SHA256(CONCAT(?Decl, ?Year, ?PRCCODE, ?Indicators)))) AS ?ID),
+        BIND(IRI(CONCAT(STR(ufpc:), "Object-", ?PRCCODE)) AS ?Object) ,
+        # The region IRI still embeds the literal "TODO-GN-CODE-" followed by
+        # the declarant code, i.e. it is a placeholder in the gnd: namespace.
+        BIND(IRI(CONCAT(STR(gnd:), "TODO-GN-CODE-", ?Decl)) AS ?Region),
+        BIND(IRI(CONCAT(STR(:), "TimePeriod_YearOf", ?Year)) AS ?TimePeriod) .
+
+
+# Additional info from other rows about units and flags
+
+# Attach the numeric value of each "PRODQNT" row to its observation, as a
+# tuple in ufu:NG. The observation IRI is rebuilt with the same hash
+# expression that derives the :DirectObservation, and the rule only fires
+# when that IRI is already a :DirectObservation.
+# NOTE(review): the head predicate is ufu:measurementUnit although the value
+# bound to it is the decimal observation value — confirm this is the intended
+# predicate name.
+ufu:NG(?ID, ufu:measurementUnit, ?Measurement)
+        :-
+        ufrd:PRODCOM_DATA_NEW(?Decl, ?Year, ?PRCCODE, ?Indicators, ?ObsValue, ?ObsFlag),
+        FILTER(?Indicators = "PRODQNT"),
+        BIND(IRI(CONCAT(STR(ufpcd:), ?Year, "/Observation-", SHA256(CONCAT(?Decl, ?Year, ?PRCCODE, ?Indicators)))) AS ?ID) ,
+        :DirectObservation[?ID] ,

+        # Skip the measurement value when the observation carries the ":C"
+        # ("confidential") flag: in that case a zero value really means
+        # "missing".
+        NOT [?ID, :measurementFlag, ":C"],

+        # The value column is loaded as a string, so cast it to xsd:decimal.
+        BIND(xsd:decimal(?ObsValue) AS ?Measurement).
+
+
+# Attach a unit to each observation from the matching "QNTUNIT" row, as a
+# tuple in ufu:NG. The observation IRI is rebuilt by hashing the literal
+# "PRODQNT" in place of this row's own indicator, so that it equals the IRI
+# of the "PRODQNT" observation with the same declarant, year and product
+# code; the rule only fires when that observation exists.
+# The unit IRI is "Unit-" followed by the SHA256 of this row's value column,
+# in the default (:) namespace.
+ufu:NG(?ID, ufu:unit, ?UnitID)
+        :-
+        ufrd:PRODCOM_DATA_NEW(?Decl, ?Year, ?PRCCODE, ?Indicators, ?ObsValue, ?ObsFlag),
+        FILTER(?Indicators = "QNTUNIT"),
+        BIND(IRI(CONCAT(STR(ufpcd:), ?Year, "/Observation-", SHA256(CONCAT(?Decl, ?Year, ?PRCCODE, "PRODQNT")))) AS ?ID) ,
+        :DirectObservation[?ID] ,
+        BIND(IRI(CONCAT(STR(:), "Unit-", SHA256(?ObsValue))) AS ?UnitID) .
+
+
+# Record the value of each "PQNTFLAG" row as the :measurementFlag of the
+# corresponding observation. The observation IRI is rebuilt by hashing the
+# literal "PRODQNT" in place of this row's own indicator, so that it equals
+# the IRI of the "PRODQNT" observation with the same declarant, year and
+# product code; the rule only fires when that observation exists.
+# NOTE(review): the flag is taken from ?ObsValue (column 5); ?ObsFlag
+# (column 6) is bound but never used by any rule here — confirm this is
+# intended.
+[?ID, :measurementFlag, ?ObsValue]
+        :-
+        ufrd:PRODCOM_DATA_NEW(?Decl, ?Year, ?PRCCODE, ?Indicators, ?ObsValue, ?ObsFlag),
+        FILTER(?Indicators = "PQNTFLAG"),
+        BIND(IRI(CONCAT(STR(ufpcd:), ?Year, "/Observation-", SHA256(CONCAT(?Decl, ?Year, ?PRCCODE, "PRODQNT")))) AS ?ID) ,
+        :DirectObservation[?ID] .
+
+
+# EU prefix: "http://data.europa.eu/qw1/prodcom2021/"
+