Browse Source

Convert PRODCOM only

Stephen Boyle 1 year ago
parent
commit
26cb6c99bc

+ 2 - 2
README.md

@@ -1,6 +1,6 @@
-# PRODCOM and COMTRADE data as PRObs Observations
+# PRODCOM data as PRObs Observations
 
-This repository converts data from the [PRODCOM](https://ec.europa.eu/eurostat/web/prodcom) and UN [COMTRADE](https://comtradeplus.un.org/) databases into a structure defined by the [Physical Resources Observatory (PRObs)](https://github.com/probs-lab/probs-ontology) ontology.
+This repository converts data from the [PRODCOM](https://ec.europa.eu/eurostat/web/prodcom) database into a structure defined by the [Physical Resources Observatory (PRObs)](https://github.com/probs-lab/probs-ontology) ontology.
 
 See [DEVELOPING.md](DEVELOPING.md) for more information about using this repository.
 

+ 0 - 2
data/HSCodeandDescription_2017_defs.dlog

@@ -1,2 +0,0 @@
-[ufu:CurrentImport, :belongsToList, ufct:Comtrade2017List] .
-[ufu:CurrentImport, :useDataPrefix, ufct2017:] .

+ 0 - 5
data/ct-2018-exports_defs.dlog

@@ -1,5 +0,0 @@
-[ufu:CurrentImport, :hasTimePeriod, :TimePeriod_YearOf2018] .
-[ufu:CurrentImport, :partOfDataset, ufct:ct2018exports] .
-[ufu:CurrentImport, :useDataPrefix, ufctd2018exports:] .
-[ufu:CurrentImport, :hasRole, :Export] .
-[ufu:CurrentImport, :useObjectPrefix, ufct2017:] .

+ 0 - 5
data/ct-2018-imports_defs.dlog

@@ -1,5 +0,0 @@
-[ufu:CurrentImport, :hasTimePeriod, :TimePeriod_YearOf2018] .
-[ufu:CurrentImport, :partOfDataset, ufct:ct2018imports] .
-[ufu:CurrentImport, :useDataPrefix, ufctd2018imports:] .
-[ufu:CurrentImport, :hasRole, :Import] .
-[ufu:CurrentImport, :useObjectPrefix, ufct2017:] .

+ 0 - 15
dodo.py

@@ -14,9 +14,6 @@ dir_path = path.dirname(path.realpath(__file__))
 print(f"Running {rdfox_path} from {dir_path}")
 
 data_csv = [
-    'data/ct-2018-exports.csv',
-    'data/ct-2018-imports.csv',
-    'data/HSCodeandDescription_2017.csv',
     'data/Object_table_for_ontoloy.csv',
     'data/PRC_2017_2016.csv',
     'data/PRD_2016_20200617_185122.csv',
@@ -53,23 +50,11 @@ DATA_FILES = [
     ("prodcom", 'data/PRODCOM2016DATA.csv'),
     ("prodcom", 'data/PRODCOM2017DATA.csv'),
     ("prodcom", 'data/PRODCOM2018DATA.csv'),
-    ("comtrade", 'data/ct-2018-imports.csv'),
-    ("comtrade", 'data/ct-2018-exports.csv'),
     ("prodcom_correspondence", 'data/PRC_2017_2016.csv'),
     ("prodcom_list", 'data/PRD_2017_20200617_185035.csv'),
     ("prodcom_list", 'data/PRD_2016_20200617_185122.csv'),
-    ("comtrade_class", 'data/HSCodeandDescription_2017.csv'),
 ]
 
-# TODO: add to this list for other data files
-#
-# (1) COMTRADE data ("ct-2018-exports" etc)
-#
-# (2) Classification code definitions "PRD_20XX..."
-#
-# (3) Correspondence tables "PRC_2017_2016.csv"
-#
-
 
 def task_convert_data():
     """Reads CSV files, runs all the rules, and converts all of them into RDF."""

File diff suppressed because it is too large
+ 0 - 6710
raw_data/HSCodeandDescription_2017.csv


File diff suppressed because it is too large
+ 0 - 6523
raw_data/ct-2018-exports.csv


File diff suppressed because it is too large
+ 0 - 6565
raw_data/ct-2018-imports.csv


+ 0 - 3
scripts/preprocess.py

@@ -73,9 +73,6 @@ if __name__ == "__main__":
         "PRODCOM2016DATA.csv",
         "PRODCOM2017DATA.csv",
         "PRODCOM2018DATA.csv",
-        "ct-2018-exports.csv",
-        "ct-2018-imports.csv",
-        "HSCodeandDescription_2017.csv",
         "Object_table_for_ontoloy.csv",
     ]
     for filename in code_lists:

+ 0 - 123
tests/test_comtrade.py

@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-import pytest
-from numpy import isnan
-from hashlib import sha256
-
-from rdflib import Namespace
-from probs_runner import PROBS, QUANTITYKIND
-from numpy.testing import assert_allclose
-
-COMTRADE2017 = Namespace("https://ukfires.org/probs/ontology/comtrade/2017/")
-GEONAMES = Namespace("https://sws.geonames.org/")
-
-def assert_exact_results(result, expected_value):
-    assert len(result) == 1
-    assert result[0].bound == PROBS.ExactBound
-    assert_allclose(result[0].measurement, expected_value, rtol=1e-3)
-
-
-def assert_no_measurement(result):
-    assert len(result) == 1
-    assert isnan(result[0].measurement)
-
-
-def obj_id(obj_code):
-
-    return ("Object-" + sha256(obj_code.encode('utf-8')).hexdigest())
-
-
-class TestComtradeImport:
-    """Test COMTRADE Import conversion"""
-
-    endpoint_data = ["outputs/ct-2018-imports.nt.gz"]
-
-    @pytest.mark.parametrize("code,expected_value", [
-        ("10121", 1733341),
-        ("20110", 31679041),
-        ("80310", 41745763),
-        ("251120", 1019),
-        ("390110", 447125375),
-        ("520710", 473405),
-        ("620431", 859029),
-        ("721590", 19448264),
-        ("845969", 5402447)
-    ])
-
-    def test_expected_measurements(self, rdfox, code, expected_value):
-        result = rdfox.get_observations(
-            PROBS.TimePeriod_YearOf2018,
-            GEONAMES["2635167"] ,
-            QUANTITYKIND.Mass,
-            PROBS.Import,
-            object_=COMTRADE2017[obj_id(code)],
-        )
-        assert_exact_results(result, expected_value)
-
-
-    @pytest.mark.parametrize("code", [
-        ("440420"),
-        ("722990")
-    ])
-    
-    def test_no_measurements(self, rdfox, code):
-        # Test for cases of no data for measurement
-        result = rdfox.get_observations(
-            PROBS.TimePeriod_YearOf2018,
-            GEONAMES["2635167"] ,
-            QUANTITYKIND.Mass,
-            PROBS.Import,
-            object_=COMTRADE2017[obj_id(code)],
-        )
-        assert_no_measurement(result)
-   
-
-
-class TestComtradeExport:
-    """Test COMTRADE Export conversion"""
-
-    endpoint_data = ["outputs/ct-2018-exports.nt.gz"]
-
-
-    @pytest.mark.parametrize("code,expected_value", [
-        ("10511", 2316447),
-        ("70999", 7979977),
-        ("290371", 543901),
-        ("440325", 27693018),
-        ("540331", 458),
-        ("680430", 66781),
-        ("790111", 6017177),
-        ("851650", 8874060)
-    ])
-
-
-    def test_expected_measurements(self, rdfox, code, expected_value):
-        result = rdfox.get_observations(
-            PROBS.TimePeriod_YearOf2018,
-            GEONAMES["2635167"] ,
-            QUANTITYKIND.Mass,
-            PROBS.Export,
-            object_=COMTRADE2017[obj_id(code)],
-        )
-        assert_exact_results(result, expected_value)
-
-
-
-    @pytest.mark.parametrize("code", [
-       ("140110"),
-       ("350211")
-    ])
-    
-    def test_no_measurements(self, rdfox, code):
-        # Test for cases of no data for measurement
-        result = rdfox.get_observations(
-            PROBS.TimePeriod_YearOf2018,
-            GEONAMES["2635167"] ,
-            QUANTITYKIND.Mass,
-            PROBS.Export,
-            object_=COMTRADE2017[obj_id(code)],
-        )
-        assert_no_measurement(result)
-
-
-
-

+ 0 - 75
tests/test_comtrade_class.py

@@ -1,75 +0,0 @@
-#!/usr/bin/env python3
-import pytest
-from numpy import isnan
-from hashlib import sha256
-
-from rdflib import Namespace
-from probs_runner import PROBS, QUANTITYKIND, answer_queries
-from numpy.testing import assert_allclose
-from decimal import Decimal
-#from uuid import uuid4
-
-
-COMTRADE2017 = Namespace("https://ukfires.org/probs/ontology/comtrade/2017/")
-GEONAMES = Namespace("https://sws.geonames.org/")
-
-
-
-def assert_exact_results(result, expected_value):
-    assert len(result) == 1
-    assert result[0].bound == PROBS.ExactBound
-    assert_allclose(result[0].measurement, expected_value, rtol=1e-3)
-
-
-def assert_no_measurement(result):
-    assert len(result) == 1
-    assert isnan(result[0].measurement)
-
-
-def obj_id(obj_code):
-    return ("Object-" + sha256(obj_code.encode('utf-8')).hexdigest())
-
-def obs_id(obs_code):
-    return ("Observation-" + sha256(obs_code.encode('utf-8')).hexdigest())
-
-def class_id(obj_code):
-    return ("ClassificationCode-" + sha256(obj_code.encode('utf-8')).hexdigest())
-
-
-
-
-
-class TestComtradeClass:
-    """Test COMTRADE classification file conversion"""
-
-    endpoint_data = [ "outputs/HSCodeandDescription_2017.nt.gz" ]
-
-    @pytest.mark.parametrize("code,parent,description", [
-        ("260700", "2607", "Lead ores and concentrates"),
-        ("360300", "3603", \
-         "Fuses and detonators; safety or detonating fuses, percussion or detonating caps, igniters, electric detonators"),
-        ("8202", "82", "Tools, hand; saws and blades for saws of all kinds (including slitting, slotting or toothless blades)")
-    ])
-    
-    def test_object_attributes(self, rdfox, code, parent, description):
-        # test object from COMTRADE classification list has correct attributes
-        obj_name = "\"COMTRADE Object from Code " + code + "\""
-        code_name = "\"" + code + "\""
-        code_description = "\"" + description + "\""
-        parent_name = "\"COMTRADE Object from Code " + parent + "\""
-        query = r""" PREFIX ufct: <https://ukfires.org/probs/ontology/comtrade/>
-                     SELECT ?a WHERE {
-                         ?a :objectName %s ;
-                            :hasClassificationCode ?b .
-                         ?b :codeName %s ;
-                            :codeDescription %s ;
-                            :belongsToList ufct:Comtrade2017List .
-                         ?c :objectName %s ;
-                            :objectComposedOf ?a .
-                     } """ % (obj_name, code_name, code_description, parent_name)
-        result = rdfox.query_records(query)
-        assert len(result) == 1
-
-
-
-