From a2ca0e735bde14eb698b9da340e8a418b134838a Mon Sep 17 00:00:00 2001 From: anonymous <anonymous> Date: Mon, 28 Jan 2019 18:00:13 +0100 Subject: [PATCH] ODPSH-266: add test for date validation --- .../odsh/i18n/de/LC_MESSAGES/ckanext-odsh.mo | Bin 5188 -> 5363 bytes .../odsh/i18n/de/LC_MESSAGES/ckanext-odsh.po | 2 +- ckanext/odsh/plugin.py | 5 +- .../odsh/tests/test_statistikNordHarvester.py | 628 ------------------ ckanext/odsh/tests/test_upload.py | 94 +++ 5 files changed, 99 insertions(+), 630 deletions(-) delete mode 100644 ckanext/odsh/tests/test_statistikNordHarvester.py create mode 100644 ckanext/odsh/tests/test_upload.py diff --git a/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.mo b/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.mo index c18f411194b2c76da97372e439cf38554b0223d9..fd674326210052b3d4a7aca6618bab63dcb260d8 100644 GIT binary patch delta 1619 zcmX@2@maI}o)F7a1_lO(a0Uhj83qQ1D@-6B0{5^oFbFX)F!ZuAFmN+4Ff3q&s9VFz zz`)ADz;Ke4fq|WYf#Ez<{yHlI13v=;!(&zk1}+8$hR>`F41x>{4F6ae7!(*77zEfD z7$g}O7!29K;`I#PY!HR9Y!C}F*dPX%ut6-YVq;)n137?=fq|2Ofnf?81A_zu1H(M1 z_;xmk1NK49IR@2#5h{P3je&uWfq~&38v}zX0|UcbHU<V21_lOUb_NC!hI$4D3w8zu zSq26MUv>rtUIqq+Y<7qXE7>6$CxaC-Ff3w+IB+{V#D|C385pD(7#J?IGcd3)FfhDh zXJFs~`53B?frEiTmw|zSpM!xxl!1Z4mILCkU=9Y56hkBj0|Q$<0|P@62gIUGs6+(^ z1A{mN14APR#G;uT5DQj9)vbey@8y7m$XTeq>l~1fd;nGV8mjLX2PB04L)Ec!LL4B# z$-qz#iaSY8NDwP>LVT#f$-uzOz`$U_363KM3r>gwyf_&cgc%qZ{5c^$O6G((w2~7N zw-Y%bLA(jdzro4CAjiPK@Ds|H;DV$bOD;$hgmE#{gM+@B3liirxfmGa85kHgL;07v zAZg$&lm?|hP#O^CW?%rNQ(0~X20aD_1_f>g1}~7qxEUBM7#J94aWgPXU}Rvp%ngas z6kbT0DdL4Vu$vbmzLB>c5@-8)A@mtuh|g~FGBAiSFfhD_^4a(p7}P*Hgb(6CXFiAl z$$Sv?g?x}GZ0Can^)x;P1_4ka<%5LmdOk?hY~zD?=pdATr5>u_K9qjO2T3Gvq5RK$ zpj67h@QaUuK^v5q_#r`R%?}C6D1L~8%K0HtH=Q4nm>2Uy9CnZ&5=GbfA!+43KP04o z@-r}KGB7aI3kyJeVk-cNTW<k~#&`h+1|tRrhEf5D1uFy~4m%+LG3cBC#Ai34{QCls zH1bja5>oF3APyH5goKEeAS5jr2|_}~MG$OGJwqsj!4NM9aY!aqVYwhA5q1hfQfn_% z-83k@07|bCgyfPfP=k*MLL7P(YR+vzh(q5(<-Z6rFlaC^Ft7@NvS&R5gPsrrgE<2O zLyQn4aZC|{1mP|rNC?~zg7}DAn1Mlyfq}tDn1Ml&fq@}b7-Rq_2qCF?tuO<FAp--$ zC1FSivWP%@DkTDmb8QibN9;r(9&;CAV6YTnU|{eTVPJ4(U|?t!fyBwl$(gLGlee(C zZ<c2}&$!u);}Ro(W^r+8YD#=weo1^vVoB;`TQ2Fzf?Rb5#RZ8anTa{^rA3+XsYONk zMe#X_NvSyu`6<O2@yL>;dD(gS<$059xuls=^Cs7D$xN=}QrKL_b&6^7c7DFe9lQaX z|MChjZH^L1W}585%g?2donKUxnq87wJb9kbj>((&MJLM&*n*^36<qUDQYR<yN=^3Q S6`K5kmv^!gukqwC;dcN}wZD4+ delta 1626 zcmeyYc|@cBo)F7a1_lO(PzDAD83qQ1BTOJ30_U(YFbFX)Fyyi_FmN+4Ff_0-FsL#x zF!ZoO)Nf*CU|?lnU^vgpz`)MHz;G3+?k+0>13v=;!%J2M24w~YhM%kq3|tHh46<wt z41x>{3>s_<3<?Yk^$b>Q3=EPC3=GjwiBdL*!ge-@g_GDI7R+OVIAjSM0|Ofa1H*PU z1_n+B28MlX3=9$s3=Air;t$y%4tW7J=L1y#Z>T&YI|BnB0|NsGJIEvT3=HDz3=Ap^ z3=FpH3=AR+3=B!^3=Fai3=HM$3=F&s3=C7*Ar4#&)wma`?hHG`fe+ak7^E2(7~Zlo zFlaL{FmQ4}d~V9Yz#zrIz~IWkz`(-5z>vVfz`(=6z>vxTaX=9V1A}fo0|P@N2LppB z0|Uc)4v0^WaxgG(FfcHj=79M03RJ^wsQ4=m1_p5k28Pd2i?}%<7D#hK3|8QTI6$8h z5<>Qz5c51aAt4(CRTs<2z)%lzSq>*8$O@qfE1`4~RJ@xL;*iOl5FgLtWME)sU|?9r z35m;9PzUVcWMB|xU|=}N3GvWXPKZNabAp1Jfq|I|9On$0Tnr3y3=9ljTnzP~M8J^A z1qtFVE@&J>>Frz)ADrT1U;w4dD_jf=dJGH<*SQ!Nycif5*ti)OEEpIV0=XF&CNMHE zba6wXOqv&xCNy~&7z`O07@T+^4k+P;$j{=1M8yi;dI)1HFT}-%cp+)w1}_7H7y|>t zU#PeU9|MCL0|SE=9|MCt$U;7dL4|w}ef4~hpr6VI3CSgV3=9Gc3=C`dAR)Jx4-)i8 z_#hrU59L1sX#}O_w@`&&_#kQEH<ZuF4@s09{E);Y%MS@UPku=D%ixDNsFfcQHB0#+ ziFYGE#9`<8AyM#@9}<#(`573r7#JAX1sE6<85kJqjRYV;A0oiOpvl0%kS74~*$e?l zTrLxU7_>`(fx(D@f#HGx#6o64h(nbHAqHy-LVRir<y#3t5~YhEB;-5=As#6ZgoIG5 zAUN&RGxQ2Vf@(fg;|3_bOAz9aBM^lQmjoe+?uj5IHNSwW`v|3fLupnaNb2Vof|x5W z1aYW=5ZFQnGa-mWJ;3ty3=IB43=A5e{3*o1pv%C(&?y8-EZc=3L3vjQ;`84^5TED? zLqa4}7-B)TFeFZEgdq;=7lt@=hA;zzB?AM)9AO3qX9fm_Yr>EamI4(rj>*ZX#l;HA zlc%yuPEHgM=C@LCOU=kh%}YrwQV35in!JkDTQMb7Atkj~Au2Vqq$sg8H?<f|QOV{{ ztd|(w3KB~)6LS<wi!#yW8G@1JLNZbniVIScGt)9tQxrmjJQXsF74q^+6teU3%kwrL zV!y&T`5!0e<O7^_lN-1=H$UXsz%*G>fM@c4VYbPygrz4N3ky%4&l@mVSU_m<eqrIw f?|B87CR+&mZvM}o%rtqH(9FpR0-}?pL~a5AL~qCG diff --git a/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.po b/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.po index 26f58932..95e25172 100644 --- a/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.po +++ b/ckanext/odsh/i18n/de/LC_MESSAGES/ckanext-odsh.po @@ -178,7 +178,7 @@ msgid "odsh_temporal_end_error_label" msgstr "Bitte wählen Sie ein Ende des Zeitraumes aus" msgid "odsh_temporal_error_label" -msgstr "Bitte wählen Sie einen Zeitraum aus" +msgstr "Bitte geben Sie einen gültigen Zeitraum an" msgid "Name: Fehlender Wert" msgstr "Bitte geben Sie eine Url ein" diff --git a/ckanext/odsh/plugin.py b/ckanext/odsh/plugin.py index 0edcd9eb..32e192bf 100644 --- a/ckanext/odsh/plugin.py +++ b/ckanext/odsh/plugin.py @@ -131,7 +131,8 @@ def odsh_validate_extra_date(key, field, data, errors, context): if not value: # Statistikamt Nord does not always provide temporal_start/end, # but their datasets have to be accepted as they are. - if data[('id',)][:7] != 'StaNord': + if not ('id',) in data or data[('id',)][:7] != 'StaNord': + print(field+' raise missing') raise toolkit.Invalid(field+':odsh_'+field+'_error_label') else: try: @@ -139,7 +140,9 @@ def odsh_validate_extra_date(key, field, data, errors, context): # no matter if the date is like "2012-01-01" or "2012-01-01T00:00:00" datetime.datetime.strptime(value.split('T')[0],'%Y-%m-%d').isoformat() except ValueError: + print(field+' raise wrong') raise toolkit.Invalid(field+':odsh_'+field+'_not_date_error_label') + print(field+' raise nothing') def odsh_validate_extra_date_factory(field): diff --git a/ckanext/odsh/tests/test_statistikNordHarvester.py b/ckanext/odsh/tests/test_statistikNordHarvester.py deleted file mode 100644 index a662c84d..00000000 --- a/ckanext/odsh/tests/test_statistikNordHarvester.py +++ /dev/null @@ -1,628 +0,0 @@ -# -*- coding: utf-8 -*- - -from collections import defaultdict - -import nose -import httpretty -from mock import patch - -from six.moves import xrange - -import ckan.plugins as p -import ckantoolkit.tests.helpers as h - -import ckanext.harvest.model as harvest_model -from ckanext.harvest import queue - -from ckanext.odsh.harvesters import StatistikamtNordHarvester -from ckanext.dcat.interfaces import IDCATRDFHarvester -import ckanext.dcat.harvesters.rdf - - -eq_ = nose.tools.eq_ - - -# This horrible monkey patch is needed because httpretty does not play well -# with redis, so we need to disable it straight after the mocked call is used. -# See https://github.com/gabrielfalcao/HTTPretty/issues/113 - -# Start monkey-patch - -original_rdf_get_content_and_type = DCATRDFHarvester._get_content_and_type - -def _patched_rdf_get_content_and_type(self, url, harvest_job, page=1, content_type=None): - - httpretty.enable() - - value1, value2 = original_rdf_get_content_and_type(self, url, harvest_job, page, content_type) - - httpretty.disable() - - return value1, value2 - -DCATRDFHarvester._get_content_and_type = _patched_rdf_get_content_and_type - -original_json_get_content_and_type = DCATJSONHarvester._get_content_and_type - -def _patched_json_get_content_and_type(self, url, harvest_job, page=1, content_type=None): - - httpretty.enable() - - value1, value2 = original_json_get_content_and_type(self, url, harvest_job, page, content_type) - - httpretty.disable() - - return value1, value2 - -DCATJSONHarvester._get_content_and_type = _patched_json_get_content_and_type - -# End monkey-patch - - -class TestRDFHarvester(p.SingletonPlugin): - - p.implements(IDCATRDFHarvester) - - calls = defaultdict(int) - - def before_download(self, url, harvest_job): - - self.calls['before_download'] += 1 - - if url == 'http://return.none': - return None, [] - elif url == 'http://return.errors': - return None, ['Error 1', 'Error 2'] - else: - return url, [] - - def update_session(self, session): - self.calls['update_session'] += 1 - session.headers.update({'x-test': 'true'}) - return session - - def after_download(self, content, harvest_job): - - self.calls['after_download'] += 1 - - if content == 'return.empty.content': - return None, [] - elif content == 'return.errors': - return None, ['Error 1', 'Error 2'] - else: - return content, [] - - def before_update(self, harvest_object, dataset_dict, temp_dict): - self.calls['before_update'] += 1 - - def after_update(self, harvest_object, dataset_dict, temp_dict): - self.calls['after_update'] += 1 - return None - - def before_create(self, harvest_object, dataset_dict, temp_dict): - self.calls['before_create'] += 1 - - def after_create(self, harvest_object, dataset_dict, temp_dict): - self.calls['after_create'] += 1 - return None - - - -class FunctionalHarvestTest(object): - - @classmethod - def setup_class(cls): - - h.reset_db() - - cls.gather_consumer = queue.get_gather_consumer() - cls.fetch_consumer = queue.get_fetch_consumer() - - # Minimal remote RDF file - cls.rdf_mock_url = 'http://some.dcat.file.rdf' - cls.rdf_content_type = 'application/rdf+xml' - cls.rdf_content = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - </dcat:Dataset> - </dcat:dataset> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/2"> - <dct:title>Example dataset 2</dct:title> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - </rdf:RDF> - ''' - - # Minimal remote RDF file with pagination (1) - # Use slashes for paginated URLs because HTTPretty won't distinguish - # query strings - cls.rdf_mock_url_pagination_1 = 'http://some.dcat.file.pagination.rdf' - cls.rdf_content_pagination_1 = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:hydra="http://www.w3.org/ns/hydra/core#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - </dcat:Dataset> - </dcat:dataset> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/2"> - <dct:title>Example dataset 2</dct:title> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - <hydra:PagedCollection rdf:about="http://some.dcat.file.pagination.rdf/page/1"> - <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">4</hydra:totalItems> - <hydra:lastPage>http://some.dcat.file.pagination.rdf/page/2</hydra:lastPage> - <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2</hydra:itemsPerPage> - <hydra:nextPage>http://some.dcat.file.pagination.rdf/page/2</hydra:nextPage> - <hydra:firstPage>http://some.dcat.file.pagination.rdf/page/1</hydra:firstPage> - </hydra:PagedCollection> - </rdf:RDF> - ''' - - # Minimal remote RDF file with pagination (2) - cls.rdf_mock_url_pagination_2 = 'http://some.dcat.file.pagination.rdf/page/2' - cls.rdf_content_pagination_2 = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" - xmlns:hydra="http://www.w3.org/ns/hydra/core#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/3"> - <dct:title>Example dataset 3</dct:title> - </dcat:Dataset> - </dcat:dataset> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/4"> - <dct:title>Example dataset 4</dct:title> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - <hydra:PagedCollection rdf:about="http://some.dcat.file.pagination.rdf/page/1"> - <hydra:totalItems rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">4</hydra:totalItems> - <hydra:lastPage>http://some.dcat.file.pagination.rdf/page/2</hydra:lastPage> - <hydra:itemsPerPage rdf:datatype="http://www.w3.org/2001/XMLSchema#integer">2</hydra:itemsPerPage> - <hydra:previousPage>http://some.dcat.file.pagination.rdf/page/1</hydra:previousPage> - <hydra:firstPage>http://some.dcat.file.pagination.rdf/page/1</hydra:firstPage> - </hydra:PagedCollection> - </rdf:RDF> - ''' - - # Minimal remote RDF file - cls.rdf_mock_url = 'http://some.dcat.file.rdf' - cls.rdf_content_type = 'application/rdf+xml' - cls.rdf_content = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - </dcat:Dataset> - </dcat:dataset> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/2"> - <dct:title>Example dataset 2</dct:title> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - </rdf:RDF> - ''' - - cls.rdf_remote_file_small = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - </rdf:RDF> - ''' - - # RDF with minimal distribution - cls.rdf_content_with_distribution_uri = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - <dcat:distribution> - <dcat:Distribution rdf:about="https://data.some.org/catalog/datasets/1/resource/1"> - <dct:title>Example resource 1</dct:title> - <dcat:accessURL>http://data.some.org/download.zip</dcat:accessURL> - </dcat:Distribution> - </dcat:distribution> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - </rdf:RDF> - ''' - cls.rdf_content_with_distribution = '''<?xml version="1.0" encoding="utf-8" ?> - <rdf:RDF - xmlns:dct="http://purl.org/dc/terms/" - xmlns:dcat="http://www.w3.org/ns/dcat#" - xmlns:xsd="http://www.w3.org/2001/XMLSchema#" - xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> - <dcat:Catalog rdf:about="https://data.some.org/catalog"> - <dcat:dataset> - <dcat:Dataset rdf:about="https://data.some.org/catalog/datasets/1"> - <dct:title>Example dataset 1</dct:title> - <dcat:distribution> - <dcat:Distribution> - <dct:title>Example resource 1</dct:title> - <dcat:accessURL>http://data.some.org/download.zip</dcat:accessURL> - </dcat:Distribution> - </dcat:distribution> - </dcat:Dataset> - </dcat:dataset> - </dcat:Catalog> - </rdf:RDF> - ''' - - def setup(self): - - harvest_model.setup() - - queue.purge_queues() - - def teardown(cls): - h.reset_db() - - def _create_harvest_source(self, mock_url, **kwargs): - - source_dict = { - 'title': 'Test RDF DCAT Source', - 'name': 'test-rdf-dcat-source', - 'url': mock_url, - 'source_type': 'dcat_rdf', - } - - source_dict.update(**kwargs) - - harvest_source = h.call_action('harvest_source_create', - {}, **source_dict) - - return harvest_source - - def _create_harvest_job(self, harvest_source_id): - - harvest_job = h.call_action('harvest_job_create', - {}, source_id=harvest_source_id) - - return harvest_job - - def _run_jobs(self, harvest_source_id=None): - try: - h.call_action('harvest_jobs_run', - {}, source_id=harvest_source_id) - except Exception, e: - if (str(e) == 'There are no new harvesting jobs'): - pass - - def _gather_queue(self, num_jobs=1): - - for job in xrange(num_jobs): - # Pop one item off the queue (the job id) and run the callback - reply = self.gather_consumer.basic_get( - queue='ckan.harvest.gather.test') - - # Make sure something was sent to the gather queue - assert reply[2], 'Empty gather queue' - - # Send the item to the gather callback, which will call the - # harvester gather_stage - queue.gather_callback(self.gather_consumer, *reply) - - def _fetch_queue(self, num_objects=1): - - for _object in xrange(num_objects): - # Pop item from the fetch queues (object ids) and run the callback, - # one for each object created - reply = self.fetch_consumer.basic_get( - queue='ckan.harvest.fetch.test') - - # Make sure something was sent to the fetch queue - assert reply[2], 'Empty fetch queue, the gather stage failed' - - # Send the item to the fetch callback, which will call the - # harvester fetch_stage and import_stage - queue.fetch_callback(self.fetch_consumer, *reply) - - def _run_full_job(self, harvest_source_id, num_jobs=1, num_objects=1): - - # Create new job for the source - self._create_harvest_job(harvest_source_id) - - # Run the job - self._run_jobs(harvest_source_id) - - # Handle the gather queue - self._gather_queue(num_jobs) - - # Handle the fetch queue - self._fetch_queue(num_objects) - - -class TestDCATHarvestFunctional(FunctionalHarvestTest): - - def test_harvest_create_rdf(self): - - self._test_harvest_create(self.rdf_mock_url, - self.rdf_content, - self.rdf_content_type) - - def _test_harvest_create(self, url, content, content_type, **kwargs): - - # Mock the GET request to get the file - httpretty.register_uri(httpretty.GET, url, - body=content, content_type=content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # this as well - httpretty.register_uri(httpretty.HEAD, url, - status=405, content_type=content_type) - - harvest_source = self._create_harvest_source(url, **kwargs) - - self._run_full_job(harvest_source['id'], num_objects=2) - - # Check that two datasets were created - fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) - results = h.call_action('package_search', {}, fq=fq) - - eq_(results['count'], 2) - for result in results['results']: - assert result['title'] in ('Example dataset 1', - 'Example dataset 2') - - def test_harvest_create_rdf_pagination(self): - - # Mock the GET requests needed to get the file - httpretty.register_uri(httpretty.GET, self.rdf_mock_url_pagination_1, - body=self.rdf_content_pagination_1, - content_type=self.rdf_content_type) - - httpretty.register_uri(httpretty.GET, self.rdf_mock_url_pagination_2, - body=self.rdf_content_pagination_2, - content_type=self.rdf_content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # them as well - httpretty.register_uri(httpretty.HEAD, self.rdf_mock_url_pagination_1, - status=405, - content_type=self.rdf_content_type) - - httpretty.register_uri(httpretty.HEAD, self.rdf_mock_url_pagination_2, - status=405, - content_type=self.rdf_content_type) - - harvest_source = self._create_harvest_source( - self.rdf_mock_url_pagination_1) - - self._run_full_job(harvest_source['id'], num_objects=4) - - # Check that four datasets were created - fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) - results = h.call_action('package_search', {}, fq=fq) - - eq_(results['count'], 4) - eq_(sorted([d['title'] for d in results['results']]), - ['Example dataset 1', 'Example dataset 2', - 'Example dataset 3', 'Example dataset 4']) - - def test_harvest_create_rdf_pagination_same_content(self): - - # Mock the GET requests needed to get the file. Two different URLs but - # same content to mock a misconfigured server - httpretty.register_uri(httpretty.GET, self.rdf_mock_url_pagination_1, - body=self.rdf_content_pagination_1, - content_type=self.rdf_content_type) - - httpretty.register_uri(httpretty.GET, self.rdf_mock_url_pagination_2, - body=self.rdf_content_pagination_1, - content_type=self.rdf_content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # them as well - httpretty.register_uri(httpretty.HEAD, self.rdf_mock_url_pagination_1, - status=405, - content_type=self.rdf_content_type) - - httpretty.register_uri(httpretty.HEAD, self.rdf_mock_url_pagination_2, - status=405, - content_type=self.rdf_content_type) - - harvest_source = self._create_harvest_source( - self.rdf_mock_url_pagination_1) - - self._run_full_job(harvest_source['id'], num_objects=2) - - # Check that two datasets were created - fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) - results = h.call_action('package_search', {}, fq=fq) - - eq_(results['count'], 2) - eq_(sorted([d['title'] for d in results['results']]), - ['Example dataset 1', 'Example dataset 2']) - - def test_harvest_update_unicode_keywords(self): - - self._test_harvest_create(self.ttl_mock_url, - self.ttl_unicode_in_keywords, - self.ttl_content_type) - - def test_harvest_update_commas_keywords(self): - - self._test_harvest_update(self.ttl_mock_url, - self.ttl_commas_in_keywords, - self.ttl_content_type) - - def _test_harvest_update(self, url, content, content_type): - # Mock the GET request to get the file - httpretty.register_uri(httpretty.GET, url, - body=content, content_type=content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # this as well - httpretty.register_uri(httpretty.HEAD, url, - status=405, content_type=content_type) - - harvest_source = self._create_harvest_source(url) - - # First run, will create two datasets as previously tested - self._run_full_job(harvest_source['id'], num_objects=2) - - # Run the jobs to mark the previous one as Finished - self._run_jobs() - - # Mock an update in the remote file - new_file = content.replace('Example dataset 1', - 'Example dataset 1 (updated)') - httpretty.register_uri(httpretty.GET, url, - body=new_file, content_type=content_type) - - # Run a second job - self._run_full_job(harvest_source['id'], num_objects=2) - - # Check that we still have two datasets - fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) - results = h.call_action('package_search', {}, fq=fq) - - eq_(results['count'], 2) - - # Check that the dataset was updated - for result in results['results']: - assert result['title'] in ('Example dataset 1 (updated)', - 'Example dataset 2') - - def test_harvest_update_existing_resources(self): - - existing, new = self._test_harvest_update_resources(self.rdf_mock_url, - self.rdf_content_with_distribution_uri, - self.rdf_content_type) - eq_(new['uri'], 'https://data.some.org/catalog/datasets/1/resource/1') - eq_(new['uri'], existing['uri']) - eq_(new['id'], existing['id']) - - def test_harvest_update_new_resources(self): - - existing, new = self._test_harvest_update_resources(self.rdf_mock_url, - self.rdf_content_with_distribution, - self.rdf_content_type) - eq_(existing['uri'], '') - eq_(new['uri'], '') - nose.tools.assert_is_not(new['id'], existing['id']) - - def _test_harvest_update_resources(self, url, content, content_type): - # Mock the GET request to get the file - httpretty.register_uri(httpretty.GET, url, - body=content, content_type=content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # this as well - httpretty.register_uri(httpretty.HEAD, url, - status=405, content_type=content_type) - - harvest_source = self._create_harvest_source(url) - - # First run, create the dataset with the resource - self._run_full_job(harvest_source['id'], num_objects=1) - - # Run the jobs to mark the previous one as Finished - self._run_jobs() - - # get the created dataset - fq = "+type:dataset harvest_source_id:{0}".format(harvest_source['id']) - results = h.call_action('package_search', {}, fq=fq) - eq_(results['count'], 1) - - existing_dataset = results['results'][0] - existing_resource = existing_dataset.get('resources')[0] - - # Mock an update in the remote file - new_file = content.replace('Example resource 1', - 'Example resource 1 (updated)') - httpretty.register_uri(httpretty.GET, url, - body=new_file, content_type=content_type) - - # Run a second job - self._run_full_job(harvest_source['id']) - - # get the updated dataset - new_results = h.call_action('package_search', {}, fq=fq) - eq_(new_results['count'], 1) - - new_dataset = new_results['results'][0] - new_resource = new_dataset.get('resources')[0] - - eq_(existing_resource['name'], 'Example resource 1') - eq_(len(new_dataset.get('resources')), 1) - eq_(new_resource['name'], 'Example resource 1 (updated)') - return (existing_resource, new_resource) - - def test_harvest_bad_format_rdf(self): - - self._test_harvest_bad_format(self.rdf_mock_url, - self.rdf_remote_file_invalid, - self.rdf_content_type) - - def _test_harvest_bad_format(self, url, bad_content, content_type): - - # Mock the GET request to get the file - httpretty.register_uri(httpretty.GET, url, - body=bad_content, content_type=content_type) - - # The harvester will try to do a HEAD request first so we need to mock - # this as well - httpretty.register_uri(httpretty.HEAD, url, - status=405, content_type=content_type) - - harvest_source = self._create_harvest_source(url) - self._create_harvest_job(harvest_source['id']) - self._run_jobs(harvest_source['id']) - self._gather_queue(1) - - # Run the jobs to mark the previous one as Finished - self._run_jobs() - - # Get the harvest source with the udpated status - harvest_source = h.call_action('harvest_source_show', - id=harvest_source['id']) - - last_job_status = harvest_source['status']['last_job'] - - eq_(last_job_status['status'], 'Finished') - assert ('Error parsing the RDF file' - in last_job_status['gather_error_summary'][0][0]) - diff --git a/ckanext/odsh/tests/test_upload.py b/ckanext/odsh/tests/test_upload.py new file mode 100644 index 00000000..16e765ab --- /dev/null +++ b/ckanext/odsh/tests/test_upload.py @@ -0,0 +1,94 @@ + +# encoding: utf-8 + +import ckan.tests.factories as factories +import ckan.tests.helpers as helpers +from ckan import model +from test_helpers import odsh_test +from routes import url_for +from nose.tools import assert_true, assert_false, assert_equal, assert_in +from ckanext.odsh.helpers import odsh_create_checksum +webtest_submit = helpers.webtest_submit + + +class TestUpload(helpers.FunctionalTestBase): + + _load_plugins = ['odsh', 'spatial_metadata', 'spatial_query'] + + def teardown(self): + model.repo.rebuild_db() + + + @odsh_test() + def test_upload_empty_form_fails(self): + # arrange + form = self._get_package_new_form() + + # act + response = self._submit_form(form) + + # assert + response.mustcontain('Title: Missing value') + response.mustcontain('Description: Missing value') + response.mustcontain('odsh_spatial_uri_error_label') + response.mustcontain('odsh_temporal_error_label') + + @odsh_test() + def test_upload_empty_wrong_spatial_uri(self): + # arrange + form = self._get_package_new_form() + + # act + form[self._get_field_name('spatial_uri')]='wrong' + response = self._submit_form(form) + + # assert + response.mustcontain('odsh_spatial_uri_unknown_error_label') + + @odsh_test() + def test_upload_empty_wrong_date_temporal_start(self): + # arrange + form = self._get_package_new_form() + + # act + form[self._get_field_name('temporal_start')]='2001-12-35' + response1 = self._submit_form(form) + form[self._get_field_name('temporal_end')]='2001-12-35' + response2 = self._submit_form(form) + form[self._get_field_name('temporal_start')]='2001-12-01' + response3 = self._submit_form(form) + form[self._get_field_name('temporal_end')]='2001-12-01' + form[self._get_field_name('temporal_start')]='2001-12-35' + response4 = self._submit_form(form) + form[self._get_field_name('temporal_start')]='11-11-11' + response5 = self._submit_form(form) + form[self._get_field_name('temporal_start')]='11-11-2011' + response6 = self._submit_form(form) + form[self._get_field_name('temporal_start')]='datum' + response7 = self._submit_form(form) + + # assert + response1.mustcontain('odsh_temporal_error_label') + response2.mustcontain('odsh_temporal_error_label') + response3.mustcontain('odsh_temporal_end_not_date_error_label') + response4.mustcontain('odsh_temporal_start_not_date_error_label') + response5.mustcontain('odsh_temporal_start_not_date_error_label') + response6.mustcontain('odsh_temporal_start_not_date_error_label') + + + def _get_field_name(self, field): + checksum = odsh_create_checksum(field) + return 'extras__' + str(checksum) + '__value' + + def _submit_form(self, form): + return webtest_submit(form, 'save', status=200, extra_environ=self.env) + + def _get_package_new_form(self): + app = self._get_test_app() + user = factories.User() + self.env = {'REMOTE_USER': user['name'].encode('ascii')} + response = app.get( + url=url_for(controller='package', action='new'), + extra_environ=self.env, + ) + return response.forms['dataset-edit'] -- GitLab