From 662d73320bf1e3f73b8931a365de3b993e910987 Mon Sep 17 00:00:00 2001 From: Daniel Alley Date: Wed, 6 Nov 2024 20:16:29 -0500 Subject: [PATCH] Allow incremental parsing without filelists or other xml Supplying a NULL value as a filelists or other "path" argument ought to work. closes #423 --- src/python/createrepo_c/__init__.py | 4 +- src/python/xml_parser-py.c | 6 +- src/xml_parser_main_metadata_together.c | 76 ++++++++++--------- tests/python/tests/test_xml_parser.py | 48 ++++++++++++ .../test_xml_parser_main_metadata_together.c | 50 ++++++++++++ 5 files changed, 145 insertions(+), 39 deletions(-) diff --git a/src/python/createrepo_c/__init__.py b/src/python/createrepo_c/__init__.py index db6e8633..958088b7 100644 --- a/src/python/createrepo_c/__init__.py +++ b/src/python/createrepo_c/__init__.py @@ -416,8 +416,8 @@ def _warningcb(warning_type, message): metadata_files = {record.type: record for record in repomd.records} parser = RepositoryReader() parser._primary_xml_path = os.path.join(path, metadata_files["primary"].location_href) - parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href) - parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href) + parser._filelists_xml_path = os.path.join(path, metadata_files["filelists"].location_href) if metadata_files.get("filelists") else None + parser._other_xml_path = os.path.join(path, metadata_files["other"].location_href) if metadata_files.get("other") else None if metadata_files.get("updateinfo"): parser._updateinfo_path = os.path.join(path, metadata_files["updateinfo"].location_href) diff --git a/src/python/xml_parser-py.c b/src/python/xml_parser-py.c index b575feb7..94ea6de3 100644 --- a/src/python/xml_parser-py.c +++ b/src/python/xml_parser-py.c @@ -780,14 +780,14 @@ pkg_iterator_init(_PkgIteratorObject *self, PyObject *args, PyObject *kwargs) static char *kwlist[] = {"primary", "filelists", "other", "newpkgcb", "warningcb", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "sssOO:pkg_iterator_init", kwlist, + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "szzOO:pkg_iterator_init", kwlist, &primary_path, &filelists_path, &other_path, &py_newpkgcb, &py_warningcb)) { return -1; } - if (!primary_path || !filelists_path || !other_path) { - PyErr_SetString(PyExc_TypeError, "file paths must be provided"); + if (!primary_path) { + PyErr_SetString(PyExc_TypeError, "primary file path must be provided"); return -1; } diff --git a/src/xml_parser_main_metadata_together.c b/src/xml_parser_main_metadata_together.c index d84bf07a..a2b00357 100644 --- a/src/xml_parser_main_metadata_together.c +++ b/src/xml_parser_main_metadata_together.c @@ -31,15 +31,16 @@ #define ERR_DOMAIN CREATEREPO_C_ERROR typedef struct { - GSList *in_progress_pkgs_list; - int in_progress_count_primary; - int in_progress_count_filelists; - int in_progress_count_other; - GQueue *finished_pkgs_queue; - cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user - void *newpkgcb_data;// newpkgcb data passed in from user - cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user - void *pkgcb_data; // pkgcb data passed in from user + GSList *in_progress_pkgs_list; + int in_progress_count_primary; + int in_progress_count_filelists; + int in_progress_count_other; + GQueue *finished_pkgs_queue; + cr_XmlParserNewPkgCb newpkgcb; // newpkgcb passed in from user + void *newpkgcb_data;// newpkgcb data passed in from user + cr_XmlParserPkgCb pkgcb; // pkgcb passed in from user + void *pkgcb_data; // pkgcb data passed in from user + cr_PackageLoadingFlags loadingflags; // which callbacks need to be called before the package is considered "finished" loading } cr_CbData; struct _cr_PkgIterator { @@ -67,17 +68,19 @@ struct _cr_PkgIterator { static int queue_package_if_finished(cr_Package *pkg, cr_CbData *cb_data) { - if (pkg && (pkg->loadingflags & CR_PACKAGE_LOADED_PRI) && (pkg->loadingflags & CR_PACKAGE_LOADED_OTH) && - (pkg->loadingflags & CR_PACKAGE_LOADED_FIL)) + if (pkg && ((pkg->loadingflags & cb_data->loadingflags) == cb_data->loadingflags)) { //remove first element in the list cb_data->in_progress_pkgs_list = g_slist_delete_link(cb_data->in_progress_pkgs_list, cb_data->in_progress_pkgs_list); // One package was fully finished cb_data->in_progress_count_primary--; - cb_data->in_progress_count_filelists--; - cb_data->in_progress_count_other--; - + if (cb_data->loadingflags & CR_PACKAGE_LOADED_FIL) { + cb_data->in_progress_count_filelists--; + } + if (cb_data->loadingflags & CR_PACKAGE_LOADED_OTH) { + cb_data->in_progress_count_other--; + } g_queue_push_tail(cb_data->finished_pkgs_queue, pkg); } return CR_CB_RET_OK; @@ -319,8 +322,6 @@ parse_next_section(CR_FILE *target_file, const char *path, cr_ParserData *pd, GE //TODO(amatej): there is quite some overlap with this and cr_load_xml_files, // consider using this api to implement cr_load_xml_files? -// TODO: maybe whether or not individual files are parsed could be controlled by NULL paths? I think cr_load_xml_files -// already works that way. cr_PkgIterator * cr_PkgIterator_new(const char *primary_path, const char *filelists_path, @@ -332,8 +333,6 @@ cr_PkgIterator_new(const char *primary_path, GError **err) { assert(primary_path); - assert(filelists_path); - assert(other_path); assert(!err || *err == NULL); cr_PkgIterator* new_iter = g_new0(cr_PkgIterator, 1); @@ -370,30 +369,41 @@ cr_PkgIterator_new(const char *primary_path, cbdata->newpkgcb_data = newpkgcb_data; new_iter->tmp_err = NULL; + int parse_files_in_primary = 0; GError* tmp_err = new_iter->tmp_err; new_iter->primary_f = cr_open(primary_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + cbdata->loadingflags |= CR_PACKAGE_LOADED_PRI; if (tmp_err) { g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", primary_path); cr_PkgIterator_free(new_iter, err); return NULL; } - new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); - if (tmp_err) { - g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path); - cr_PkgIterator_free(new_iter, err); - return NULL; + if (new_iter->filelists_path) { + new_iter->filelists_f = cr_open(filelists_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + cbdata->loadingflags |= CR_PACKAGE_LOADED_FIL; + if (tmp_err) { + g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", filelists_path); + cr_PkgIterator_free(new_iter, err); + return NULL; + } + } else { + new_iter->filelists_is_done = 1; + parse_files_in_primary = 1; } - new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); - if (tmp_err) { - g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path); - cr_PkgIterator_free(new_iter, err); - return NULL; + if (new_iter->other_path) { + new_iter->other_f = cr_open(other_path, CR_CW_MODE_READ, CR_CW_AUTO_DETECT_COMPRESSION, &tmp_err); + cbdata->loadingflags |= CR_PACKAGE_LOADED_OTH; + if (tmp_err) { + g_propagate_prefixed_error(err, tmp_err, "Cannot open %s: ", other_path); + cr_PkgIterator_free(new_iter, err); + return NULL; + } + } else { + new_iter->other_is_done = 1; } - //TODO(amatej): In the future we could make filelists/other optional if there is a need for it. That would mean we - // should replace the last 0 in primary_parser_data_new depending on whether we have filelists or not. - new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, 0); + new_iter->primary_pd = primary_parser_data_new(newpkgcb_primary, cbdata, pkgcb_primary, cbdata, warningcb, warningcb_data, parse_files_in_primary); new_iter->filelists_pd = filelists_parser_data_new(newpkgcb_filelists, cbdata, pkgcb_filelists, cbdata, warningcb, warningcb_data); new_iter->other_pd = other_parser_data_new(newpkgcb_other, cbdata, pkgcb_other, cbdata, warningcb, warningcb_data); return new_iter; @@ -404,9 +414,7 @@ cr_PkgIterator_parse_next(cr_PkgIterator *iter, GError **err) { cr_CbData *cbdata = (cr_CbData*) iter->cbdata; while (!cr_PkgIterator_is_finished(iter) && g_queue_is_empty(cbdata->finished_pkgs_queue)) { - while ((cbdata->in_progress_count_primary <= cbdata->in_progress_count_filelists || - cbdata->in_progress_count_primary <= cbdata->in_progress_count_other) && - !iter->primary_is_done) + while (((cbdata->in_progress_count_primary <= cbdata->in_progress_count_filelists) && (cbdata->loadingflags & CR_PACKAGE_LOADED_FIL) || (cbdata->in_progress_count_primary <= cbdata->in_progress_count_other) && (cbdata->loadingflags & CR_PACKAGE_LOADED_OTH) || cbdata->loadingflags == CR_PACKAGE_LOADED_PRI) && !iter->primary_is_done) { iter->primary_is_done = parse_next_section(iter->primary_f, iter->primary_path, iter->primary_pd, err); if (*err) { diff --git a/tests/python/tests/test_xml_parser.py b/tests/python/tests/test_xml_parser.py index 043ede22..49005edd 100644 --- a/tests/python/tests/test_xml_parser.py +++ b/tests/python/tests/test_xml_parser.py @@ -1245,6 +1245,54 @@ def warningcb(warn_type, msg): self.assertRaises(StopIteration, next, package_iterator) self.assertTrue(package_iterator.is_finished()) + def test_xml_parser_pkg_iterator_repo02_no_filelists(self): + warnings = [] + def warningcb(warn_type, msg): + warnings.append((warn_type, msg)) + + package_iterator = cr.PackageIterator( + primary_path=REPO_02_PRIXML, filelists_path=None, other_path=REPO_02_OTHXML, + warningcb=warningcb, + ) + packages = list(package_iterator) + + self.assertEqual(len(packages), 2) + self.assertEqual(packages[0].name, "fake_bash") + self.assertEqual(packages[0].changelogs, [ + ('Tomas Mlcoch - 1.1.1-1', 1334664000, '- First release'), + ]) + self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) # the files still get parsed - only from primary.xml + self.assertListEqual(warnings, []) + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + + def test_xml_parser_pkg_iterator_repo02_no_primary(self): + def test(): + cr.PackageIterator( + primary_path=None, filelists_path=REPO_02_FILXML, other_path=REPO_02_OTHXML, + ) + + self.assertRaises(TypeError, test) + + def test_xml_parser_pkg_iterator_repo02_no_other(self): + warnings = [] + def warningcb(warn_type, msg): + warnings.append((warn_type, msg)) + + package_iterator = cr.PackageIterator( + primary_path=REPO_02_PRIXML, filelists_path=REPO_02_FILXML, other_path=None, + warningcb=warningcb, + ) + packages = list(package_iterator) + + self.assertEqual(len(packages), 2) + self.assertEqual(packages[0].name, "fake_bash") + self.assertEqual(packages[0].changelogs, []) + self.assertEqual(packages[0].files, [(None, '/usr/bin/', 'fake_bash')]) + self.assertListEqual(warnings, []) + self.assertRaises(StopIteration, next, package_iterator) + self.assertTrue(package_iterator.is_finished()) + def test_xml_parser_pkg_iterator_repo02_newpkgcb_as_filter(self): def newpkgcb(pkgId, name, arch): if name in {"fake_bash"}: diff --git a/tests/test_xml_parser_main_metadata_together.c b/tests/test_xml_parser_main_metadata_together.c index 288e4877..2fc07c2b 100644 --- a/tests/test_xml_parser_main_metadata_together.c +++ b/tests/test_xml_parser_main_metadata_together.c @@ -120,6 +120,50 @@ test_cr_xml_package_iterator_00(void) g_assert_cmpint(parsed, ==, 2); } +static void +test_cr_xml_package_iterator_00_no_filelists(void) +{ + int parsed = 0; + GError *tmp_err = NULL; + cr_Package *package = NULL; + + cr_PkgIterator *pkg_iterator = cr_PkgIterator_new( + TEST_REPO_02_PRIMARY, NULL, TEST_REPO_02_OTHER, NULL, NULL, NULL, NULL, &tmp_err); + + while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) { + parsed++; + cr_package_free(package); + } + + g_assert(cr_PkgIterator_is_finished(pkg_iterator)); + cr_PkgIterator_free(pkg_iterator, &tmp_err); + + g_assert(tmp_err == NULL); + g_assert_cmpint(parsed, ==, 2); +} + +static void +test_cr_xml_package_iterator_00_no_other(void) +{ + int parsed = 0; + GError *tmp_err = NULL; + cr_Package *package = NULL; + + cr_PkgIterator *pkg_iterator = cr_PkgIterator_new( + TEST_REPO_02_PRIMARY, TEST_REPO_02_FILELISTS, NULL, NULL, NULL, NULL, NULL, &tmp_err); + + while ((package = cr_PkgIterator_parse_next(pkg_iterator, &tmp_err))) { + parsed++; + cr_package_free(package); + } + + g_assert(cr_PkgIterator_is_finished(pkg_iterator)); + cr_PkgIterator_free(pkg_iterator, &tmp_err); + + g_assert(tmp_err == NULL); + g_assert_cmpint(parsed, ==, 2); +} + static void test_cr_xml_package_iterator_filelists_ext_00(void) { @@ -320,6 +364,12 @@ main(int argc, char *argv[]) g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00", test_cr_xml_package_iterator_00); + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_filelists", + test_cr_xml_package_iterator_00_no_filelists); + + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_00_no_other", + test_cr_xml_package_iterator_00_no_other); + g_test_add_func("/xml_parser_main_metadata/test_cr_xml_package_iterator_filelists_ext_00", test_cr_xml_package_iterator_filelists_ext_00);