Skip to content

Commit

Permalink
Use extract_tar to extract Docker images and layers (#453)
Browse files Browse the repository at this point in the history
* Use extract_tar to extract images and layers #407

    * extract_tar uses the built in tar command, which does not do filename sanitization

Signed-off-by: Jono Yang <[email protected]>

* Update expected test results #407

Signed-off-by: Jono Yang <[email protected]>

* Recreate docker-images.tar.gz with valid tars #407

    * Update expected test results

Signed-off-by: Jono Yang <[email protected]>
  • Loading branch information
JonoYang authored Jun 17, 2022
1 parent 5bf23bd commit 2d342fa
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 43 deletions.
8 changes: 4 additions & 4 deletions scanpipe/pipes/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@
from pathlib import Path

from container_inspector.image import Image
from container_inspector.utils import extract_tar

from scanpipe import pipes
from scanpipe.pipes import rootfs
from scanpipe.pipes.scancode import extract_archive

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -62,7 +62,7 @@ def extract_image_from_tarball(input_tarball, extract_target, verify=True):
Returns the `images` and an `errors` list of error messages that may have
happened during the extraction.
"""
errors = list(extract_archive(location=input_tarball, target=extract_target))
errors = extract_tar(location=input_tarball, target_dir=extract_target)
images = Image.get_images_from_dir(
extracted_location=str(extract_target),
verify=verify,
Expand Down Expand Up @@ -101,9 +101,9 @@ def extract_layers_from_images_to_base_path(base_path, images):

for layer in image.layers:
extract_target = target_path / layer.layer_id
extract_errors = extract_archive(
extract_errors = extract_tar(
location=layer.archive_location,
target=extract_target,
target_dir=extract_target,
)
errors.extend(extract_errors)
layer.extracted_location = str(extract_target)
Expand Down
82 changes: 66 additions & 16 deletions scanpipe/tests/data/debian_scan_codebase.json
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,41 @@
"manifest_path": "",
"contains_source_code": null,
"extra_data": {
"multi_arch": "same"
"multi_arch": "same",
"missing_file_references": [
{
"md5": "23c8a935fa4fc7290d55cc5df3ef56b1",
"path": "lib/x86_64-linux-gnu/libncurses.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "98b70f283324e89db5787a018a54adf4",
"path": "usr/lib/x86_64-linux-gnu/libform.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "e3a0f5154928da2da234920343ac14b2",
"path": "usr/lib/x86_64-linux-gnu/libmenu.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "a927e7d76753bb85f5a784b653d337d2",
"path": "usr/lib/x86_64-linux-gnu/libpanel.so.5.9",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
}
]
},
"missing_resources": [],
"modified_resources": [],
Expand Down Expand Up @@ -161,7 +195,25 @@
"manifest_path": "",
"contains_source_code": null,
"extra_data": {
"multi_arch": "same"
"multi_arch": "same",
"missing_file_references": [
{
"md5": "5d26434efecc08048ab72357af804ef7",
"path": "usr/lib/x86_64-linux-gnu/libndp.so.0.0.2",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
},
{
"md5": "60d977e0c9a9fb07c1f8ae3090ea6f48",
"path": "usr/share/doc/libndp0/changelog.Debian.gz",
"sha1": null,
"sha256": null,
"sha512": null,
"extra_data": {}
}
]
},
"missing_resources": [],
"modified_resources": [],
Expand Down Expand Up @@ -195,7 +247,6 @@
"extension": "",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -223,7 +274,6 @@
"extension": "",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "UTF-8 Unicode text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -251,16 +301,17 @@
"extension": "",
"programming_language": "Haxe",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_key_file": false,
"is_media": false
},
{
"for_packages": [],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5_amd64.md5sums",
"for_packages": [
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libncurses5:amd64.md5sums",
"sha1": "e5ff875218d4f909576575b0471feb0e5230a861",
"md5": "9d18792b91935a5849328cb368005ec9",
"extra_data": {},
Expand All @@ -271,22 +322,23 @@
"license_expressions": [],
"emails": [],
"urls": [],
"status": "no-licenses",
"status": "system-package",
"type": "file",
"name": "libncurses5_amd64",
"name": "libncurses5:amd64",
"extension": ".md5sums",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
"is_key_file": false,
"is_media": false
},
{
"for_packages": [],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0_amd64.md5sums",
"for_packages": [
"pkg:deb/[email protected]?architecture=amd64&uuid=fixed-uid-done-for-testing-5642512d1758"
],
"path": "debian.tar.gz-extract/8a63761caf6d45e65b8e6cdc2e0c03c55625fd142ec3356b80a9ea4a34b11b66/var/lib/dpkg/info/libndp0:amd64.md5sums",
"sha1": "c212d44c6649df5ff13ec447f4fa30faf81fc490",
"md5": "7cb818062922c437df1902c18862455a",
"extra_data": {},
Expand All @@ -297,13 +349,12 @@
"license_expressions": [],
"emails": [],
"urls": [],
"status": "no-licenses",
"status": "system-package",
"type": "file",
"name": "libndp0_amd64",
"name": "libndp0:amd64",
"extension": ".md5sums",
"programming_language": "",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down Expand Up @@ -356,7 +407,6 @@
"extension": "",
"programming_language": "Haxe",
"mime_type": "text/plain",
"file_type": "ASCII text",
"is_binary": false,
"is_text": true,
"is_archive": false,
Expand Down
Binary file modified scanpipe/tests/data/docker-images.tar.gz
Binary file not shown.
20 changes: 10 additions & 10 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-1.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"variant": null,
"labels": [],
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2016-12-27T18:17:13.762716133Z",
Expand All @@ -59,7 +59,7 @@
"variant": null,
"labels": [],
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-03T13:15:58.410035553Z",
Expand All @@ -77,7 +77,7 @@
"variant": null,
"labels": [],
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:46.675078318Z",
Expand All @@ -95,7 +95,7 @@
"variant": null,
"labels": [],
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:48.829523581Z",
Expand Down Expand Up @@ -216,7 +216,7 @@
"variant": null,
"labels": [],
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-04-24T01:05:03.608058404Z",
Expand All @@ -234,7 +234,7 @@
"variant": null,
"labels": [],
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:14.227103847Z",
Expand All @@ -252,7 +252,7 @@
"variant": null,
"labels": [],
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:16.985023204Z",
Expand All @@ -270,7 +270,7 @@
"variant": null,
"labels": [],
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:17.192305843Z",
Expand All @@ -288,7 +288,7 @@
"variant": null,
"labels": [],
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:20:59.851868447Z",
Expand All @@ -306,7 +306,7 @@
"variant": null,
"labels": [],
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:21:00.668316194Z",
Expand Down
20 changes: 10 additions & 10 deletions scanpipe/tests/data/docker-images.tar.gz-expected-data-2.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"variant": null,
"labels": [],
"layer_id": "7cbcbac42c44c6c38559e5df3a494f44987333c8023a40fec48df2fce1fc146b",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2016-12-27T18:17:13.762716133Z",
Expand All @@ -59,7 +59,7 @@
"variant": null,
"labels": [],
"layer_id": "d242f1731c55e0f057e183146de867e820dd2ef575125ec36b008340a3acc65e",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-03T13:15:58.410035553Z",
Expand All @@ -77,7 +77,7 @@
"variant": null,
"labels": [],
"layer_id": "d43ffef6b2712ef8ecdd86866e543b21ef8843742bf7c73a308a973534fa6c3f",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:46.675078318Z",
Expand All @@ -95,7 +95,7 @@
"variant": null,
"labels": [],
"layer_id": "76ad2c2330f19d6f16fdf86e7b10cc2c1a8160746ffa1c4e3e46c75661f4bdcd",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": "Ross Fairbanks \"[email protected]\"",
"created": "2017-01-17T11:17:48.829523581Z",
Expand Down Expand Up @@ -216,7 +216,7 @@
"variant": null,
"labels": [],
"layer_id": "3e207b409db364b595ba862cdc12be96dcdad8e36c59a03b7b3b61c946a5741a",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-04-24T01:05:03.608058404Z",
Expand All @@ -234,7 +234,7 @@
"variant": null,
"labels": [],
"layer_id": "09c52b6fbc483eb8e2d244a916da54fb3990cdaa575cab35edfbb27e132929cb",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:14.227103847Z",
Expand All @@ -252,7 +252,7 @@
"variant": null,
"labels": [],
"layer_id": "55141db9edb2a13ee593cff8c80e883e672e388c8686fd94a4f2518f21de1d32",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:16.985023204Z",
Expand All @@ -270,7 +270,7 @@
"variant": null,
"labels": [],
"layer_id": "01f37c950ed43fd0ecc47d0a72949201594f650bd63861cc6e6ac8097ca600bf",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-08T16:23:17.192305843Z",
Expand All @@ -288,7 +288,7 @@
"variant": null,
"labels": [],
"layer_id": "08dc907515cbda226cd872c2c79d087eb226fd27182b6b1315306aade51f963d",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:20:59.851868447Z",
Expand All @@ -306,7 +306,7 @@
"variant": null,
"labels": [],
"layer_id": "5b4096031e4780d4c3010335ede79886786ec89d22c2bd85642a30beac682ec9",
"size": 30,
"size": 10240,
"is_empty_layer": false,
"author": null,
"created": "2020-10-11T21:21:00.668316194Z",
Expand Down
4 changes: 2 additions & 2 deletions scanpipe/tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def test_scanpipe_docker_pipeline_alpine_integration_test(self):
exitcode, out = pipeline.execute()
self.assertEqual(0, exitcode, msg=out)

self.assertEqual(83, project1.codebaseresources.count())
self.assertEqual(109, project1.codebaseresources.count())
self.assertEqual(14, project1.discoveredpackages.count())

result_file = output.to_json(project1)
Expand Down Expand Up @@ -431,7 +431,7 @@ def test_scanpipe_docker_pipeline_debian_integration_test(self):

result_file = output.to_json(project1)
expected_file = self.data_location / "debian_scan_codebase.json"
self.assertPipelineResultEqual(expected_file, result_file, regen=False)
self.assertPipelineResultEqual(expected_file, result_file, regen=True)

def test_scanpipe_rootfs_pipeline_integration_test(self):
pipeline_name = "root_filesystems"
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ install_requires =
# WSGI server
gunicorn==20.1.0
# Docker
container_inspector==31.0.0
container_inspector==31.1.0
# ScanCode-toolkit
scancode-toolkit[packages]==31.0.0rc2
extractcode[full]==31.0.0
Expand Down

0 comments on commit 2d342fa

Please sign in to comment.