forked from cve-search/cve-search
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsearch_fulltext.py
97 lines (88 loc) · 3.52 KB
/
search_fulltext.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Search the CVE fulltext database
#
# Software is free software released under the "Modified BSD license"
#
# Copyright (c) 2012-2013 Alexandre Dulaunoy - [email protected]
import pymongo
import os
import argparse
import sys
import json
from bson import json_util
indexpath = "./indexdir"
sys.path.append("./lib/")
from whoosh import index
from whoosh.fields import *
schema = Schema(title=TEXT(stored=True), path=ID(stored=True), content=TEXT)
ix = index.open_dir("indexdir")
from whoosh.qparser import QueryParser
argParser = argparse.ArgumentParser(description='Fulltext search for cve-search')
argParser.add_argument('-q', action='append', help='query to lookup (one or more)')
argParser.add_argument('-t', action='store_true', help='output title of the match CVE(s)')
argParser.add_argument('-f', action='store_true', help='output matching CVE(s) in JSON')
argParser.add_argument('-m', type=int, default=30, help='most frequent terms (default is 30)')
argParser.add_argument('-l', action='store_true', default=False, help='dump all terms encountered in CVE description')
argParser.add_argument('-g', action='store_true', default=False, help='graph of most frequent terms with each matching CVE (JSON output)')
argParser.add_argument('-s', action='store_true', default=False, help='enable stemming on graph JSON output (default is False)')
argParser.add_argument('-n', action='store_true', help='lookup complete cpe (Common Platform Enumeration) name for vulnerable configuration')
argParser.add_argument('-r', action='store_true', help='lookup ranking of vulnerable configuration')
args = argParser.parse_args()
if args.f or args.t:
import cves
cves = cves.last(rankinglookup = args.r, namelookup = args.n)
from whoosh.query import *
if args.q:
with ix.searcher() as searcher:
query = QueryParser("content", ix.schema).parse(" ".join(args.q))
results = searcher.search(query, limit=None)
for x in results:
if not args.f:
print (x['path'])
else:
print(json.dumps(cves.getcve(x['path']), sort_keys=True, default=json_util.default))
if args.t and not args.f:
print (" -- "+x['title'])
elif args.m and not args.l and not args.g:
xr = ix.searcher().reader()
for x in xr.most_frequent_terms("content", number=args.m):
sys.stdout.write(str(int(x[0])))
sys.stdout.write(",")
sys.stdout.write(x[1].decode('utf-8'))
sys.stdout.write("\n")
elif args.l and not args.g:
xr = ix.searcher().reader()
for x in xr.lexicon("content"):
print (x)
elif args.g:
import json
if args.s:
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import stopwords
lmtzr = WordNetLemmatizer()
xr = ix.searcher().reader()
s = {"name": 'cve-search', "children": []}
d = {}
for x in xr.most_frequent_terms("content", 3000):
query = QueryParser("content", ix.schema).parse(x[1])
if args.s:
term = lmtzr.lemmatize(x[1],'v')
if term in stopwords.words('english'): continue
else:
term = x[1]
if term in d:
d[term]['size'] = d[term]['size']+int(x[0])
else:
d[term] = {}
d[term]['size'] = int(x[0])
for k in sorted(d.keys(), key=lambda y: (d[y]['size']), reverse=True):
v = {}
v["name"] = k
v["size"] = d[k]['size']
s['children'].append(v)
print (json.dumps(s))
else:
argParser.print_help()
exit(1)