Simplify pandas operations, update transforms

This commit is contained in:
pemontto
2019-04-15 17:05:21 +10:00
parent 275b89c94d
commit 29a91cbfb2
6 changed files with 38 additions and 43 deletions

1
.gitignore vendored
View File

@ -1,5 +1,6 @@
# Vulnwhisperer stuff
data/
docker_data/
logs/
elk6/vulnwhisperer.ini
resources/elk6/vulnwhisperer.ini

View File

@ -24,6 +24,7 @@ script:
- rm -rf /tmp/VulnWhisperer
- vuln_whisperer -F -c configs/test.ini --mock --mock_dir ${TEST_PATH}
# Run a second time with no scans to import
- rm -rf /tmp/VulnWhisperer/data/database
- vuln_whisperer -F -c configs/test.ini --mock --mock_dir ${TEST_PATH}
# Test one failed scan
- rm -rf /tmp/VulnWhisperer

View File

@ -19,6 +19,7 @@ services:
mem_limit: 8g
volumes:
- ./docker_data/esdata1:/usr/share/elasticsearch/data
- ./docker_data/es_snapshots:/snapshots
ports:
- 9200:9200
#restart: always
@ -37,8 +38,8 @@ services:
- 5601:5601
depends_on:
- elasticsearch
# volumes:
# - ./kibana-data:
volumes:
- ./docker_data/kibana_optimize:/usr/share/kibana/optimize
networks:
esnet:
aliases:
@ -50,7 +51,6 @@ services:
volumes:
- ./resources/elk6/init_kibana.sh:/opt/init_kibana.sh
- ./resources/elk6/kibana_APIonly.json:/opt/kibana_APIonly.json
- ./docker_data/kibana_optimize:/usr/share/kibana/optimize
command: sh -c "apk add --no-cache curl bash && chmod +x /opt/init_kibana.sh && chmod +r /opt/kibana_APIonly.json && cd /opt/ && /bin/bash /opt/init_kibana.sh" # /opt/kibana_APIonly.json"
networks:
esnet:

View File

@ -203,15 +203,11 @@ class NessusAPI(object):
df.drop('CVSS', axis=1, inplace=True)
df.drop('IP Address', axis=1, inplace=True)
# Map fields from COLUMN_MAPPING
fields = [x.lower() for x in df.columns]
for field, replacement in self.COLUMN_MAPPING.iteritems():
if field in fields:
self.logger.debug('Renaming "{}" to "{}"'.format(field, replacement))
fields[fields.index(field)] = replacement
# Lowercase and map fields from COLUMN_MAPPING
df.columns = [x.lower() for x in df.columns]
df.rename(columns=self.COLUMN_MAPPING, inplace=True)
df.columns = [x.replace(' ', '_') for x in df.columns]
fields = [x.replace(' ', '_') for x in fields]
df.columns = fields
return df
def transform_values(self, df):
@ -227,8 +223,7 @@ class NessusAPI(object):
# Map risk to a SEVERITY MAPPING value
self.logger.debug('Mapping risk to severity number')
df['risk_number'] = df['risk'].str.lower()
df['risk_number'].replace(self.SEVERITY_MAPPING, inplace=True)
df['risk_number'] = df['risk'].str.lower().map(self.SEVERITY_MAPPING)
if self.profile == 'tenable':
self.logger.debug('Combinging CVSS vectors for tenable')
@ -243,9 +238,14 @@ class NessusAPI(object):
.apply(lambda x: '{}/{}'.format(x[0], x[1]), axis=1)
.str.rstrip('/nan')
)
# CVSS score = cvss_temporal if cvss_temporal else cvss_base
df.drop(['cvss_temporal_vector', 'cvss3_temporal_vector'], axis=1, inplace=True)
# CVSS scores: cvss = cvss_temporal or cvss_base; cvss3 = cvss3_temporal or cvss3_base
df['cvss'] = df['cvss_base']
df.loc[df['cvss_temporal'].notnull(), 'cvss'] = df['cvss_temporal']
df['cvss3'] = df['cvss3_base']
df.loc[df['cvss3_temporal'].notnull(), 'cvss3'] = df['cvss3_temporal']
df.fillna('', inplace=True)
return df

View File

@ -144,15 +144,10 @@ class qualysVulnScan:
def map_fields(self, df):
self.logger.info('Mapping fields')
# Map fields from COLUMN_MAPPING
fields = [x.lower() for x in df.columns]
for field, replacement in self.COLUMN_MAPPING.iteritems():
if field in fields:
self.logger.info('Renaming "{}" to "{}"'.format(field, replacement))
fields[fields.index(field)] = replacement
fields = [x.replace(' ', '_') for x in fields]
df.columns = fields
# Lowercase and map fields from COLUMN_MAPPING
df.columns = [x.lower() for x in df.columns]
df.rename(columns=self.COLUMN_MAPPING, inplace=True)
df.columns = [x.replace(' ', '_') for x in df.columns]
return df
@ -165,32 +160,28 @@ class qualysVulnScan:
df['protocol'] = df['protocol'].str.lower()
# Construct the CVSS vector
df['cvss_vector'] = ''
df.loc[df['cvss_base'].notnull(), 'cvss_vector'] = (
df['cvss_vector'] = (
df.loc[df['cvss_base'].notnull(), 'cvss_base']
.str.split()
.apply(lambda x: x[1])
.str.replace('(', '')
.str.replace(')', '')
.str.strip('()')
)
df.loc[df['cvss_base'].notnull(), 'cvss_base'] = (
df['cvss_base'] = (
df.loc[df['cvss_base'].notnull(), 'cvss_base']
.str.split()
.apply(lambda x: x[0])
)
df['cvss_temporal_vector'] = ''
df.loc[df['cvss_temporal'].notnull(), 'cvss_temporal_vector'] = (
df['cvss_temporal_vector'] = (
df.loc[df['cvss_temporal'].notnull(), 'cvss_temporal']
.str.split()
.apply(lambda x: x[1])
.str.replace('(', '')
.str.replace(')', '')
.str.strip('()')
)
df.loc[df['cvss_temporal'].notnull(), 'cvss_temporal'] = (
df['cvss_temporal'] = (
df.loc[df['cvss_temporal'].notnull(), 'cvss_temporal']
.str.split()
.apply(lambda x: x[0])
.fillna('')
)
# Combine base and temporal
@ -198,7 +189,6 @@ class qualysVulnScan:
df[['cvss_vector', 'cvss_temporal_vector']]
.apply(lambda x: '{}/{}'.format(x[0], x[1]), axis=1)
.str.rstrip('/nan')
.fillna('')
)
df.drop('cvss_temporal_vector', axis=1, inplace=True)
@ -206,8 +196,11 @@ class qualysVulnScan:
# Convert Qualys severity to standardised risk number
df['risk_number'] = df['severity'].astype(int)-1
# CVSS scores: cvss = cvss_temporal or cvss_base; cvss3 = cvss3_temporal or cvss3_base
df['cvss'] = df['cvss_base']
df.loc[df['cvss_temporal'].notnull(), 'cvss'] = df['cvss_temporal']
df['cvss3'] = df['cvss3_base']
df.loc[df['cvss3_temporal'].notnull(), 'cvss3'] = df['cvss3_temporal']
df.fillna('', inplace=True)

View File

@ -84,7 +84,7 @@ class vulnWhispererBase(object):
self.cur = self.conn.cursor()
self.logger.info('Connected to database at {loc}'.format(loc=self.database))
except Exception as e:
self.logger.error('Could not connect to database at {loc}\nReason: {e} - Please ensure the path exist'.format(
self.logger.error('Could not connect to database at {loc}\nReason: {e} - Please ensure the path exists'.format(
e=e,
loc=self.database))
else:
@ -189,7 +189,7 @@ class vulnWhispererBase(object):
scan=self.write_path.encode('utf8')))
else:
os.path.exists(self.write_path)
self.logger.info('Directory already exist for {scan} - Skipping creation'.format(
self.logger.info('Directory already exists for {scan} - Skipping creation'.format(
scan=self.write_path.encode('utf8')))
def get_latest_results(self, source, scan_name):
@ -376,7 +376,7 @@ class vulnWhispererNessus(vulnWhispererBase):
os.makedirs(self.path_check(f['name']))
else:
os.path.exists(self.path_check(f['name']))
self.logger.info('Directory already exist for {scan} - Skipping creation'.format(
self.logger.info('Directory already exists for {scan} - Skipping creation'.format(
scan=self.path_check(f['name']).encode('utf8')))
# try to download and save scans into each folder they belong to
@ -419,7 +419,7 @@ class vulnWhispererNessus(vulnWhispererBase):
if os.path.isfile(relative_path_name):
if self.develop:
csv_in = pd.read_csv(relative_path_name)
csv_in = pd.read_json(relative_path_name, lines=True)
record_meta = (
scan_name,
scan_id,
@ -433,7 +433,7 @@ class vulnWhispererNessus(vulnWhispererBase):
0,
)
self.record_insert(record_meta)
self.logger.info('File {filename} already exist! Updating database'.format(filename=relative_path_name))
self.logger.info('File {filename} already exists! Updating database'.format(filename=relative_path_name))
else:
try:
file_req = \
@ -608,7 +608,7 @@ class vulnWhispererQualys(vulnWhispererBase):
0,
)
self.record_insert(record_meta)
self.logger.info('File {filename} already exist! Updating database'.format(filename=relative_path_name))
self.logger.info('File {filename} already exists! Updating database'.format(filename=relative_path_name))
else:
self.logger.info('Generating report for {}'.format(report_id))
@ -775,7 +775,7 @@ class vulnWhispererOpenVAS(vulnWhispererBase):
0,
)
self.record_insert(record_meta)
self.logger.info('File {filename} already exist! Updating database'.format(filename=relative_path_name))
self.logger.info('File {filename} already exists! Updating database'.format(filename=relative_path_name))
record_meta = (
scan_name,
@ -889,7 +889,7 @@ class vulnWhispererQualysVuln(vulnWhispererBase):
0,
)
self.record_insert(record_meta)
self.logger.info('File {filename} already exist! Updating database'.format(filename=relative_path_name))
self.logger.info('File {filename} already exists! Updating database'.format(filename=relative_path_name))
else:
try: