tenfourfox/testing/mozharness/scripts/release/antivirus.py
from multiprocessing.pool import ThreadPool
import os
import re
import shutil
import sys

# Make the mozharness package importable when this script is run from
# within the scripts/release directory.
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))

from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
from mozharness.base.script import BaseScript


class AntivirusScan(BaseScript, VirtualenvMixin):
    config_options = [
        [["--product"], {
            "dest": "product",
            "help": "Product being released, e.g.: firefox, thunderbird",
        }],
        [["--version"], {
            "dest": "version",
            "help": "Version of release, e.g.: 39.0b5",
        }],
        [["--build-number"], {
            "dest": "build_number",
            "help": "Build number of release, e.g.: 2",
        }],
        [["--bucket-name"], {
            "dest": "bucket_name",
            "help": "S3 bucket to retrieve files from",
        }],
        [["--exclude"], {
            "dest": "excludes",
            "action": "append",
            "help": "List of filename patterns to exclude. See script source for default",
        }],
        [["-d", "--download-parallelization"], {
            "dest": "download_parallelization",
            "default": 6,
            "type": "int",
            "help": "Number of concurrent file downloads",
        }],
        [["-s", "--scan-parallelization"], {
            "dest": "scan_parallelization",
            "default": 4,
            "type": "int",
            "help": "Number of concurrent file scans",
        }],
[["--tools-repo"], {
"dest": "tools_repo",
"default": "https://hg.mozilla.org/build/tools",
}],
[["--tools-revision"], {
"dest": "tools_revision",
"help": "Revision of tools repo to use when downloading extract_and_run_command.py",
}],
] + virtualenv_config_options
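
    # For illustration, a typical invocation might look like the following.
    # The bucket name is a placeholder, not a default; the version and build
    # number are the examples from the option help strings above:
    #
    #   python antivirus.py --product firefox --version 39.0b5 \
    #       --build-number 2 --bucket-name <candidates-bucket> \
    #       --tools-revision default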

    # Filename patterns that are never downloaded or scanned. Each entry is
    # matched against the full S3 key name with re.search().
    DEFAULT_EXCLUDES = [
        r"^.*tests.*$",
        r"^.*crashreporter.*$",
        r"^.*\.zip(\.asc)?$",
        r"^.*\.log$",
        r"^.*\.txt$",
        r"^.*\.asc$",
        r"^.*/partner-repacks.*$",
        r"^.*\.checksums(\.asc)?$",
        r"^.*/logs/.*$",
        r"^.*/jsshell.*$",
        r"^.*\.json$",
        r"^.*/host.*$",
        r"^.*/mar-tools/.*$",
        r"^.*gecko-unsigned-unaligned\.apk$",
        r"^.*robocop\.apk$",
        r"^.*contrib.*$",
    ]
    CACHE_DIR = 'cache'
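
    # For illustration, a hypothetical key such as
    # "pub/firefox/candidates/39.0b5-candidates/build2/firefox-39.0b5.checksums"
    # would be skipped by the checksums pattern above.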

    def __init__(self):
        BaseScript.__init__(self,
                            config_options=self.config_options,
                            require_config_file=False,
                            config={
                                "virtualenv_modules": [
                                    "boto",
                                    "redo",
                                    "mar",
                                ],
                                "virtualenv_path": "venv",
                            },
                            all_actions=[
                                "create-virtualenv",
                                "activate-virtualenv",
                                "get-extract-script",
                                "get-files",
                                "scan-files",
                                "cleanup-cache",
                            ],
                            default_actions=[
                                "create-virtualenv",
                                "activate-virtualenv",
                                "get-extract-script",
                                "get-files",
                                "scan-files",
                                "cleanup-cache",
                            ],
                            )
        self.excludes = self.config.get('excludes', self.DEFAULT_EXCLUDES)
        self.dest_dir = self.CACHE_DIR

    def _get_candidates_prefix(self):
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config['product'],
            self.config["version"],
            self.config["build_number"]
        )
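
    # For illustration, with the example values from the option help strings
    # (product=firefox, version=39.0b5, build_number=2) the prefix above is
    # "pub/firefox/candidates/39.0b5-candidates/build2/".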

    def _matches_exclude(self, keyname):
        """Return True if keyname matches any of the exclude patterns."""
        for exclude in self.excludes:
            if re.search(exclude, keyname):
                return True
        return False

    def get_extract_script(self):
        """Gets a copy of extract_and_run_command.py from tools, and the supporting
        mar.py, so that we can unpack various files for clam to scan them."""
        remote_file = "{}/raw-file/{}/stage/extract_and_run_command.py".format(
            self.config["tools_repo"], self.config["tools_revision"])
        self.download_file(remote_file, file_name="extract_and_run_command.py")

    def get_files(self):
        """Pull the candidate files down from S3 for scanning, using parallel requests"""
        from boto.s3.connection import S3Connection
        from boto.exception import S3CopyError, S3ResponseError
        from redo import retry
        from httplib import HTTPException

        # suppress boto debug logging, it's too verbose with --loglevel=debug
        import logging
        logging.getLogger('boto').setLevel(logging.INFO)

        self.info("Connecting to S3")
        conn = S3Connection(anon=True)
        self.info("Getting bucket {}".format(self.config["bucket_name"]))
        bucket = conn.get_bucket(self.config["bucket_name"])

        if os.path.exists(self.dest_dir):
            self.info('Emptying {}'.format(self.dest_dir))
            shutil.rmtree(self.dest_dir)
        os.makedirs(self.dest_dir)

        def worker(item):
            source, destination = item
            self.info("Downloading {} to {}".format(source, destination))
            key = bucket.get_key(source)
            return retry(key.get_contents_to_filename,
                         args=(destination, ),
                         sleeptime=30, max_sleeptime=150,
                         retry_exceptions=(S3CopyError, S3ResponseError,
                                           IOError, HTTPException))
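
        # redo's retry() re-runs the download whenever one of the transient
        # S3/network errors listed above is raised, sleeping between attempts
        # (starting at 30s and capped at 150s here); other exceptions
        # propagate immediately.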

        def find_release_files():
            candidates_prefix = self._get_candidates_prefix()
            self.info("Getting key names from candidates")
            for key in bucket.list(prefix=candidates_prefix):
                keyname = key.name
                if self._matches_exclude(keyname):
                    self.debug("Excluding {}".format(keyname))
                else:
                    destination = os.path.join(self.dest_dir,
                                               keyname.replace(candidates_prefix, ''))
                    dest_dir = os.path.dirname(destination)
                    if not os.path.isdir(dest_dir):
                        os.makedirs(dest_dir)
                    yield (keyname, destination)
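
        # Note: ThreadPool.map() consumes the whole generator before
        # dispatching, so the (source, destination) pairs are computed up
        # front and then downloaded download_parallelization at a time.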
        pool = ThreadPool(self.config["download_parallelization"])
        pool.map(worker, find_release_files())

    def scan_files(self):
        """Scan the files we've collected. We don't do the download and scan
        concurrently, to make it easier to have a coherent log afterwards.
        Uses the venv python."""
        self.run_command([self.query_python_path(), 'extract_and_run_command.py',
                          '-j{}'.format(self.config['scan_parallelization']),
                          'clamdscan', '-m', '--no-summary', '--', self.dest_dir])
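
    # With the default scan_parallelization of 4 and the "cache" dest dir,
    # the command above amounts to roughly:
    #   <venv python> extract_and_run_command.py -j4 clamdscan -m --no-summary -- cache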

    def cleanup_cache(self):
        """If we have simultaneous releases in flight an av slave may end up doing
        another av job before being recycled, and we need to make sure the full
        disk is available."""
        shutil.rmtree(self.dest_dir)


if __name__ == "__main__":
    myScript = AntivirusScan()
    myScript.run_and_exit()