mirror of
https://github.com/classilla/tenfourfox.git
synced 2024-10-25 22:28:27 +00:00
195 lines
7.0 KiB
Python
195 lines
7.0 KiB
Python
from multiprocessing.pool import ThreadPool
|
|
import os
|
|
import re
|
|
import sys
|
|
import shutil
|
|
|
|
# sys.path[0] is the directory containing this script; add the directory two
# levels above it to the module search path so the `mozharness` package
# imported below can be resolved. This must run before those imports.
# NOTE(review): presumably that directory is the mozharness checkout root --
# confirm against the repository layout.
sys.path.insert(1, os.path.dirname(os.path.dirname(sys.path[0])))
|
|
|
|
from mozharness.base.python import VirtualenvMixin, virtualenv_config_options
|
|
from mozharness.base.script import BaseScript
|
|
|
|
|
|
class AntivirusScan(BaseScript, VirtualenvMixin):
    """Download release candidate files from S3 and scan them with clamdscan.

    The script runs as a sequence of actions: set up a virtualenv, fetch
    extract_and_run_command.py from the tools repository, download the
    candidate files into a local cache directory, scan that directory, and
    finally remove the cache.
    """

    config_options = [
        [["--product"], {
            "dest": "product",
            "help": "Product being released, eg: firefox, thunderbird",
        }],
        [["--version"], {
            "dest": "version",
            "help": "Version of release, eg: 39.0b5",
        }],
        [["--build-number"], {
            "dest": "build_number",
            "help": "Build number of release, eg: 2",
        }],
        [["--bucket-name"], {
            "dest": "bucket_name",
            "help": "S3 Bucket to retrieve files from",
        }],
        [["--exclude"], {
            "dest": "excludes",
            "action": "append",
            "help": "List of filename patterns to exclude. See script source for default",
        }],
        [["-d", "--download-parallelization"], {
            "dest": "download_parallelization",
            "default": 6,
            "type": "int",
            "help": "Number of concurrent file downloads",
        }],
        [["-s", "--scan-parallelization"], {
            "dest": "scan_parallelization",
            "default": 4,
            "type": "int",
            "help": "Number of concurrent file scans",
        }],
        [["--tools-repo"], {
            "dest": "tools_repo",
            "default": "https://hg.mozilla.org/build/tools",
        }],
        [["--tools-revision"], {
            "dest": "tools_revision",
            "help": "Revision of tools repo to use when downloading extract_and_run_command.py",
        }],
    ] + virtualenv_config_options

    # Regular expressions (applied with re.search to the full S3 key name);
    # a key matching any of them is not downloaded and therefore not scanned.
    # Used only when no --exclude option is supplied.
    DEFAULT_EXCLUDES = [
        r"^.*tests.*$",
        r"^.*crashreporter.*$",
        r"^.*\.zip(\.asc)?$",
        r"^.*\.log$",
        r"^.*\.txt$",
        r"^.*\.asc$",
        r"^.*/partner-repacks.*$",
        r"^.*.checksums(\.asc)?$",
        r"^.*/logs/.*$",
        r"^.*/jsshell.*$",
        r"^.*json$",
        r"^.*/host.*$",
        r"^.*/mar-tools/.*$",
        r"^.*gecko-unsigned-unaligned.apk$",
        r"^.*robocop.apk$",
        r"^.*contrib.*"
    ]

    # Directory (relative to the working directory) that downloads land in.
    CACHE_DIR = 'cache'

    def __init__(self):
        """Configure the script's options, virtualenv modules and actions."""
        # Every action is also a default action, so the list is written once
        # and each keyword argument receives its own copy.
        actions = [
            "create-virtualenv",
            "activate-virtualenv",
            "get-extract-script",
            "get-files",
            "scan-files",
            "cleanup-cache",
        ]
        BaseScript.__init__(
            self,
            config_options=self.config_options,
            require_config_file=False,
            config={
                "virtualenv_modules": [
                    "boto",
                    "redo",
                    "mar",
                ],
                "virtualenv_path": "venv",
            },
            all_actions=list(actions),
            default_actions=list(actions),
        )
        self.excludes = self.config.get('excludes', self.DEFAULT_EXCLUDES)
        self.dest_dir = self.CACHE_DIR

    def _get_candidates_prefix(self):
        """Return the S3 key prefix under which this release's candidates live.

        Built from the ``product``, ``version`` and ``build_number`` config
        values; raises KeyError if any of them is missing from the config.
        """
        return "pub/{}/candidates/{}-candidates/build{}/".format(
            self.config['product'],
            self.config["version"],
            self.config["build_number"]
        )

    def _matches_exclude(self, keyname):
        """Return True if ``keyname`` matches any pattern in ``self.excludes``."""
        return any(re.search(pattern, keyname) for pattern in self.excludes)

    def get_extract_script(self):
        """Download extract_and_run_command.py from the tools repository.

        The file is fetched at the configured ``tools_revision`` and saved
        as ``extract_and_run_command.py``; scan_files() later runs it.
        NOTE(review): the "mar" support the script needs is presumably
        provided by the "mar" virtualenv module listed in __init__, not
        downloaded here -- confirm.
        """
        remote_file = "{}/raw-file/{}/stage/extract_and_run_command.py".format(
            self.config["tools_repo"], self.config["tools_revision"])
        self.download_file(remote_file, file_name="extract_and_run_command.py")

    def get_files(self):
        """Pull the candidate files down from S3 for scanning, using parallel requests.

        Any existing cache directory is emptied first. Keys matching an
        exclude pattern are skipped. Each remaining key is downloaded to
        ``self.dest_dir`` under its path relative to the candidates prefix.
        """
        # These modules come from the virtualenv, so they can only be
        # imported once the activate-virtualenv action has run.
        from boto.s3.connection import S3Connection
        from boto.exception import S3CopyError, S3ResponseError
        from redo import retry
        from httplib import HTTPException

        # suppress boto debug logging, it's too verbose with --loglevel=debug
        import logging
        logging.getLogger('boto').setLevel(logging.INFO)

        self.info("Connecting to S3")
        conn = S3Connection(anon=True)
        self.info("Getting bucket {}".format(self.config["bucket_name"]))
        bucket = conn.get_bucket(self.config["bucket_name"])

        if os.path.exists(self.dest_dir):
            self.info('Emptying {}'.format(self.dest_dir))
            shutil.rmtree(self.dest_dir)
        os.makedirs(self.dest_dir)

        def worker(item):
            """Download one (key name, local path) pair, retrying on errors."""
            source, destination = item

            self.info("Downloading {} to {}".format(source, destination))
            key = bucket.get_key(source)
            return retry(key.get_contents_to_filename,
                         args=(destination, ),
                         sleeptime=30, max_sleeptime=150,
                         retry_exceptions=(S3CopyError, S3ResponseError,
                                           IOError, HTTPException))

        def find_release_files():
            """Yield (key name, local path) for every key that should be scanned."""
            candidates_prefix = self._get_candidates_prefix()
            self.info("Getting key names from candidates")
            for key in bucket.list(prefix=candidates_prefix):
                keyname = key.name
                if self._matches_exclude(keyname):
                    self.debug("Excluding {}".format(keyname))
                    continue
                # Strip only the leading prefix; str.replace() would also
                # remove any later occurrence of the same text in the key.
                relative_name = keyname
                if keyname.startswith(candidates_prefix):
                    relative_name = keyname[len(candidates_prefix):]
                if not relative_name or relative_name.endswith('/'):
                    # A key ending in "/" maps to a directory path locally and
                    # can never be written as a file; downloading it would
                    # only fail with IOError after exhausting the retries.
                    self.debug("Skipping directory placeholder {}".format(keyname))
                    continue
                destination = os.path.join(self.dest_dir, relative_name)
                parent_dir = os.path.dirname(destination)
                if not os.path.isdir(parent_dir):
                    os.makedirs(parent_dir)
                yield (keyname, destination)

        pool = ThreadPool(self.config["download_parallelization"])
        try:
            pool.map(worker, find_release_files())
        finally:
            # map() has either completed every download or raised; in both
            # cases shut the workers down so no threads are left behind and
            # no further downloads start after a failure.
            pool.terminate()
            pool.join()

    def scan_files(self):
        """Scan the files we've collected, using the venv python.

        Runs extract_and_run_command.py with ``scan_parallelization`` jobs,
        which in turn invokes ``clamdscan -m --no-summary`` on the cache
        directory. Downloading (get_files) and scanning are separate
        actions rather than interleaved, which keeps the log coherent.
        """
        self.run_command([self.query_python_path(), 'extract_and_run_command.py',
                          '-j{}'.format(self.config['scan_parallelization']),
                          'clamdscan', '-m', '--no-summary', '--', self.dest_dir])

    def cleanup_cache(self):
        """Remove the download cache directory.

        If we have simultaneous releases in flight an av slave may end up
        doing another av job before being recycled, and we need to make sure
        the full disk is available.
        """
        # Nothing to remove when the action is run without a prior get-files.
        if os.path.exists(self.dest_dir):
            shutil.rmtree(self.dest_dir)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: build the scanner and hand control to its
    # run_and_exit() method (inherited from a base class not shown here).
    AntivirusScan().run_and_exit()
|