cockpit/bots/image-prune

219 lines
8.5 KiB
Python
Executable File

#!/usr/bin/env python3
# This file is part of Cockpit.
#
# Copyright (C) 2013 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.
# Number of days after which an image that is not in use expires.
# prune_images() compares this age against the modification time of each
# file in the image data directory.
IMAGE_EXPIRE = 14
import argparse
import os
import subprocess
import sys
import time
from contextlib import contextmanager
from task import github
from machine import testvm
# Directory that contains this script (symbolic links resolved)
BOTS = os.path.dirname(os.path.realpath(__file__))
# Free-space threshold in GiB for the image data directory: when less than this
# is available, unreferenced image files are pruned even if they aren't old.
# Can be overridden through the PRUNE_THRESHOLD_G environment variable.
PRUNE_THRESHOLD_G = float(os.environ.get("PRUNE_THRESHOLD_G", 15))
def enough_disk_space():
    """Tell whether the image data store still has sufficient free space.

    Returns True when the space available on the file system holding the
    image data directory is at least PRUNE_THRESHOLD_G GiB.
    """
    stats = os.statvfs(testvm.get_images_data_dir())
    free_bytes = stats.f_bavail * stats.f_frsize
    free_gib = free_bytes / (1024 ** 3)
    return free_gib >= PRUNE_THRESHOLD_G
def get_refs(open_pull_requests=True, offline=False):
    """Collect the git refs whose images have to be considered.

    Expects to be called from the top level of the git checkout.

    Args:
        open_pull_requests: If true, also include the pull requests reported
            by GitHub that change a file directly inside bots/images/.
        offline: If true, `git show-ref` is used instead of listing the
            remote with `git ls-remote`.

    Returns:
        A tuple `(refs, considerable)`:
        * `refs` maps a display name to a commit sha, in the format
          {'rhel-7.4': 'ad50328990e44c22501bd5e454746d4b5e561b7c'}. Pull
          requests are keyed as 'pull request #<number> (<title>)'.
        * `considerable` maps the head sha of each included pull request to
          the list of bots/images/ file names that the pull request touches.

    Raises:
        RuntimeError: If pull requests are requested while offline.
        subprocess.CalledProcessError: If listing the git refs fails.
    """
    refs = {}
    considerable = {}

    if open_pull_requests:
        if offline:
            # RuntimeError (still an Exception) so that main() reports it cleanly
            raise RuntimeError("Unable to consider open pull requests when in offline mode")

        # only talk to GitHub when pull requests are actually wanted
        g = github.GitHub()
        images_prefix = "bots/images/"
        for p in g.pulls():
            files = g.get("pulls/{0}/files".format(p["number"]))
            # keep only files directly inside bots/images/, not in sub-directories
            images = []
            for fl in files:
                fl_name = fl['filename']
                if not fl_name.startswith(images_prefix):
                    continue
                image = fl_name[len(images_prefix):]
                if "/" not in image:
                    images.append(image)
            if images:
                sha = p["head"]["sha"]
                considerable[sha] = images
                # make the pull request head available locally; the exit
                # status is deliberately not checked
                subprocess.call(["git", "fetch", "origin", "pull/{0}/head".format(p["number"])])
                refs["pull request #{} ({})".format(p["number"], p["title"])] = sha

    # output of ls-remote / show-ref has the format
    #
    # d864d3792db442e3de3d1811fa4bc371793a8f4f refs/heads/master
    # ad50328990e44c22501bd5e454746d4b5e561b7c refs/heads/rhel-7.4
    git_cmd = "show-ref" if offline else "ls-remote"
    ref_output = subprocess.check_output(["git", git_cmd], universal_newlines=True).splitlines()

    # strip the "refs/heads/" prefix (including its slash) and generate a dictionary
    prefix = "refs/heads/"
    for ln in ref_output:
        fields = ln.split()
        if len(fields) != 2:
            # empty or unexpected line
            continue
        sha, name = fields
        if name.startswith(prefix):
            refs[name[len(prefix):]] = sha

    return (refs, considerable)
def get_image_links(ref, git_path):
    """List the qcow2 image links stored under a path in a given git ref.

    Expects to be called from the top level of the git checkout.

    Every entry that `git ls-tree` reports for `git_path` in `ref` is read
    with `git show`; the contents that end in ".qcow2" are returned. If
    `git ls-tree` exits with status 128, a note is written to stderr and an
    empty list is returned; any other failure is propagated.
    """
    # the trailing slash on the path is important
    tree_path = git_path if git_path.endswith("/") else "{0}/".format(git_path)

    try:
        listing = subprocess.check_output(
            ["git", "ls-tree", "--name-only", ref, tree_path],
            universal_newlines=True)
    except subprocess.CalledProcessError as e:
        if e.returncode != 128:
            raise
        sys.stderr.write("Skipping {0} due to tree error.\n".format(ref))
        return []

    # read every entry first, then keep the ones that point at a qcow2 image
    contents = []
    for entry in listing.splitlines():
        blob = "{0}:{1}".format(ref, entry)
        contents.append(subprocess.check_output(["git", "show", blob], universal_newlines=True))
    return [content for content in contents if content.endswith(".qcow2")]
@contextmanager
def remember_cwd():
    """Context manager that restores the working directory on exit.

    The directory that is current when the block is entered becomes current
    again when the block is left, whether normally or through an exception.
    """
    original_dir = os.getcwd()
    try:
        yield
    finally:
        os.chdir(original_dir)
def get_image_names(quiet=False, open_pull_requests=True, offline=False):
    """Return the set of image names used by all branches and, optionally, open pull requests.

    Unless `quiet` is set, each ref that is looked at is announced on stderr.
    For a pull request only those links are kept that start with the name of
    an image file the pull request touches; for every other ref all links
    are kept.
    """
    used = set()

    # iterate over visible refs (mostly branches)
    # this hinges on being in the top level directory of the git checkout
    with remember_cwd():
        os.chdir(os.path.join(BOTS, ".."))
        refs, considerable = get_refs(open_pull_requests, offline)

        # list images present in each branch / pull request
        for name, ref in refs.items():
            if not quiet:
                sys.stderr.write("Considering images from {0} ({1})\n".format(name, ref))

            links = get_image_links(ref, "bots/images")
            if ref not in considerable:
                used.update(links)
                continue

            touched = tuple(considerable[ref])
            used.update(link for link in links if link.startswith(touched))

    return used
def prune_images(force, dryrun, quiet=False, open_pull_requests=True, offline=False, checkout_only=False):
    """Prune unreferenced image files and broken image links.

    Args:
        force: Delete unreferenced files even if they aren't old and disk
            space is sufficient.
        dryrun: Only report what would be pruned; delete nothing.
        quiet: Don't report pruned files (ignored when `dryrun` is set).
        open_pull_requests: Passed on to get_image_names().
        offline: Passed on to get_image_names().
        checkout_only: Skip get_image_names() and only keep what the links
            in the current checkout refer to.
    """
    # the cut-off is based on the start time, taken before any ref scanning
    expiry_threshold = time.time() - IMAGE_EXPIRE * 86400

    # everything we want to keep
    targets = set() if checkout_only else get_image_names(quiet, open_pull_requests, offline)

    # what we have in the current checkout might already have been added by its branch, but check anyway
    for entry in os.listdir(testvm.IMAGES_DIR):
        link_path = os.path.join(testvm.IMAGES_DIR, entry)
        # only original image entries are trustworthy sources; non-links are ignored
        if not link_path.endswith((".qcow2", ".partial")) and os.path.islink(link_path):
            targets.add(os.readlink(link_path))

    prunable_suffixes = (".xz", ".qcow2", ".partial")
    for entry in os.listdir(testvm.get_images_data_dir()):
        data_path = os.path.join(testvm.get_images_data_dir(), entry)

        # without force, recent files survive as long as there is enough disk space
        if not force:
            if enough_disk_space() and os.lstat(data_path).st_mtime > expiry_threshold:
                continue

        if not os.path.isfile(data_path):
            continue
        if not data_path.endswith(prunable_suffixes) or entry in targets:
            continue

        if dryrun or not quiet:
            sys.stderr.write("Pruning {0}\n".format(entry))
        if not dryrun:
            os.unlink(data_path)

    # now prune broken links
    for entry in os.listdir(testvm.IMAGES_DIR):
        link_path = os.path.join(testvm.IMAGES_DIR, entry)

        # don't prune original image entries and ignore non-links
        if not (link_path.endswith(".qcow2") and os.path.islink(link_path)):
            continue

        # a link that still resolves to a file is valid and stays
        if os.path.isfile(link_path):
            continue

        if dryrun or not quiet:
            sys.stderr.write("Pruning link {0}\n".format(link_path))
        if not dryrun:
            os.unlink(link_path)
def every_image():
    """Return the names of all entries in the images directory that are symbolic links."""
    return [
        name
        for name in os.listdir(testvm.IMAGES_DIR)
        if os.path.islink(os.path.join(testvm.IMAGES_DIR, name))
    ]
def main():
    """Parse the command line and prune images accordingly.

    Returns the process exit status: 0 on success, 1 when prune_images()
    raised a RuntimeError (the message is written to stderr).
    """
    # (option strings, extra keyword arguments); all options are boolean flags
    flag_options = (
        (("--force",),
         {"help": "Delete images even if they aren't old"}),
        (("--quiet",),
         {"help": "Make downloading quieter"}),
        (("-d", "--dry-run-prune"),
         {"dest": "dryrun",
          "help": "Don't actually delete images and links"}),
        (("-b", "--branches-only"),
         {"dest": "branches_only",
          "help": "Don't consider pull requests on GitHub, only look at branches"}),
        (("-c", "--checkout-only"),
         {"dest": "checkout_only",
          "help": "Consider neither pull requests on GitHub nor branches, only look at the current checkout"}),
        (("-o", "--offline"),
         {"dest": "offline",
          "help": "Don't access external sources such as GitHub"}),
    )

    parser = argparse.ArgumentParser(description='Prune downloaded images')
    for flags, extra in flag_options:
        parser.add_argument(*flags, action="store_true", **extra)
    opts = parser.parse_args()

    try:
        prune_images(
            opts.force,
            opts.dryrun,
            quiet=opts.quiet,
            open_pull_requests=(not opts.branches_only),
            offline=opts.offline,
            checkout_only=opts.checkout_only,
        )
    except RuntimeError as err:
        sys.stderr.write("image-prune: {0}\n".format(str(err)))
        return 1
    else:
        return 0
# Script entry point: exit with the status returned by main()
if __name__ == "__main__":
    raise SystemExit(main())