mirror of https://git.sr.ht/~sircmpwn/git.sr.ht
#!/usr/bin/env python3
import os
import sys
import math
import random
import sqlalchemy as sa
import subprocess
import gitsrht.repos as gr
import prometheus_client
from prometheus_client import CollectorRegistry, Gauge
from prometheus_client.context_managers import Timer
from srht.config import cfg
from srht.database import DbSession
from gitsrht.types import Artifact, User, Repository
from minio import Minio
from datetime import datetime, timedelta
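
# This script garbage-collects a rotating slice of git repositories and
# occasionally prunes dangling artifacts from s3, reporting timings and
# counts to a Prometheus pushgateway when one is configured. Per the
# scheduling comment in gc_git below, it is expected to run every twenty
# minutes; a matching crontab entry (assumed here, the actual schedule is
# deployment configuration) might look like:
#   */20 * * * * gitsrht-periodic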
db = DbSession(cfg("git.sr.ht", "connection-string"))
db.init()
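
# One registry per run; each job records its wall time under a "section"
# label, and everything is pushed in one shot at the bottom of the script.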
registry = CollectorRegistry()
tg = Gauge("gitsrht_periodic_time",
        "Time to run gitsrht-periodic jobs",
        ["section"],
        registry=registry)
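
# gc_git: run `git gc` over a sliding window of repositories so that, over
# time, every repository gets collected.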
gc_git_t = tg.labels("gc_git")
@gc_git_t.time()
def gc_git():
    repo_count = Repository.query.count()

    # *srht-periodic scripts are run every twenty minutes, which gives us
    # 504 runs over the course of a week; hence, if we GC a 504th of the
    # repository count each time, we will on average have GCd every repo
    # around once a week.
    limit = int(math.ceil(repo_count / (7 * 24 * 60 / 20)))
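    # (7 days * 24 hours * 60 minutes / a 20-minute interval = 504; with,
    # say, 10,000 repositories, limit = ceil(10000 / 504) = 20 per run.)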

    rc = Gauge("gitsrht_periodic_gc_git_count",
            "Amount of repos GCd by the gc_git job",
            registry=registry)
    ps = Gauge("gitsrht_periodic_gc_git_packsize",
            "Packfile size in the gc_git job (B)",
            ["stage"],
            registry=registry)

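    # Pick a random contiguous window of `limit` repositories for this run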
    repos = (Repository.query
            .offset(random.randrange(0, repo_count + 1 - limit))
            .limit(limit)).all()
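    # Record total packfile size before and after `git gc` so the gauge's
    # "pre"/"post" stages show how much each collection reclaimed; repos
    # whose directory has vanished are skipped.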
    for r in repos:
        try:
            ps.labels("pre").inc(sum(map(lambda p: p.stat().st_size,
                os.scandir(os.path.join(r.path, "objects", "pack")))))

            def gc():
                subprocess.run(["git", "-C", r.path, "gc", "--quiet"],
                        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            gc()

            ps.labels("post").inc(sum(map(lambda p: p.stat().st_size,
                os.scandir(os.path.join(r.path, "objects", "pack")))))
            rc.inc()
        except FileNotFoundError:
            continue

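# gc_s3: prune s3 artifacts that no longer have a corresponding database
# record.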
gc_s3_t = tg.labels("gc_s3")
@gc_s3_t.time()
def gc_s3():
    if not gr.object_storage_enabled:
        return
    # Once a weekish
    if random.randrange(0, int(math.ceil(7 * 24 * 60 / 20))) != 0:
        return
    prefix = os.path.join(gr.s3_prefix, "artifacts")
    minio = Minio(gr.s3_upstream, access_key=gr.s3_access_key,
            secret_key=gr.s3_secret_key, secure=True)

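    # Mark phase: list everything stored under the artifacts prefix, then
    # discard every object that an Artifact row still accounts for; whatever
    # remains in objs afterwards is dangling.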
    objs = set(obj.object_name for obj
            in minio.list_objects(gr.s3_bucket, prefix, recursive=True))
    artifacts = Artifact.query.all()

    users = {u.id: u for u in (User.query.filter(User.id.in_(
        set(a.user_id for a in artifacts)))).all()}

    repos = {r.id: r for r in (Repository.query.filter(Repository.id.in_(
        set(a.repo_id for a in artifacts)))).all()}

    for art in artifacts:
        artifact_path = os.path.join(prefix, users[art.user_id].canonical_name,
                repos[art.repo_id].name, art.filename)
        objs.discard(artifact_path)

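    # Sweep phase: remove whatever nothing claimed, and fail loudly if the
    # removal itself reported errors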
    if not objs:
        return
    errs = list(minio.remove_objects(gr.s3_bucket, objs))
    if errs:
        raise Exception(f"While removing dangling artifacts {objs}, got errors: {errs}")

    Gauge("gitsrht_periodic_gc_s3_count",
            "Amount of objects pruned by the gc_s3 job",
            registry=registry).set(len(objs))

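# Run both jobs, timing the whole batch under the "total" section as well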
all_t = tg.labels("total")
@all_t.time()
def all():
    gc_git()
    gc_s3()
all()

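# Push all collected metrics in one shot; skipped when no pushgateway is
# configured.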
pg_endpoint = cfg("sr.ht", "pushgateway", default=None)
if pg_endpoint:
    prometheus_client.pushadd_to_gateway(pg_endpoint,
            job="git.sr.ht", registry=registry)