#!/usr/bin/python

import os
import subprocess
from collections import defaultdict
import shutil
from pathlib import Path
import time

DRY_RUN = True

def get_git_managed():
  '''Group the git-tracked files by their top-level directory.

  Runs ``git ls-files -z`` in the current working directory, so the paths
  are relative to it.

  Returns a defaultdict(list) mapping the first path component (the
  pkgbase) to the list of tracked paths below it, each relative to that
  directory (e.g. ``{'foo': ['PKGBUILD', 'patches/fix.patch']}``).

  Raises subprocess.CalledProcessError if git exits with an error.
  '''
  # -z yields NUL-terminated, unquoted paths. Without it git C-quotes names
  # containing non-ASCII or special characters, and such tracked files would
  # never match the names found on disk.
  cmd = ['git', 'ls-files', '-z']
  # decode the same way the os / pathlib layer decodes file names
  out = os.fsdecode(subprocess.check_output(cmd))

  pkgbase_to_files = defaultdict(list)
  for path in out.split('\0'):
    # the output ends with a NUL, which leaves one empty trailing item
    if not path or path == '.gitignore':
      continue

    pkgbase, sep, file = path.partition('/')
    if not sep:
      # top-level entry with no directory part: skip subproject commits
      continue
    pkgbase_to_files[pkgbase].append(file)

  return pkgbase_to_files

def rmdir(d):
  '''Delete the directory tree d, or only announce it while DRY_RUN is set.

  The action is always printed. Errors raised during the actual removal
  are ignored (best effort).
  '''
  if not DRY_RUN:
    print('Removing dir ', d)
    shutil.rmtree(d, ignore_errors=True)
    return

  print('Would remove dir ', d)

def rmfile(f):
  '''Unlink the path f, or only announce it while DRY_RUN is set.

  f must offer an unlink() method (a pathlib.Path in this script). The
  action is always printed; errors from unlink() propagate to the caller.
  '''
  if not DRY_RUN:
    print('Removing file', f)
    f.unlink()
    return

  print('Would remove file', f)

def is_vcs(dir):
  '''Tell whether the directory dir directly contains a VCS marker entry.

  The markers are the entry names .git, .hg, .svn and packed-refs; only
  the immediate children of dir are inspected.
  '''
  markers = ('.git', '.hg', '.svn', 'packed-refs')
  names = {entry.name for entry in dir.iterdir()}
  return not names.isdisjoint(markers)

def process(dir, git_files):
  '''Remove stale entries that are not git-managed from one directory.

  dir is the directory to clean (a pathlib.Path). git_files is an iterable
  of git-tracked paths relative to dir; they may contain subdirectories
  (e.g. 'patches/fix.patch').

  Only the immediate children of dir are considered. Always kept:
  __pycache__, *.log files, files ending in .pkg.tar.zst / .pkg.tar.xz
  (and their .sig), and every git-managed entry. Other entries are removed
  via rmdir() / rmfile() unless their mtime is newer than one day before
  the newest tracked top-level file. If there is no tracked top-level file,
  the cutoff is the current time.

  Directories that look like VCS checkouts (see is_vcs) are handled
  separately: those whose mtime is more than one day older than the newest
  such directory are removed.
  '''
  one_day = 86400

  tracked = set(git_files)
  # An entry is git-managed when it is the first component of any tracked
  # path, so a tracked subdirectory counts as well as a tracked file.
  managed = {path.partition('/')[0] for path in tracked}

  files = list(dir.iterdir())
  mtimes = {f: f.stat(follow_symlinks=False).st_mtime for f in files}
  vcs_dirs = []

  # don't delete files touched near last update; the reference point is the
  # newest tracked top-level file (directory mtimes are not used for this)
  tracked_mtimes = [mtimes[f] for f in files if f.name in tracked]
  if tracked_mtimes:
    protected_mtime = max(tracked_mtimes) - one_day
  else:
    protected_mtime = time.time()

  for file in files:
    if file.name == '__pycache__':
      continue

    if file.name.endswith('.log'):
      # logs are being handled by find -delete using crontab
      # and kept longer than source code
      continue

    if file.name.endswith((
      '.pkg.tar.zst', '.pkg.tar.zst.sig',
      '.pkg.tar.xz', '.pkg.tar.xz.sig',
      )):
      continue

    # checked before the VCS sweep so git-managed entries are never removed
    if file.name in managed:
      continue

    is_dir = file.is_dir(follow_symlinks=False)

    if is_dir and is_vcs(file):
      vcs_dirs.append(file)
      continue

    if mtimes[file] > protected_mtime:
      continue

    if is_dir:
      rmdir(file)
    else:
      rmfile(file)

  if vcs_dirs:
    # keep only the VCS directories touched within a day of the newest one
    vcs_max = max(mtimes[x] for x in vcs_dirs)
    for x in vcs_dirs:
      if vcs_max - mtimes[x] > one_day:
        rmdir(x)

def main(repodir, force):
  '''Clean up the repository directory repodir.

  repodir is a pathlib.Path; force is a bool. Unless force is true the
  run is a dry run: DRY_RUN is set so removals are only printed.

  The process changes its working directory to repodir. Top-level entries
  are then handled as follows:
    * .gitignore and .git are left alone;
    * anything that is not a directory is removed;
    * a directory with neither a lilac.yaml nor any git-tracked file is
      removed entirely;
    * every other directory is cleaned by process().
  '''
  global DRY_RUN

  DRY_RUN = not force

  # Anchor the path before chdir: a relative repodir would otherwise be
  # looked up relative to the new working directory (i.e. repodir/repodir).
  repodir = repodir.absolute()

  os.chdir(repodir)
  pkgbase_to_files = get_git_managed()

  for entry in repodir.iterdir():
    # never touch git's own metadata, in case repodir is the repository root
    if entry.name in ('.gitignore', '.git'):
      continue

    if not entry.is_dir():
      rmfile(entry)
      continue

    if not (entry / 'lilac.yaml').exists() and entry.name not in pkgbase_to_files:
      rmdir(entry)
      continue

    process(entry, pkgbase_to_files[entry.name])

if __name__ == '__main__':
  import argparse

  # Command line: -f switches from dry run to real deletion, DIR optionally
  # names the repository to clean.
  argp = argparse.ArgumentParser(
    description='clean up our git repository used by lilac',
  )
  argp.add_argument(
    '-f', action='store_true',
    help='do not dry-run; really delete files',
  )
  argp.add_argument(
    'DIR', nargs='?',
    help="path to the repository; read lilac's config by default",
  )
  opts = argp.parse_args()

  if not opts.DIR:
    # no path given: take the repodir setting from lilac's configuration
    from lilac2.tools import read_config
    repodir = Path(read_config()['repository']['repodir']).expanduser()
  else:
    repodir = Path(opts.DIR)

  if opts.f:
    # Only when really deleting: call lilac's lock_file on its .lock file,
    # provided lilac2 can be imported (presumably to avoid running while
    # lilac itself is working -- confirm against lock_file).
    try:
      from lilac2.vendor.myutils import lock_file
      from lilac2.const import mydir
    except ImportError:
      pass
    else:
      lock_file(mydir / '.lock')

  main(repodir, opts.f)
