dndbeyond_src/update.py

188 lines
5.8 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import gzip
import html.parser
import logging
import pathlib
import re
import shutil
import subprocess
import tempfile
import typing
import urllib.request
# Character page whose HTML is fetched to locate the JS bundle; it is also the
# default value of the --character-url option. Per the original author, this
# is a character which won't be deleted.
DND_BEYOND_CHARACTER_URL = "https://www.dndbeyond.com/characters/147022047"
# Pattern applied with re.match() (i.e. anchored at the start) to the `src`
# attribute of <script> tags to pick out the character-app "main" JS file.
MAIN_JS_RE = re.compile(r"https://media\.dndbeyond\.com\/character-app.*main.*\.js")
# Absolute path to the sourcemapper executable run by download_src().
# NOTE(review): this is a machine-specific path -- confirm it for your host.
SOURCEMAPPER_BIN = "/home/prometheus/go/bin/sourcemapper"
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser used by main()."""
    arg_parser = argparse.ArgumentParser(
        description=(
            "Download the dndbeyond character-app source via sourcemapper, "
            "optionally committing it to a git repo."
        )
    )
    arg_parser.add_argument(
        "-d",
        "--directory",
        required=True,
        help="The destination directory to put the files (required)",
    )
    arg_parser.add_argument(
        "-r",
        "--git-repo",
        type=str,
        help="Store the code in the directory of this git repo",
    )
    arg_parser.add_argument(
        "-b",
        "--git-branch",
        type=str,
        default="master",
        help="Git branch to push to, defaults to master",
    )
    arg_parser.add_argument(
        "-u",
        "--character-url",
        default=DND_BEYOND_CHARACTER_URL,
        help="URL of the dndbeyond character to grab the source from.",
    )
    arg_parser.add_argument(
        "-v", "--verbose", action="store_true", help="Enable verbose logging"
    )
    arg_parser.add_argument(
        "-q", "--quiet", action="store_true", help="Only display errors"
    )
    return arg_parser


def main():
    """Entry point: find the main JS URL on a character page and fetch its source.

    The character page is downloaded, its <script> tags are scanned for the
    main JS file, and that URL is handed either to download_src() (plain
    directory mode) or to update_repo() (when --git-repo is given).

    Raises:
        SystemExit: with status 1 when the page cannot be downloaded or no
            matching script URL is found in it.
    """
    args = _build_arg_parser().parse_args()

    # --verbose wins over --quiet when both are given.
    if args.verbose:
        log_level = logging.DEBUG
    elif args.quiet:
        log_level = logging.ERROR
    else:
        log_level = logging.INFO
    logging.basicConfig(level=log_level)

    # Honour --character-url instead of always fetching the built-in default.
    char_data = download_character(args.character_url)
    if char_data is None:
        # download_character() has already logged the underlying exception.
        raise SystemExit(1)

    extractor = MainJSExtractor()
    # errors="replace": only the <script> tags matter, so a stray undecodable
    # byte elsewhere in the page should not abort the run.
    extractor.feed(char_data.decode("utf-8", errors="replace"))
    if extractor.js_url is None:
        logging.error("No main JS URL found on %s", args.character_url)
        raise SystemExit(1)
    logging.debug("Found URL: %s", extractor.js_url)

    if args.git_repo is None:
        download_src(args.directory, extractor.js_url)
    else:
        update_repo(
            args.git_repo, args.git_branch, args.directory, extractor.js_url
        )


def download_character(
    character_url: typing.Optional[str] = None,
) -> typing.Optional[bytes]:
    """Fetch the raw body of a dndbeyond character page.

    Args:
        character_url: Page to fetch. When omitted (or None) the module-level
            DND_BEYOND_CHARACTER_URL is used, so existing zero-argument
            callers keep working.

    Returns:
        The response body, gunzipped when the response declares
        ``Content-Encoding: gzip``; None when the request or the
        decompression fails (the exception is logged, not propagated).
    """
    if character_url is None:
        character_url = DND_BEYOND_CHARACTER_URL
    try:
        req = urllib.request.Request(
            character_url,
            headers={
                "user-agent": "Mozilla/5.0",
                "authority": "www.dndbeyond.com",
                "cache-control": "max-age=0",
                "upgrade-insecure-requests": "1",
                "sec-fetch-user": "?1",
                "accept": "text/json",
                "sec-fetch-site": "none",
                "sec-fetch-mode": "navigate",
                # Only advertise what is actually decoded below; offering
                # deflate/br could yield a body this function cannot read.
                "accept-encoding": "gzip",
                "accept-language": "en-US,en;q=0.9",
            },
        )
        with urllib.request.urlopen(req) as resp:
            content_encoding = resp.info().get("Content-Encoding") or ""
            body = resp.read()
        if content_encoding.lower() == "gzip":
            return gzip.decompress(body)
        return body
    except Exception:
        # Best-effort by design: report the failure and let the caller decide.
        logging.exception("Failed to load the character")
        return None
class MainJSExtractor(html.parser.HTMLParser):
    """HTML parser that records the URL of the main character-app JS file.

    After feed(), ``js_url`` holds the ``src`` value of the last <script> tag
    whose ``src`` matches MAIN_JS_RE, or None when no tag matched.
    """

    def __init__(self):
        super().__init__()
        # Set by handle_starttag(); stays None until a matching tag is seen.
        self.js_url: typing.Optional[str] = None

    def handle_starttag(self, tag, attrs):
        """Inspect a start tag; remember a matching <script src=...> URL."""
        if tag != "script":
            return
        for attr_name, attr_val in attrs:
            # HTMLParser reports a valueless attribute (<script src>) as None,
            # which must not be handed to the regex.
            if attr_name != "src" or attr_val is None:
                continue
            if MAIN_JS_RE.match(attr_val) is not None:
                self.js_url = attr_val
def download_src(output_dir: str, js_url: str) -> None:
    """Run the sourcemapper binary for ``js_url``, writing into ``output_dir``.

    Args:
        output_dir: Passed to sourcemapper as its ``-output`` argument.
        js_url: Passed to sourcemapper as its ``-jsurl`` argument.

    Raises:
        subprocess.CalledProcessError: if sourcemapper exits non-zero.
    """
    command = [SOURCEMAPPER_BIN, "-output", output_dir, "-jsurl", js_url]
    # Both output streams are discarded, so a failure is only visible through
    # the CalledProcessError raised by check=True.
    subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        check=True,
    )
def update_repo(git_repo: str, git_branch: str, output_dir: str, js_url: str) -> None:
    """Refresh ``output_dir`` inside ``git_repo`` and push any changes.

    The repo is sparse-cloned into a temporary directory, the existing
    ``output_dir`` checkout is removed, the source is downloaded afresh into
    it, and the result is committed and pushed if anything changed.

    Args:
        git_repo: Repository to clone (anything ``git clone`` accepts).
        git_branch: Branch to check out before updating.
        output_dir: Directory inside the repo; must be a non-empty relative
            path that does not contain ``..``.
        js_url: URL of the JS file handed to download_src().

    Raises:
        ValueError: if ``output_dir`` is absolute, empty/".", or contains
            "..". Such a path would resolve outside the temporary clone (or
            to its root), and the rmtree below would then delete an unrelated
            directory or the clone's own .git.
        subprocess.CalledProcessError: if a git or sourcemapper call fails.
    """
    relative_dir = pathlib.PurePath(output_dir)
    if (
        relative_dir.is_absolute()
        or not relative_dir.parts
        or ".." in relative_dir.parts
    ):
        raise ValueError(
            f"output_dir must be a relative path inside the repo, got {output_dir!r}"
        )

    with tempfile.TemporaryDirectory() as tmpdir:
        logging.debug("Cloning repo %s to %s", git_repo, tmpdir)
        git_sparse_clone(tmpdir, git_repo, git_branch, output_dir)

        checkout_output_dir = pathlib.Path(tmpdir) / relative_dir
        logging.debug("Updating source code in %s", checkout_output_dir)
        # Start from an empty directory so files that disappeared upstream
        # are picked up as deletions by `git add --all`.
        if checkout_output_dir.exists():
            shutil.rmtree(str(checkout_output_dir))
        download_src(str(checkout_output_dir), js_url)
        git_commit_all(tmpdir, "New source found from dndbeyond.com")
def git_sparse_clone(
    work_tree: str, git_repo: str, git_branch: str, sparse_dir: str
) -> None:
    """Clone ``git_repo`` into ``work_tree`` with only ``sparse_dir`` checked out.

    Three steps, in order: clone without a checkout, configure the sparse
    checkout for ``sparse_dir``, then check out ``git_branch``.
    """
    # The sparse rules have to be in place before anything is checked out,
    # hence the initial clone leaves the working tree empty.
    git_empty_checkout(work_tree=work_tree, git_repo=git_repo)
    git_setup_sparse_checkout(work_tree=work_tree, sparse_dir=sparse_dir)
    git_checkout(work_tree=work_tree, git_branch=git_branch)
def git_empty_checkout(work_tree: str, git_repo: str) -> None:
    """Clone ``git_repo`` into ``work_tree`` using ``git clone --no-checkout``."""
    clone_args = ["clone", "--no-checkout", git_repo, work_tree]
    # None as the work tree: run_git_cmd then adds no --git-dir/--work-tree flags.
    run_git_cmd(clone_args, None)
def git_setup_sparse_checkout(work_tree: str, sparse_dir: str) -> None:
    """Restrict the checkout in ``work_tree`` to ``sparse_dir`` (non-cone mode)."""
    sparse_args = ["sparse-checkout", "set", "--no-cone", sparse_dir]
    run_git_cmd(sparse_args, work_tree)
def git_checkout(work_tree: str, git_branch: str) -> None:
    """Run ``git checkout <git_branch>`` in ``work_tree``."""
    checkout_args = ["checkout", git_branch]
    run_git_cmd(checkout_args, work_tree)
def git_commit_all(work_tree: str, commit_msg: str) -> None:
    """Stage everything in ``work_tree``; commit and push only if something changed.

    Args:
        work_tree: Working tree of the repository to operate on.
        commit_msg: Message used for the commit, when one is made.
    """
    run_git_cmd(["add", "--all"], work_tree)
    status = run_git_cmd(["status", "--porcelain=v1"], work_tree)

    # Empty porcelain output means there is nothing staged to commit.
    if not status.stdout:
        logging.debug("No changes found")
        return

    logging.debug("Changes found, comitting")
    run_git_cmd(["commit", "-m", commit_msg], work_tree)
    run_git_cmd(["push"], work_tree)
def run_git_cmd(
    args: typing.List[str], work_tree: typing.Optional[str]
) -> subprocess.CompletedProcess:
    """Run ``/usr/bin/git`` with ``args`` and return the completed process.

    Args:
        args: git sub-command and its arguments, e.g. ``["status"]``.
        work_tree: Working tree to operate on. When not None,
            ``--git-dir=<work_tree>/.git`` and ``--work-tree=<work_tree>`` are
            placed before ``args``; when None, git runs without them.

    Returns:
        The CompletedProcess, with stdout and stderr captured as bytes.

    Raises:
        subprocess.CalledProcessError: if git exits non-zero. The captured
            stderr is logged first, since the exception text does not
            include it.
    """
    git_cmd = ["/usr/bin/git"]
    if work_tree is not None:
        git_dir = f"{work_tree}/.git"
        git_cmd += [
            f"--git-dir={git_dir}",
            f"--work-tree={work_tree}",
        ]
    try:
        return subprocess.run(git_cmd + args, check=True, capture_output=True)
    except subprocess.CalledProcessError as err:
        # Log only the sub-command name: the remaining arguments may carry a
        # repository URL with embedded credentials.
        subcommand = args[0] if args else ""
        stderr_text = (err.stderr or b"").decode("utf-8", errors="replace").strip()
        logging.error(
            "git %s failed with exit code %d: %s",
            subcommand,
            err.returncode,
            stderr_text,
        )
        raise
# Run the updater only when executed as a script, not when imported.
if __name__ == "__main__":
    main()