diff --git a/rorapi/management/commands/setup.py b/rorapi/management/commands/setup.py
index 0795078..d4e84da 100644
--- a/rorapi/management/commands/setup.py
+++ b/rorapi/management/commands/setup.py
@@ -1,35 +1,77 @@
 import requests
-import zipfile
-import base64
-from django.core.management.base import BaseCommand
+import logging
+from django.core.management.base import BaseCommand, CommandError
 from rorapi.management.commands.deleteindex import Command as DeleteIndexCommand
 from rorapi.management.commands.createindex import Command as CreateIndexCommand
 from rorapi.management.commands.indexrordump import Command as IndexRorDumpCommand
 from rorapi.management.commands.getrordump import Command as GetRorDumpCommand
 from rorapi.settings import ROR_DUMP
 
-HEADERS = {'Accept': 'application/vnd.github.v3+json'}
+REQUEST_TIMEOUT_SECONDS = 30
+logger = logging.getLogger(__name__)
+
+
+def build_github_headers():
+    token = ROR_DUMP.get('GITHUB_TOKEN')
+    if not token:
+        raise CommandError('Missing GitHub authentication configuration; cannot authenticate to GitHub API.')
+    return {
+        'Authorization': f'token {token}',
+        'Accept': 'application/vnd.github.v3+json'
+    }
+
+
+def get_contents_url(use_test_data):
+    repo_key = 'TEST_REPO_URL' if use_test_data else 'PROD_REPO_URL'
+    repo_url = ROR_DUMP.get(repo_key)
+    if not repo_url:
+        raise CommandError('Repository source URL is not configured; cannot build contents endpoint.')
+    return f"{repo_url.rstrip('/')}/contents"
 
-HEADERS = {'Authorization': 'token {}'.format(ROR_DUMP['GITHUB_TOKEN']), 'Accept': 'application/vnd.github.v3+json'}
 
 def get_ror_dump_sha(filename, use_test_data):
-    sha = ''
-    if use_test_data:
-        contents_url = ROR_DUMP['TEST_REPO_URL'] + '/contents'
-    else:
-        contents_url = ROR_DUMP['PROD_REPO_URL'] + '/contents'
+    headers = build_github_headers()
+    contents_url = get_contents_url(use_test_data)
+
     try:
-        response = requests.get(contents_url, headers=HEADERS)
-    except requests.exceptions.RequestException as e:
-        raise SystemExit(f"{contents_url}: is Not reachable \nErr: {e}")
+        response = requests.get(contents_url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
+    except requests.exceptions.Timeout as exc:
+        raise CommandError(f'Request timed out while reaching {contents_url}.') from exc
+    except requests.exceptions.ConnectionError as exc:
+        raise CommandError(f'Could not connect to {contents_url}.') from exc
+    except requests.exceptions.RequestException as exc:
+        raise CommandError(f'GitHub request failed for {contents_url}: {exc}') from exc
+
+    try:
+        response.raise_for_status()
+    except requests.exceptions.HTTPError as exc:
+        status = response.status_code
+        if status in (401, 403):
+            raise CommandError('GitHub authentication/authorization failed. Check API credentials and permissions.') from exc
+        if status == 404:
+            raise CommandError(f'GitHub repository contents endpoint not found: {contents_url}.') from exc
+        raise CommandError(f'GitHub API returned HTTP {status} for {contents_url}.') from exc
+
     try:
         repo_contents = response.json()
-        for file in repo_contents:
-            if filename in file['name']:
-                sha = file['sha']
-        return sha
-    except:
-        return None
+    except ValueError as exc:
+        raise CommandError(f'GitHub API returned invalid JSON for {contents_url}.') from exc
+
+    if not isinstance(repo_contents, list):
+        raise CommandError(f'Unexpected GitHub API response type for {contents_url}: {type(repo_contents).__name__}.')
+
+    for entry in repo_contents:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get('name')
+        sha = entry.get('sha')
+        if not name or not sha:
+            continue
+        if filename in name:
+            return sha
+
+    return None
+
 
 class Command(BaseCommand):
     help = 'Setup ROR API'
@@ -49,7 +91,12 @@ def handle(self, *args, **options):
         else:
             print("Using ror-data repo")
 
-        sha = get_ror_dump_sha(filename, use_test_data)
+        try:
+            sha = get_ror_dump_sha(filename, use_test_data)
+        except CommandError as exc:
+            msg = f'ERROR: Could not validate ROR data dump source. {exc}'
+            self.stdout.write(msg)
+            return msg
 
         if sha:
             try:
@@ -58,11 +105,12 @@ def handle(self, *args, **options):
                 CreateIndexCommand().handle(*args, **options)
                 IndexRorDumpCommand().handle(*args, **options)
                 msg = 'SUCCESS: ROR dataset {} indexed in v2. Using test repo: {}'.format(filename, str(use_test_data))
-            except:
+            except Exception:
+                logger.exception('Failed while indexing ROR dataset %s (use_test_data=%s).', filename, use_test_data)
                 msg = 'ERROR: Could not index ROR data dump. Check API logs for details.'
         else:
             msg = 'ERROR: ROR dataset for file {} not found. '.format(filename) \
-                +'Please generate the data dump first.'
+                + 'Please generate the data dump first.'
 
         self.stdout.write(msg)
         return msg