chore: add freesound batch download script

This commit is contained in:
Steven Wroblewski
2026-05-11 11:20:33 +02:00
parent fefa947783
commit 162ebd158f
+221
View File
@@ -0,0 +1,221 @@
#!/usr/bin/env python3
"""
Cozypaw Hospital — freesound.org batch audio downloader
Downloads all placeholder audio files (0-byte .ogg) and replaces them with
128 kbps HQ OGG previews from freesound.org.
Prerequisites:
pip install requests
API key setup (free, ~2 min):
1. freesound.org → login → click your username → "API credentials"
2. Click "Apply for an API key"
3. App name: "Cozypaw Download Script", Description: "Personal game project"
4. Copy the "Api key" value (the long string) into API_KEY below.
Usage:
python docs/download_audio.py
Quality note:
This script downloads the "preview-hq-ogg" (128 kbps OGG Vorbis).
For a mobile children's game this is indistinguishable from lossless.
Original-quality downloads require full OAuth2 — not worth the hassle.
"""
import os
import sys
import requests
from pathlib import Path
# ── API key ────────────────────────────────────────────────────────────────────
# Preferred: export FREESOUND_API_KEY so the secret never lands in version
# control. Alternatively paste the key between the trailing quotes below.
API_KEY = os.environ.get("FREESOUND_API_KEY") or ""  # e.g. "aB3dEfGhIjKlMnOpQrStUvWx"
# ──────────────────────────────────────────────────────────────────────────────
# Repository root = the directory two levels above this script. resolve() makes
# the result absolute even if __file__ happens to be a relative path.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Root of the freesound REST API; endpoint paths are appended to this.
BASE_URL = "https://freesound.org/apiv2"
# Sounds whose freesound ID is already known: fetched directly by ID, no search.
# Entries with a CC-BY license need an attribution line in docs/credits-audio.md.
KNOWN_IDS: dict[str, tuple[int, str]] = {
    # repo-relative path: (freesound ID, license)
    "assets/audio/music/floor_0.ogg": (725019, "CC0"),
    "assets/audio/music/floor_1.ogg": (387588, "CC0"),
    "assets/audio/music/floor_2.ogg": (684511, "CC0"),
    "assets/audio/music/floor_3.ogg": (723913, "CC0"),
    "assets/audio/sfx/chest_tap.ogg": (679772, "CC0"),
    "assets/audio/sfx/item_spawn.ogg": (683096, "CC0"),
    "assets/audio/sfx/item_drag_start.ogg": (411177, "CC0"),
    "assets/audio/sfx/item_drop_hand.ogg": (448086, "CC0"),
    "assets/audio/sfx/item_drop_outfit.ogg": (161415, "CC-BY 3.0"),  # needs attribution
    "assets/audio/sfx/item_return_chest.ogg": (740266, "CC0"),
    "assets/audio/sfx/item_drop_floor.ogg": (449955, "CC0"),
}
# Sounds without a fixed ID: the best match for a text search is used instead.
# Format: path -> (search query, max duration in seconds)
SEARCH_QUERIES: dict[str, tuple[str, float]] = {
    "assets/audio/sfx/xray_scan.ogg": ("xray machine beep", 2.0),
    "assets/audio/sfx/tea_pour.ogg": ("liquid pour short", 3.0),
    "assets/audio/sfx/cradle_rock.ogg": ("gentle wood creak", 3.0),
    "assets/audio/sfx/gift_open.ogg": ("gift unwrap pop", 2.0),
    "assets/audio/sfx/ambulance_siren.ogg": ("toy siren short", 1.5),
    "assets/audio/sfx/delivery_cheer.ogg": ("happy chime fanfare", 2.0),
    "assets/audio/sfx/object_tap.ogg": ("soft tap click", 1.0),
    "assets/audio/sfx/ultrasound_heartbeat.ogg": ("heartbeat monitor beep", 2.0),
    "assets/audio/sfx/character_pickup.ogg": ("cartoon pickup whoosh", 1.5),
    "assets/audio/sfx/character_place.ogg": ("soft thud gentle landing", 1.0),
    "assets/audio/sfx/character_tap.ogg": ("happy pop chime short", 1.0),
}
# ── Helpers ────────────────────────────────────────────────────────────────────
def _is_placeholder(path: Path) -> bool:
    """Tell whether *path* still needs downloading.

    A file counts as a placeholder when it does not exist or when it exists
    but has a size of zero bytes.
    """
    if not path.exists():
        return True
    return path.stat().st_size == 0
def _get_sound_info(sound_id: int) -> dict | None:
    """Fetch the metadata of one freesound sound by its numeric ID.

    Args:
        sound_id: freesound.org sound ID.

    Returns:
        The decoded JSON response (fields requested: id, name, previews,
        license, username, duration), or ``None`` when the request cannot be
        completed, the API answers with a non-200 status, or the body is not
        valid JSON. Every failure is reported on stdout.
    """
    url = f"{BASE_URL}/sounds/{sound_id}/"
    try:
        r = requests.get(url, params={
            "fields": "id,name,previews,license,username,duration",
            "token": API_KEY,
        }, timeout=15)
    except requests.RequestException as exc:
        # A timeout or connection error must fail this one file, not the whole
        # batch. Only the exception type is printed: the full message can
        # contain the request URL, which carries the API token.
        print(f" ✗ Network error ({type(exc).__name__}) for ID {sound_id}")
        return None
    if r.status_code != 200:
        print(f" ✗ API error {r.status_code} for ID {sound_id}")
        return None
    try:
        return r.json()
    except ValueError:
        # JSON decode errors raised by Response.json() derive from ValueError.
        print(f" ✗ Invalid JSON in API response for ID {sound_id}")
        return None
def _search_sound(query: str, max_duration: float) -> dict | None:
    """Run a freesound text search and return the top-ranked hit.

    The search is restricted to sounds licensed "Creative Commons 0" whose
    duration lies between 0 and *max_duration* seconds, sorted by score.

    Args:
        query: Free-text search terms.
        max_duration: Upper bound for the sound length, in seconds.

    Returns:
        The first result (fields requested: id, name, previews, license,
        username, duration), or ``None`` when the request cannot be completed,
        the API answers with a non-200 status, the body is not a JSON object,
        or nothing matches. Every failure is reported on stdout.
    """
    try:
        r = requests.get(f"{BASE_URL}/search/text/", params={
            "query": query,
            "filter": f'license:"Creative Commons 0" duration:[0 TO {max_duration}]',
            "fields": "id,name,previews,license,username,duration",
            "sort": "score",
            "page_size": 5,
            "token": API_KEY,
        }, timeout=15)
    except requests.RequestException as exc:
        # A timeout or connection error must fail this one file, not the whole
        # batch. Only the exception type is printed: the full message can
        # contain the request URL, which carries the API token.
        print(f" ✗ Network error ({type(exc).__name__}) for query '{query}'")
        return None
    if r.status_code != 200:
        print(f" ✗ Search API error {r.status_code} for query '{query}'")
        return None
    try:
        payload = r.json()
    except ValueError:
        # JSON decode errors raised by Response.json() derive from ValueError.
        payload = None
    if not isinstance(payload, dict):
        print(f" ✗ Invalid JSON in search response for query '{query}'")
        return None
    results = payload.get("results") or []
    if not results:
        print(f" ✗ No results for '{query}' under {max_duration}s")
        return None
    return results[0]
def _download_preview(info: dict, dest: Path) -> bool:
    """Download the HQ OGG preview described by *info* and store it at *dest*.

    Args:
        info: Sound metadata as returned by the freesound API; the URL is read
            from ``info["previews"]["preview-hq-ogg"]``.
        dest: Target file. Missing parent directories are created.

    Returns:
        ``True`` once the file has been written. ``False`` when the metadata
        has no preview URL, the download fails, or the server returns an empty
        body. Every failure is reported on stdout.
    """
    # "previews" may be absent or null; treat both as "no URL available".
    ogg_url = (info.get("previews") or {}).get("preview-hq-ogg")
    if not ogg_url:
        print(" ✗ No HQ OGG preview URL in response")
        return False
    try:
        r = requests.get(ogg_url, timeout=30)
    except requests.RequestException as exc:
        # Fail this one file instead of aborting the whole batch.
        print(f" ✗ CDN download failed ({type(exc).__name__}): {ogg_url}")
        return False
    if r.status_code != 200:
        print(f" ✗ CDN download failed ({r.status_code}): {ogg_url}")
        return False
    if not r.content:
        # Writing an empty body would recreate the 0-byte placeholder while
        # still being reported as a successful download.
        print(f" ✗ CDN returned an empty file: {ogg_url}")
        return False
    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_bytes(r.content)
    return True
def _record_attribution(path: str, info: dict, license_str: str) -> None:
    """Append an attribution row for a sound to docs/credits-audio.md.

    The credits file is a Markdown table; it is created with a header if it
    does not exist yet. A sound whose ``/<id>/`` URL fragment already appears
    in the file is not added a second time.

    Args:
        path: Repo-relative path of the downloaded asset.
        info: Sound metadata; ``name``, ``username`` and ``id`` are read.
        license_str: License label to print in the table.
    """
    credits_file = REPO_ROOT / "docs" / "credits-audio.md"

    def _cell(value: object) -> str:
        # An unescaped "|" inside a cell would split the Markdown table row.
        return str(value).replace("|", "\\|")

    line = (
        f"| `{path}` | {_cell(info['name'])} | {_cell(info['username'])} "
        f"| {license_str} | https://freesound.org/s/{info['id']}/ |\n"
    )
    # Titles and author names are arbitrary user text, so the file is always
    # read and written as UTF-8 rather than in the platform default encoding.
    if not credits_file.exists():
        credits_file.parent.mkdir(parents=True, exist_ok=True)
        credits_file.write_text(
            "# Audio Credits\n\nCC-BY files require attribution.\n\n"
            "| File | Title | Author | License | URL |\n"
            "|---|---|---|---|---|\n",
            encoding="utf-8",
        )
    content = credits_file.read_text(encoding="utf-8")
    if f"/{info['id']}/" in content:
        return  # already credited
    with credits_file.open("a", encoding="utf-8") as f:
        if content and not content.endswith("\n"):
            # Keep the new row from being glued onto an unterminated last line.
            f.write("\n")
        f.write(line)
    print(" → Attribution recorded in docs/credits-audio.md")
# ── Main ───────────────────────────────────────────────────────────────────────
def main() -> None:
    """Download every audio file that is still a placeholder and print a summary.

    Files listed in KNOWN_IDS are fetched by ID; files listed in
    SEARCH_QUERIES use the first search hit. Files that are already present
    and non-empty are skipped. CC-BY downloads from KNOWN_IDS are recorded in
    the credits file. Exits with status 1 if API_KEY is empty.
    """
    if not API_KEY:
        print("ERROR: API_KEY is empty.")
        print("Get your free key at: https://freesound.org/apiv2/apply/")
        print("Then fill in API_KEY at the top of this script.")
        sys.exit(1)

    # Visible section separator (an empty string times 60 prints nothing).
    rule = "─" * 60
    skipped: list[str] = []
    succeeded: list[str] = []
    failed: list[str] = []

    # ── Known IDs ──────────────────────────────────────────────────────────────
    print(f"\n{rule}")
    print("Downloading files with known freesound IDs …")
    print(rule)
    for rel_path, (sound_id, license_str) in KNOWN_IDS.items():
        dest = REPO_ROOT / rel_path
        if not _is_placeholder(dest):
            print(f" ✓ skip {rel_path} (already downloaded)")
            skipped.append(rel_path)
            continue
        print(f"{rel_path} (ID {sound_id})")
        info = _get_sound_info(sound_id)
        if info is None:
            failed.append(rel_path)
            continue
        if _download_preview(info, dest):
            size_kb = dest.stat().st_size // 1024
            print(f"{info['name']} by {info['username']}"
                  f" [{size_kb} KB, {info['duration']:.1f}s, {license_str}]")
            succeeded.append(rel_path)
            if "CC-BY" in license_str:
                _record_attribution(rel_path, info, license_str)
        else:
            failed.append(rel_path)

    # ── Search queries ─────────────────────────────────────────────────────────
    print(f"\n{rule}")
    print("Searching and downloading remaining SFX …")
    print(rule)
    for rel_path, (query, max_dur) in SEARCH_QUERIES.items():
        dest = REPO_ROOT / rel_path
        if not _is_placeholder(dest):
            print(f" ✓ skip {rel_path} (already downloaded)")
            skipped.append(rel_path)
            continue
        print(f"{rel_path} (search: '{query}', max {max_dur}s)")
        info = _search_sound(query, max_dur)
        if info is None:
            failed.append(rel_path)
            continue
        if _download_preview(info, dest):
            size_kb = dest.stat().st_size // 1024
            # The license shown here is whatever the API reported for the hit.
            print(f"{info['name']} by {info['username']}"
                  f" [{size_kb} KB, {info['duration']:.1f}s, {info['license']}]")
            succeeded.append(rel_path)
        else:
            failed.append(rel_path)

    # ── Summary ────────────────────────────────────────────────────────────────
    print(f"\n{rule}")
    print(f"Done. ✓ {len(succeeded)} downloaded"
          f" · ↷ {len(skipped)} skipped"
          f" · ✗ {len(failed)} failed")
    if failed:
        print("\nFailed files (fix manually):")
        for name in failed:
            print(f" {name}")
    print(f"{rule}\n")


if __name__ == "__main__":
    main()