File size: 5,236 Bytes
e68321e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""Download utils."""
import logging
import subprocess
import urllib
from pathlib import Path
import requests
import torch
def is_url(url, check=True):
"""Determines if a string is a URL and optionally checks its existence online, returning a boolean."""
try:
url = str(url)
result = urllib.parse.urlparse(url)
assert all([result.scheme, result.netloc]) # check if is url
return (urllib.request.urlopen(url).getcode() == 200) if check else True # check if exists online
except (AssertionError, urllib.request.HTTPError):
return False
def gsutil_getsize(url=""):
"""
Returns the size in bytes of a file at a Google Cloud Storage URL using `gsutil du`.
Returns 0 if the command fails or output is empty.
"""
output = subprocess.check_output(["gsutil", "du", url], shell=True, encoding="utf-8")
return int(output.split()[0]) if output else 0
def url_getsize(url="https://ultralytics.com/images/bus.jpg"):
"""Returns the size in bytes of a downloadable file at a given URL; defaults to -1 if not found."""
response = requests.head(url, allow_redirects=True)
return int(response.headers.get("content-length", -1))
def curl_download(url, filename, *, silent: bool = False) -> bool:
"""Download a file from a url to a filename using curl."""
silent_option = "sS" if silent else "" # silent
proc = subprocess.run(
[
"curl",
"-#",
f"-{silent_option}L",
url,
"--output",
filename,
"--retry",
"9",
"-C",
"-",
]
)
return proc.returncode == 0
def safe_download(file, url, url2=None, min_bytes=1e0, error_msg=""):
"""
Downloads a file from a URL (or alternate URL) to a specified path if file is above a minimum size.
Removes incomplete downloads.
"""
from utils.general import LOGGER
file = Path(file)
assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
try: # url1
LOGGER.info(f"Downloading {url} to {file}...")
torch.hub.download_url_to_file(url, str(file), progress=LOGGER.level <= logging.INFO)
assert file.exists() and file.stat().st_size > min_bytes, assert_msg # check
except Exception as e: # url2
if file.exists():
file.unlink() # remove partial downloads
LOGGER.info(f"ERROR: {e}\nRe-attempting {url2 or url} to {file}...")
# curl download, retry and resume on fail
curl_download(url2 or url, file)
finally:
if not file.exists() or file.stat().st_size < min_bytes: # check
if file.exists():
file.unlink() # remove partial downloads
LOGGER.info(f"ERROR: {assert_msg}\n{error_msg}")
LOGGER.info("")
def attempt_download(file, repo="ultralytics/yolov5", release="v7.0"):
"""Downloads a file from GitHub release assets or via direct URL if not found locally, supporting backup
versions.
"""
from utils.general import LOGGER
def github_assets(repository, version="latest"):
"""Fetches GitHub repository release tag and asset names using the GitHub API."""
if version != "latest":
version = f"tags/{version}" # i.e. tags/v7.0
response = requests.get(f"https://api.github.com/repos/{repository}/releases/{version}").json() # github api
return response["tag_name"], [x["name"] for x in response["assets"]] # tag, assets
file = Path(str(file).strip().replace("'", ""))
if not file.exists():
# URL specified
name = Path(urllib.parse.unquote(str(file))).name # decode '%2F' to '/' etc.
if str(file).startswith(("http:/", "https:/")): # download
url = str(file).replace(":/", "://") # Pathlib turns :// -> :/
file = name.split("?")[0] # parse authentication https://url.com/file.txt?auth...
if Path(file).is_file():
LOGGER.info(f"Found {url} locally at {file}") # file already exists
else:
safe_download(file=file, url=url, min_bytes=1e5)
return file
# GitHub assets
assets = [f"yolov5{size}{suffix}.pt" for size in "nsmlx" for suffix in ("", "6", "-cls", "-seg")] # default
try:
tag, assets = github_assets(repo, release)
except Exception:
try:
tag, assets = github_assets(repo) # latest release
except Exception:
try:
tag = subprocess.check_output("git tag", shell=True, stderr=subprocess.STDOUT).decode().split()[-1]
except Exception:
tag = release
if name in assets:
file.parent.mkdir(parents=True, exist_ok=True) # make parent dir (if required)
safe_download(
file,
url=f"https://github.com/{repo}/releases/download/{tag}/{name}",
min_bytes=1e5,
error_msg=f"{file} missing, try downloading from https://github.com/{repo}/releases/{tag}",
)
return str(file)
|