Commit 29506e04 authored by Vitaly Lipatov

route-web-api: detect TSPU throttling in site checks

Download the HTML page via dgw, parse CSS/JS asset URLs, and try downloading them. If an asset download stalls at ~16KB until the timeout expires, report it as TSPU throttling (the typical TCP initial window cutoff signature). Runs in parallel with the existing gateway checks. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
parent c2ccca0e
......@@ -192,19 +192,101 @@ def get_whois(domain):
return []
# Time limit handed to curl (-m) for each asset download; an asset whose
# transfer lasts to within one second of this limit is treated as stalled.
THROTTLE_TIMEOUT = 8
# Byte threshold used by _detect_throttle: a page or asset of at least this
# size is taken as proof that the transfer was not cut off early.
THROTTLE_SIZE_LIMIT = 32768 # 32KB — above typical TCP initial window
def _detect_throttle(proxy, base_url):
"""Detect TSPU throttling by downloading page and checking assets.
Downloads the HTML page via proxy. If the page is small (< 32KB),
parses it for CSS/JS asset URLs and tries downloading them.
Throttling signature: download stalls at ~16KB after timeout.
Returns dict with throttle info or None.
"""
# Download main page
try:
result = subprocess.run(
["curl", "-4", "--proxy", proxy, "-s", "-m", "10", "-L", base_url],
capture_output=True, timeout=15,
)
if result.returncode != 0:
return None
page = result.stdout.decode("utf-8", errors="ignore")
except (subprocess.TimeoutExpired, OSError):
return None
if len(result.stdout) >= THROTTLE_SIZE_LIMIT:
return None # page itself is large enough, no throttle detected
# Parse asset URLs (CSS, JS) from HTML
parsed_url = urllib.parse.urlparse(base_url)
origin = "%s://%s" % (parsed_url.scheme, parsed_url.netloc)
assets = []
for m in re.finditer(
r'(?:href|src)=["\']([^"\']*\.(?:css|js)(?:\?[^"\']*)?)["\']', page
):
href = m.group(1)
if href.startswith("//"):
href = "https:" + href
elif href.startswith("/"):
href = origin + href
elif not href.startswith("http"):
href = base_url.rstrip("/") + "/" + href
assets.append(href)
if not assets:
return None
# Try downloading assets to detect throttle
for asset_url in assets[:3]:
try:
result = subprocess.run(
["curl", "-4", "--proxy", proxy, "-o", "/dev/null", "-s",
"-w", "%{size_download} %{time_total}",
"-m", str(THROTTLE_TIMEOUT), "-L", asset_url],
capture_output=True, text=True, timeout=THROTTLE_TIMEOUT + 5,
)
parts = result.stdout.strip().split()
if len(parts) < 2:
continue
size = int(float(parts[0]))
elapsed = float(parts[1])
# Throttle: stuck near 16KB, timeout reached
if 0 < size < THROTTLE_SIZE_LIMIT and elapsed >= THROTTLE_TIMEOUT - 1:
fname = asset_url.split("/")[-1].split("?")[0]
return {
"throttled": True,
"asset": fname,
"url": asset_url,
"size": size,
"time": round(elapsed, 1),
}
# Got large file — not throttled
if size >= THROTTLE_SIZE_LIMIT:
return {"throttled": False}
except (subprocess.TimeoutExpired, OSError, ValueError):
continue
return None
def check_site(domain):
"""Check domain: resolve IPs, find in route lists, whois, test gateways."""
url = "https://%s/" % domain
# Run gateway checks (IPv4 + IPv6) and whois in parallel
# Run gateway checks (IPv4 + IPv6), whois and throttle in parallel
checks = {}
workers = len(CHECK_GATEWAYS) * 2 + 1
workers = len(CHECK_GATEWAYS) * 2 + 2
dgw_proxy = CHECK_GATEWAYS[0][1] # dgw — direct path
with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as pool:
gw_futures = []
for name, proxy in CHECK_GATEWAYS:
gw_futures.append(pool.submit(_check_one, name, proxy, url, "-4"))
gw_futures.append(pool.submit(_check_one, name, proxy, url, "-6"))
whois_future = pool.submit(get_whois, domain)
throttle_future = pool.submit(_detect_throttle, dgw_proxy, url)
for future in concurrent.futures.as_completed(gw_futures):
name, ipver, status, code = future.result()
......@@ -218,13 +300,17 @@ def check_site(domain):
checks[name]["http_code_v6"] = code
whois_info = whois_future.result()
throttle = throttle_future.result()
ips = resolve_domain(domain)
routes = find_in_routes(domain, ips)
return {
result = {
"domain": domain, "ips": ips, "routes": routes,
"whois": whois_info, "checks": checks,
}
if throttle:
result["throttle"] = throttle
return result
# domain, IPv4, IPv4/CIDR, IPv6, IPv6/CIDR
......@@ -843,6 +929,23 @@ function renderCheck(data) {
gsec.appendChild(gwrap);
container.appendChild(gsec);
// Throttle detection
if (data.throttle) {
const tsec = mkDiv('check-section');
if (data.throttle.throttled) {
tsec.appendChild(mkDiv('check-section-title', '\\u0417\\u0430\\u043c\\u0435\\u0434\\u043b\\u0435\\u043d\\u0438\\u0435 (\\u0422\\u0421\\u041f\\u0423)'));
const msg = data.throttle.asset + ': ' + data.throttle.size + ' \\u0431\\u0430\\u0439\\u0442 \\u0437\\u0430 ' + data.throttle.time + '\\u0441 (\\u043e\\u0436\\u0438\\u0434\\u0430\\u043b\\u043e\\u0441\\u044c \\u0431\\u043e\\u043b\\u044c\\u0448\\u0435)';
const el = mkDiv('check-route', msg);
el.style.background = '#f8d7da';
el.style.color = '#721c24';
tsec.appendChild(el);
} else {
tsec.appendChild(mkDiv('check-section-title', '\\u0417\\u0430\\u043c\\u0435\\u0434\\u043b\\u0435\\u043d\\u0438\\u0435'));
tsec.appendChild(mkDiv('check-no-route', '\\u041d\\u0435 \\u043e\\u0431\\u043d\\u0430\\u0440\\u0443\\u0436\\u0435\\u043d\\u043e'));
}
container.appendChild(tsec);
}
// Whois
if (data.whois && data.whois.length) {
const wsec = mkDiv('check-section');
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment