news.packages: add support for the news archive

parent bdf45691
import aiohttp
from bs4 import BeautifulSoup
import re
import io
import gzip
from .. import models
async def packages_parser(html: str, url: str):
from pprint import pprint
async def packages_parser(html: str, url: str):
soup = BeautifulSoup(html, "html.parser")
pre_tag = soup.find("pre")
if not pre_tag:
return {}
text = pre_tag.get_text()
pre_text = pre_tag.get_text(strip=True)
if "Было удалено вложение" in pre_text and "attachment" in pre_text:
attachment_link = pre_tag.find("a", href=True)
if attachment_link:
attachment_url = attachment_link["href"]
text = await _fetch(attachment_url)
else:
return models.PackagesModel(**{"added": [], "removed": [], "updated": [], "url": "none"})
else:
text = pre_tag.get_text()
lines = text.strip().splitlines()
sections = {"added": [], "removed": [], "updated": []}
current_section = None
current_package = {}
seen_changelog = False
for line in lines:
# Смена секции
......@@ -23,18 +37,21 @@ async def packages_parser(html: str, url: str):
sections[current_section].append(current_package)
current_section = "added"
current_package = {}
seen_changelog = False
continue
elif re.search(r"^\s*\d+\s+REMOVED package[s]?", line):
if current_package and current_section:
sections[current_section].append(current_package)
current_section = "removed"
current_package = {}
seen_changelog = False
continue
elif re.search(r"^\s*\d+\s+UPDATED packages[s]?", line):
if current_package and current_section:
sections[current_section].append(current_package)
current_section = "updated"
current_package = {}
seen_changelog = False
continue
if current_section == "removed":
......@@ -50,22 +67,27 @@ async def packages_parser(html: str, url: str):
match = re.match(r"^(\S+)\s+-\s+(.*)", line)
if match:
if current_package:
if current_package and current_section:
sections[current_section].append(current_package)
current_package = {}
current_package["name"] = match.group(1)
current_package["description"] = await _clean_description(match.group(2))
current_package = {
"name": match.group(1),
"description": await _clean_description(match.group(2)),
}
seen_changelog = False
continue
match = re.match(
r"^\*\s+\w+\s+\w+\s+\d+\s+\d+\s+(.+?) <([a-z0-9._-]+) на altlinux>", line
)
if match and current_section != "removed":
if current_package:
if (
current_package
and current_section != "removed"
and not seen_changelog
):
match = re.match(r"^\*\s+\w+\s+\w+\s+\d+\s+\d+\s+(.+?) <([^@\s>]+)(?:@altlinux| на altlinux)>", line)
if match:
current_package["maintainer_name"] = match.group(1).strip()
current_package["maintainer_nick"] = match.group(2).strip()
seen_changelog = True
continue
if current_package and current_section:
sections[current_section].append(current_package)
......@@ -79,3 +101,14 @@ async def _clean_description(desc: str):
desc = re.sub(r'\s+', ' ', desc)
desc = re.sub(r'\[\d+[KMG]?\]', '', desc).strip()
return desc
async def _fetch(url: str) -> str:
    """Download a gzip-compressed resource and return it as decoded text.

    Sends a GET request for ``url`` through a fresh ``aiohttp`` client
    session, calls ``raise_for_status()`` on the response, then gunzips the
    body in memory and decodes it as UTF-8.
    """
    async with aiohttp.ClientSession() as http, http.get(url) as reply:
        # Surface HTTP error statuses before touching the body.
        reply.raise_for_status()
        payload = await reply.read()

    # The downloaded bytes are gzip data; decompress them from memory.
    with gzip.GzipFile(fileobj=io.BytesIO(payload)) as archive:
        return archive.read().decode('utf-8')
from telegrinder import Dispatch, Message
from telegrinder.rules import Command, Argument
from telegrinder.node import Error
from altrepo import altrepo
......@@ -30,8 +31,15 @@ async def news_handler(m: Message, news_type: str | None, translate: str | None)
await m.answer("Новостей пока нет.")
return
added, removed, updated, info_message = await format_packages(packages_data, bool(translate))
if bool(translate):
await m.answer("Начался перевод, это может занять до 10 минут.")
try:
added, removed, updated, info_message = await format_packages(packages_data, bool(translate))
except:
await m.answer("перевод не удался :(")
return
await m.answer(f"{added}{removed}{info_message}")
for msg in updated:
res = await m.answer(msg)
await m.answer(msg)
......@@ -27,6 +27,7 @@ async def format_packages(packages: PackagesModel, translate: bool | None = None
removed_message += "Удалены:\n"
for package in packages.removed:
removed_message += await _format_removed_package(package)
removed_message += "\n"
if packages.updated:
updated_messages = await _format_updated_packages(packages, translate)
......@@ -54,7 +55,7 @@ async def _format_package(package: PackageElementModel, translate: bool):
async def _format_removed_package(package: RemovedPackageElementModel):
    """Render one removed package as a bold link followed by a newline.

    The link points at ``{repo}srpms/{package.name}`` and uses the package
    name as its visible text. The entry ends with a single newline.
    """
    message = HTMLFormatter(
        bold(link(f"{repo}srpms/{package.name}", text=package.name))) + "\n"
    return message
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment