mirror of
https://github.com/outbackdingo/OpCore-Simplify.git
synced 2026-01-27 10:19:49 +00:00
Improve GitHub web scraping error handling and code organization
This commit is contained in:
@@ -12,9 +12,11 @@ class Github:
|
||||
|
||||
def get_latest_commit(self, owner, repo, branch="main"):
|
||||
url = "https://github.com/{}/{}/commits/{}".format(owner, repo, branch)
|
||||
|
||||
response = self.fetcher.fetch_and_parse_content(url)
|
||||
|
||||
if not response:
|
||||
raise ValueError("Failed to fetch commit information from GitHub.")
|
||||
|
||||
for line in response.splitlines():
|
||||
if "href=\"" in line and "/commit/" in line and "title=\"" in line:
|
||||
sha = line.split("href=\"", 1)[1].split("\"", 1)[0].split("/commit/")[-1]
|
||||
@@ -34,43 +36,61 @@ class Github:
|
||||
url = "https://github.com/{}/{}/releases".format(owner, repo)
|
||||
response = self.fetcher.fetch_and_parse_content(url)
|
||||
|
||||
body = ""
|
||||
tag_name = None
|
||||
assets = []
|
||||
|
||||
for line in response.splitlines():
|
||||
if "<a" in line and "href=\"" in line and "/releases/tag/" in line and not tag_name:
|
||||
tag_name = line.split("/releases/tag/")[1].split("\"")[0]
|
||||
elif "<div" in line and "body-content" in line:
|
||||
body = response.split(line.split(">", 1)[0], 1)[1].split("</div>", 1)[0][1:]
|
||||
break
|
||||
if not response:
|
||||
raise ValueError("Failed to fetch release information from GitHub.")
|
||||
|
||||
tag_name = self._extract_tag_name(response)
|
||||
body = self._extract_body_content(response)
|
||||
|
||||
release_tag_url = "https://github.com/{}/{}/releases/expanded_assets/{}".format(owner, repo, tag_name)
|
||||
response = self.fetcher.fetch_and_parse_content(release_tag_url)
|
||||
|
||||
if not response:
|
||||
raise ValueError("Failed to fetch expanded assets information from GitHub.")
|
||||
|
||||
assets = self._extract_assets(response)
|
||||
|
||||
return {
|
||||
"body": body,
|
||||
"assets": assets
|
||||
}
|
||||
|
||||
def _extract_tag_name(self, response):
    """Return the tag of the first release link found in *response*.

    Scans the page text line by line for an anchor whose ``href`` contains
    ``/releases/tag/`` and returns whatever follows that marker up to the
    next double quote. Returns ``None`` when no line matches.
    """
    marker = "/releases/tag/"

    for row in response.splitlines():
        is_anchor = "<a" in row and "href=\"" in row
        if not (is_anchor and marker in row):
            continue

        # Text right after the marker runs up to the quote closing the href.
        after_marker = row.split(marker)[1]
        tag, _, _ = after_marker.partition("\"")
        return tag

    return None
|
||||
|
||||
def _extract_body_content(self, response):
    """Return the inner text of the first ``body-content`` ``<div>``.

    The first line mentioning both ``<div`` and ``body-content`` is located;
    everything in *response* after that line's opening tag and before the
    next ``</div>`` is returned. Returns an empty string when no such line
    exists.
    """
    marker_line = next(
        (row for row in response.splitlines() if "<div" in row and "body-content" in row),
        None,
    )
    if marker_line is None:
        return ""

    # Everything on the marker line before the first ">" (the unterminated opening tag).
    opening_tag = marker_line.partition(">")[0]
    _, _, after_tag = response.partition(opening_tag)
    inner, _, _ = after_tag.partition("</div>")

    # Drop the ">" that closes the opening tag.
    return inner[1:]
|
||||
|
||||
def _extract_assets(self, response):
    """Collect downloadable release assets from an expanded-assets page.

    Every line of *response* holding an anchor whose ``href`` contains
    ``/releases/download`` is treated as one asset. Links containing
    ``tlwm`` are always kept; any other link is dropped when it contains
    ``debug`` in any letter case.

    Returns a list of dicts with the keys ``product_name`` (from
    ``self.extract_asset_name`` applied to the last path component of the
    link), ``id`` (an ``int``) and ``url`` (the link prefixed with
    ``https://github.com``).
    """
    assets = []

    for line in response.splitlines():
        if not ("<a" in line and "href=\"" in line and "/releases/download" in line):
            continue

        download_link = line.split("href=\"", 1)[1].split("\"", 1)[0]

        # Equivalent to the original filter: keep "tlwm" links, otherwise skip debug builds.
        if "tlwm" not in download_link and "DEBUG" in download_link.upper():
            continue

        # The id is derived from the markup between the first and second
        # "</div>" after the link line. Tolerate truncated markup instead of
        # raising IndexError and aborting the whole scrape.
        trailing = response.split(line, 1)[1].split("</div>", 2)
        asset_data = trailing[1] if len(trailing) > 1 else ""

        # Fall back to a random 9-digit id if none could be derived, so the
        # int() conversion below cannot fail on an empty string.
        asset_id = self._generate_asset_id(asset_data) or "".join(random.choices('0123456789', k=9))

        assets.append({
            "product_name": self.extract_asset_name(download_link.split("/")[-1]),
            "id": int(asset_id),
            "url": "https://github.com" + download_link
        })

    return assets
|
||||
|
||||
def _generate_asset_id(self, asset_data):
    """Derive a numeric id string (at most nine digits) for an asset.

    The text following the last ``datetime="`` in *asset_data*, up to the
    next double quote, is reversed and stripped of non-digits; the first
    nine digits of the result form the id.

    When *asset_data* is not a string, or yields no digits at all, nine
    random digits are returned instead, so the result is always safe to
    pass to ``int()``.
    """
    digits = ""

    if isinstance(asset_data, str):
        timestamp = asset_data.split("datetime=\"")[-1].split("\"")[0]
        digits = "".join(char for char in reversed(timestamp) if char.isdigit())[:9]

    # The string operations above never raise for str input, so the fallback
    # has to key on an empty result rather than on an exception.
    if not digits:
        digits = "".join(random.choices('0123456789', k=9))

    return digits
|
||||
|
||||
def extract_asset_name(self, file_name):
|
||||
end_idx = len(file_name)
|
||||
if "-" in file_name:
|
||||
|
||||
Reference in New Issue
Block a user