From 6045068c40130b97c3988ba42c46f33262419cea Mon Sep 17 00:00:00 2001 From: Hoang Hong Quan Date: Fri, 27 Dec 2024 16:37:20 +0700 Subject: [PATCH] Improve GitHub web scraping error handling and code organization --- Scripts/github.py | 64 +++++++++++++++++++++++++++++++---------------- 1 file changed, 42 insertions(+), 22 deletions(-) diff --git a/Scripts/github.py b/Scripts/github.py index dd505f4..b1c094c 100644 --- a/Scripts/github.py +++ b/Scripts/github.py @@ -12,9 +12,11 @@ class Github: def get_latest_commit(self, owner, repo, branch="main"): url = "https://github.com/{}/{}/commits/{}".format(owner, repo, branch) - response = self.fetcher.fetch_and_parse_content(url) + if not response: + raise ValueError("Failed to fetch commit information from GitHub.") + for line in response.splitlines(): if "href=\"" in line and "/commit/" in line and "title=\"" in line: sha = line.split("href=\"", 1)[1].split("\"", 1)[0].split("/commit/")[-1] @@ -34,43 +36,61 @@ class Github: url = "https://github.com/{}/{}/releases".format(owner, repo) response = self.fetcher.fetch_and_parse_content(url) - body = "" - tag_name = None - assets = [] - - for line in response.splitlines(): - if "", 1)[0], 1)[1].split("", 1)[0][1:] - break + if not response: + raise ValueError("Failed to fetch release information from GitHub.") + + tag_name = self._extract_tag_name(response) + body = self._extract_body_content(response) release_tag_url = "https://github.com/{}/{}/releases/expanded_assets/{}".format(owner, repo, tag_name) response = self.fetcher.fetch_and_parse_content(release_tag_url) + if not response: + raise ValueError("Failed to fetch expanded assets information from GitHub.") + + assets = self._extract_assets(response) + + return { + "body": body, + "assets": assets + } + + def _extract_tag_name(self, response): + for line in response.splitlines(): + if "", 1)[0], 1)[1].split("", 1)[0][1:] + return "" + + def _extract_assets(self, response): + assets = [] + for line in response.splitlines(): if "", 2)[1] - - try: - asset_id = "".join(char for char in asset_data.split("datetime=\"")[-1].split("\"")[0][::-1] if char.isdigit())[:9] - except: - asset_id = "".join(random.choices('0123456789', k=9)) - + asset_id = self._generate_asset_id(asset_data) assets.append({ "product_name": self.extract_asset_name(download_link.split("/")[-1]), "id": int(asset_id), "url": "https://github.com" + download_link }) - return { - "body": body, - "assets": assets - } - + return assets + + def _generate_asset_id(self, asset_data): + try: + return "".join(char for char in asset_data.split("datetime=\"")[-1].split("\"")[0][::-1] if char.isdigit())[:9] + except: + return "".join(random.choices('0123456789', k=9)) + def extract_asset_name(self, file_name): end_idx = len(file_name) if "-" in file_name: