From 6045068c40130b97c3988ba42c46f33262419cea Mon Sep 17 00:00:00 2001
From: Hoang Hong Quan <lzhoang2302@gmail.com>
Date: Fri, 27 Dec 2024 16:37:20 +0700
Subject: [PATCH] Improve GitHub web scraping error handling and code
 organization

---
 Scripts/github.py | 64 +++++++++++++++++++++++++++++++----------------
 1 file changed, 42 insertions(+), 22 deletions(-)
diff --git a/Scripts/github.py b/Scripts/github.py
index dd505f4..b1c094c 100644
--- a/Scripts/github.py
+++ b/Scripts/github.py
@@ -12,9 +12,11 @@ class Github:
         
     def get_latest_commit(self, owner, repo, branch="main"):
         url = "https://github.com/{}/{}/commits/{}".format(owner, repo, branch)
-
         response = self.fetcher.fetch_and_parse_content(url)
 
+        if not response:
+            raise ValueError("Failed to fetch commit information from GitHub.")
+
         for line in response.splitlines():
             if "href=\"" in line and "/commit/" in line and "title=\"" in line:
                 sha = line.split("href=\"", 1)[1].split("\"", 1)[0].split("/commit/")[-1]
@@ -34,43 +36,61 @@ class Github:
         url = "https://github.com/{}/{}/releases".format(owner, repo)
         response = self.fetcher.fetch_and_parse_content(url)
 
-        body = ""
-        tag_name = None
-        assets = []
-        
-        for line in response.splitlines():
-            if "<a" in line and "href=\"" in line and "/releases/tag/" in line and not tag_name:
-                tag_name = line.split("/releases/tag/")[1].split("\"")[0]
-            elif "<div" in line and "body-content" in line:
-                body = response.split(line.split(">", 1)[0], 1)[1].split("</div>", 1)[0][1:]
-                break
+        if not response:
+            raise ValueError("Failed to fetch release information from GitHub.")
+
+        tag_name = self._extract_tag_name(response)
+        body = self._extract_body_content(response)
 
         release_tag_url = "https://github.com/{}/{}/releases/expanded_assets/{}".format(owner, repo, tag_name)
         response = self.fetcher.fetch_and_parse_content(release_tag_url)
 
+        if not response:
+            raise ValueError("Failed to fetch expanded assets information from GitHub.")
+
+        assets = self._extract_assets(response)
+
+        return {
+            "body": body,
+            "assets": assets
+        }
+
+    def _extract_tag_name(self, response):
+        # Tag of the first anchor that links to /releases/tag/; None when the page has no such anchor.
+        anchors = (row for row in response.splitlines()
+                   if "<a" in row and "href=\"" in row and "/releases/tag/" in row)
+        return next((row.split("/releases/tag/")[1].split("\"")[0] for row in anchors), None)
+
+    def _extract_body_content(self, response):
+        # Text between the first "body-content" <div ...> opening tag and the next </div>; "" when absent.
+        rows = response.splitlines()
+        opener = next((row.split(">", 1)[0] for row in rows if "<div" in row and "body-content" in row), None)
+        return "" if opener is None else response.split(opener, 1)[1].split("</div>", 1)[0][1:]
+
+    def _extract_assets(self, response):
+        assets = []
+
         for line in response.splitlines():
             if "<a" in line and "href=\"" in line and "/releases/download" in line:
                 download_link = line.split("href=\"", 1)[1].split("\"", 1)[0]
 
                 if "tlwm" in download_link or ("tlwm" not in download_link and "DEBUG" not in download_link.upper()):
                     asset_data = response.split(line)[1].split("</div>", 2)[1]
-
-                    try:
-                        asset_id = "".join(char for char in asset_data.split("datetime=\"")[-1].split("\"")[0][::-1] if char.isdigit())[:9]
-                    except:
-                        asset_id = "".join(random.choices('0123456789', k=9))
-
+                    asset_id = self._generate_asset_id(asset_data)
                     assets.append({
                         "product_name": self.extract_asset_name(download_link.split("/")[-1]), 
                         "id": int(asset_id), 
                         "url": "https://github.com" + download_link
                     })
 
-        return {
-            "body": body,
-            "assets": assets
-        }
-    
+        return assets
+
+    def _generate_asset_id(self, asset_data):
+        # Up to 9 digits of the last datetime="..." value, read backwards; random digits when none exist.
+        stamp = asset_data.split("datetime=\"")[-1].split("\"")[0]
+        digits = "".join(char for char in reversed(stamp) if char.isdigit())[:9]
+        return digits or "".join(random.choices('0123456789', k=9))  # "" would crash the caller's int()
+
     def extract_asset_name(self, file_name):
         end_idx = len(file_name)
         if "-" in file_name: