Fix errors parsing fandom banners

2023-12-31 17:37:25 +01:00 · 2023-12-31 17:37:25 +01:00 · 29e594769e
parent 826c0fcdcd
commit 29e594769e
1 changed file with 9 additions and 5 deletions
--- a/banner_parser.py
+++ b/banner_parser.py
@@ -24,12 +24,16 @@ def load_page():
 def load_page_static():
-    with open('banner_history_wiki\Wish History _ Genshin Impact Wiki _ Fandom.html', 'r') as f:
+    try:
-        return f.read()
+        with open('banner_history_wiki\Wish History _ Genshin Impact Wiki _ Fandom.html', 'r') as f:
+            return f.read()
+    except UnicodeDecodeError as e:
+        with open('banner_history_wiki\Wish History _ Genshin Impact Wiki _ Fandom.html', 'r', encoding="utf8") as f:
+            return f.read()
 def parse_table(table: Tag):
-    header = table.find('thead')
+    header = table.find('tbody') # thead doesn't exist for me
    header_row = header.find('tr')
    header_cells = header_row.find_all('th')
    header_titles = [x.text.strip() for x in header_cells]
@@ -39,7 +43,7 @@ def parse_table(table: Tag):
        return None
    body = table.find('tbody')
-    body_rows = body.find_all('tr')
+    body_rows = body.find_all('tr')[1:] # as thead doesn't exist for me, I add [1:]
    reset_time = time(hour=5, minute=0, second=0)  # UTC+1 == Europe
@@ -89,7 +93,7 @@ def parse_page(page_text):
    results = []
    soup = BeautifulSoup(page_text, 'lxml')
-    tables = soup.find_all('table', class_='article-table alternating-colors-table sortable jquery-tablesorter')
+    tables = soup.find_all('table', class_='article-table alternating-colors-table sortable') # jquery-tablesorter is added after opening the page, it doesn't exist when you wget.
    for table in tables:
        results += parse_table(table)