"""Scrape class meetings out of a saved schedule page (table.html)."""
import datetime

import ics
from bs4 import BeautifulSoup

# Fixed UTC+8 offset attached to every parsed meeting time.
timezone = datetime.timezone(datetime.timedelta(hours=8))

# strptime layout of "<date> <time>" as assembled in _parse_meeting.
_MEETING_FORMAT = "%d/%m/%Y %I:%M%p"


def _parse_meeting(day_time, date_range):
    """Return the (start, end) timezone-aware datetimes of one meeting row.

    day_time is the text of the days-and-times cell; its 2nd and 4th
    space-separated tokens are used as the start and end times.
    date_range is the text of the date cell; its 1st and 3rd
    space-separated tokens are used as the start and end dates.
    Raises ValueError/IndexError if either cell has an unexpected layout.
    """
    time_tokens = day_time.split(" ")
    date_tokens = date_range.split(" ")
    start = datetime.datetime.strptime(
        date_tokens[0] + " " + time_tokens[1], _MEETING_FORMAT
    )
    end = datetime.datetime.strptime(
        date_tokens[2] + " " + time_tokens[3], _MEETING_FORMAT
    )
    # replace(), not astimezone(): the parsed values are naive, and
    # astimezone() would first interpret them in the machine's local zone
    # and then shift them, giving wrong times on any host not on UTC+8.
    return start.replace(tzinfo=timezone), end.replace(tzinfo=timezone)


# Load table.html into BeautifulSoup; the with-block closes the file handle.
with open("table.html") as html_file:
    soup = BeautifulSoup(html_file, "html.parser")

# One table with class PSGROUPBOXWBO per course.
course_tables = soup.find_all("table", class_="PSGROUPBOXWBO")

# Maps course header text -> list of meeting dicts with the keys
# "class group", "class type", "location" and, when scheduled,
# "start" and "end".
timetable_dict = {}

for i, table in enumerate(course_tables):
    course = table.find("td", class_="PAGROUPDIVIDER").text
    meetings = timetable_dict.setdefault(course, [])

    # The nested tables carry the course's positional index in their ids.
    course_table = table.find("table", id="ACE_DERIVED_REGFRM1_DESCR20${}".format(i))
    time_table = course_table.find("table", id="CLASS_MTG_VW$scroll${}".format(i))
    inner_table = time_table.find("table", class_="PSLEVEL3GRID")

    # Skip the header row.
    for row in inner_table.find_all("tr")[1:]:
        # str.strip() also removes non-breaking spaces, so a cell that only
        # holds "\xa0" becomes the empty string.
        cells = [td.text.strip() for td in row.find_all("td")]

        # Class group and type are only filled in on the first row of a
        # group; blank cells inherit the value of the previous meeting
        # (or stay empty when there is no previous meeting).
        previous = meetings[-1] if meetings else {}
        row_dict = {
            "class group": cells[1] or previous.get("class group", ""),
            "class type": cells[2] or previous.get("class type", ""),
            "location": cells[4],
        }

        # Meetings whose time is still "TBA" are stored without start/end.
        if cells[3] != "TBA":
            row_dict["start"], row_dict["end"] = _parse_meeting(cells[3], cells[6])

        meetings.append(row_dict)

# Create calendar
cal = ics.Calendar()
# Add one calendar event per meeting that has both a start and an end time.
for course in timetable_dict:
    for class_group in timetable_dict[course]:
        # Meetings stored without "start"/"end" cannot be placed on a
        # calendar; check this before building an event that would be
        # thrown away.
        if "start" not in class_group or "end" not in class_group:
            continue

        event = ics.Event()
        event.name = "{} - {} - {}".format(
            course, class_group["class group"], class_group["class type"]
        )
        event.begin = class_group["start"]
        event.end = class_group["end"]
        event.location = class_group["location"]
        cal.events.add(event)

# Save calendar. UTF-8 is the iCalendar default charset, and newline=""
# writes the serialized line endings untouched instead of letting text
# mode translate them a second time on Windows.
with open('timetable.ics', 'w', encoding="utf-8", newline="") as f:
    f.writelines(cal)