From 001c361290211fbd35a9a12ca04d7e35fba62477 Mon Sep 17 00:00:00 2001 From: missytake Date: Tue, 23 Dec 2025 08:01:18 +0100 Subject: [PATCH] remove --first and --begin, just giving the URL where you want to start is way easier --- main.py | 54 +++++++++++++++--------------------------------------- 1 file changed, 15 insertions(+), 39 deletions(-) diff --git a/main.py b/main.py index b4a3069..397bffc 100644 --- a/main.py +++ b/main.py @@ -10,7 +10,6 @@ def fetch_page( domain: str, i: int, url: str, - download: bool, img_tag: {str,str}, out_dir: Path, next_tag: {str,str}, @@ -20,7 +19,6 @@ def fetch_page( :param domain: the comic's domain :param i: which page of the comic this is :param url: the URL of the page - :param download: whether to download the comic :param img_tag: the html attribute and its content of the tag where the comic is located :param out_dir: the directory the comic is saved to :param next_tag: the html attribute and its content of the "next" button @@ -31,20 +29,19 @@ def fetch_page( page = requests.get(url) soup = BeautifulSoup(page.text, "html.parser") - if download: - img_url = soup.find("img", **img_tag).get("src") - if not img_url.startswith("https://"): - img_url = domain + img_url - comic = requests.get(img_url) - filename = out_dir / f"{i:05d}-{url.split('/')[-1]}.{img_url.split('.')[-1]}" + img_url = soup.find("img", **img_tag).get("src") + if not img_url.startswith("https://"): + img_url = domain + img_url + comic = requests.get(img_url) + filename = out_dir / f"{i:05d}-{url.split('/')[-1]}.{img_url.split('.')[-1]}" - try: - output_file = open(filename, "wb") - except FileNotFoundError: - os.mkdir(out_dir) - output_file = open(filename, "wb") - output_file.write(comic.content) - output_file.close() + try: + output_file = open(filename, "wb") + except FileNotFoundError: + os.mkdir(out_dir) + output_file = open(filename, "wb") + output_file.write(comic.content) + output_file.close() return soup.find("a", **next_tag).get("href") @@ -69,24 +66,12 @@ def fetch_comic(args): img_split = args.img.split("::") img_tag = {img_split[0]: img_split[1]} - if args.first: - first_url = soup.find("a", rel=args.first).get("href") - if not first_url.startswith("https://"): - first_url = args.domain + first_url - page = requests.get(first_url) - soup = BeautifulSoup(page.text, "html.parser") - - try: - next_url = soup.find("a", **next_tag).get("href") - except AttributeError: - print(soup.find("a")) - i = 1 + next_url = args.domain + i = 0 while i < args.end: - begin = args.begin if args.begin else 1 - download = False if i < begin else True print(f"Fetching: {next_url}") next_url = fetch_page( - args.domain, i, next_url, download, img_tag, out_dir, next_tag + args.domain, i, next_url, img_tag, out_dir, next_tag ) if not next_url: break # end reached @@ -107,9 +92,6 @@ def cli(): parser.add_argument( "--img", default=None, help="a unique html_attribute::content of the comic's img element" ) - parser.add_argument( - "--first", default=None, help="html 'rel' tag of the 'first comic' button" - ) parser.add_argument( "--next", default=None, help="html attribute and content of the 'next comic' button" ) @@ -117,12 +99,6 @@ def cli(): parser.add_argument( "--limit", default=None, help="Maximum of images to download", type=int ) - parser.add_argument( - "--begin", - default=None, - help="At which page to start with downloading", - type=int, - ) parser.add_argument( "--end", default=99999,