feat: allow specifying the HTML attribute for --next & --img
This commit is contained in:
parent
e400554060
commit
3ea6392689
40
main.py
40
main.py
|
|
@ -11,9 +11,9 @@ def fetch_page(
|
|||
i: int,
|
||||
url: str,
|
||||
download: bool,
|
||||
img_id: str,
|
||||
img_tag: {str,str},
|
||||
out_dir: Path,
|
||||
next_rel: str,
|
||||
next_tag: {str,str},
|
||||
) -> str:
|
||||
"""Download a comic from a URL and return the URL of the next page
|
||||
|
||||
|
|
@ -21,9 +21,9 @@ def fetch_page(
|
|||
:param i: which page of the comic this is
|
||||
:param url: the URL of the page
|
||||
:param download: whether to download the comic
|
||||
:param img_id: the id of the <img> tag where the comic is located
|
||||
:param img_tag: the html attribute and its content of the <img> tag where the comic is located
|
||||
:param out_dir: the directory the comic is saved to
|
||||
:param next_rel: the tag of the "next" button
|
||||
:param next_tag: the html attribute and its content of the "next" button
|
||||
:return: the URL of the next page, None if it doesn't exist
|
||||
"""
|
||||
if not url.startswith("https://"):
|
||||
|
|
@ -32,7 +32,7 @@ def fetch_page(
|
|||
soup = BeautifulSoup(page.text, "html.parser")
|
||||
|
||||
if download:
|
||||
img_url = soup.find("img", id=img_id).get("src")
|
||||
img_url = soup.find("img", **img_tag).get("src")
|
||||
if not img_url.startswith("https://"):
|
||||
img_url = domain + img_url
|
||||
comic = requests.get(img_url)
|
||||
|
|
@ -46,7 +46,7 @@ def fetch_page(
|
|||
output_file.write(comic.content)
|
||||
output_file.close()
|
||||
|
||||
return soup.find("a", rel=next_rel).get("href")
|
||||
return soup.find("a", **next_tag).get("href")
|
||||
|
||||
|
||||
def fetch_comic(args):
|
||||
|
|
@ -56,14 +56,18 @@ def fetch_comic(args):
|
|||
|
||||
out_dir = Path(os.getcwd()).joinpath(args.output)
|
||||
|
||||
next_rel = args.next
|
||||
img_id = args.img
|
||||
if not next_rel or not img_id:
|
||||
for style in soup.find_all("link", rel="stylesheet"):
|
||||
if style.get("href").endswith("/comiccontrol/defaultstyles.css"):
|
||||
img_id = "cc-comic" if not args.img else args.img
|
||||
next_rel = "next" if not args.next else args.next
|
||||
break
|
||||
|
||||
for style in soup.find_all("link", rel="stylesheet"):
|
||||
if style.get("href").endswith("/comiccontrol/defaultstyles.css"):
|
||||
img_tag = {"id": "cc-comic"}
|
||||
next_tag = {"rel": "next"}
|
||||
break
|
||||
if args.next:
|
||||
next_split = args.next.split("::")
|
||||
next_tag = {next_split[0]: next_split[1]}
|
||||
if args.img:
|
||||
img_split = args.img.split("::")
|
||||
img_tag = {img_split[0]: img_split[1]}
|
||||
|
||||
if args.first:
|
||||
first_url = soup.find("a", rel=args.first).get("href")
|
||||
|
|
@ -73,7 +77,7 @@ def fetch_comic(args):
|
|||
soup = BeautifulSoup(page.text, "html.parser")
|
||||
|
||||
try:
|
||||
next_url = soup.find("a", rel=next_rel).get("href")
|
||||
next_url = soup.find("a", **next_tag).get("href")
|
||||
except AttributeError:
|
||||
print(soup.find("a"))
|
||||
i = 1
|
||||
|
|
@ -82,7 +86,7 @@ def fetch_comic(args):
|
|||
download = False if i < begin else True
|
||||
print(f"Fetching: {next_url}")
|
||||
next_url = fetch_page(
|
||||
args.domain, i, next_url, download, img_id, out_dir, next_rel
|
||||
args.domain, i, next_url, download, img_tag, out_dir, next_tag
|
||||
)
|
||||
if not next_url:
|
||||
break # end reached
|
||||
|
|
@ -101,13 +105,13 @@ def cli():
|
|||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--img", default=None, help="html 'id' tag of the comic's img element(s)"
|
||||
"--img", default=None, help="a unique html_attribute::content of the comic's img element"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--first", default=None, help="html 'rel' tag of the 'first comic' button"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--next", default=None, help="html 'rel' tag of the 'next comic' button"
|
||||
"--next", default=None, help="html attribute and content of the 'next comic' button"
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
|
|
|||
Loading…
Reference in a new issue