Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""LICENSE
2Copyright 2015 Hermann Krumrey <hermann@krumreyh.com>
4This file is part of manga-dl.
6manga-dl is free software: you can redistribute it and/or modify
7it under the terms of the GNU General Public License as published by
8the Free Software Foundation, either version 3 of the License, or
9(at your option) any later version.
11manga-dl is distributed in the hope that it will be useful,
12but WITHOUT ANY WARRANTY; without even the implied warranty of
13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14GNU General Public License for more details.
16You should have received a copy of the GNU General Public License
17along with manga-dl. If not, see <http://www.gnu.org/licenses/>.
18LICENSE"""
20import re
21import json
22import cfscrape
23from typing import List
24from manga_dl.entities.Chapter import Chapter
25from manga_dl.scrapers.Scraper import Scraper
class MangaDexScraper(Scraper):
    """
    Scraper for mangadex.org.

    Resolves a series URL to the mangadex JSON API
    (https://mangadex.org/api/manga/<id>) and builds Chapter objects
    from the response.
    """

    @classmethod
    def name(cls) -> str:
        """
        :return: The name of the scraper
        """
        return "mangadex"

    @classmethod
    def url_matches(cls, url: str) -> bool:
        """
        Checks whether or not an URL matches for the scraper
        :param url: The URL to check
        :return: Whether the URL is valid
        """
        # Bug fix: the dots must be escaped, otherwise "." matches any
        # character (e.g. "https://mangadexXorg/title/1" would pass).
        return bool(re.match(r"^https://mangadex\.org/title/[0-9]+", url))

    def generate_url(self, _id: str) -> str:
        """
        Generates an URL based on an ID
        :param _id: The mangadex series ID to use
        :return: The generated URL
        """
        return "https://mangadex.org/title/" + _id

    def _load_chapters(self, url: str) -> List[Chapter]:
        """
        Loads the chapters from mangadex.org using the JSON API.
        :param url: The series URL to scrape
        :return: The chapters found for the series,
                 or an empty list if the API request failed
        """
        # cfscrape wraps requests to get past Cloudflare protection
        scraper = cfscrape.create_scraper()

        # The numeric series ID is the first path segment after /title/
        mangadex_id = url.split("https://mangadex.org/title/")[1].split("/")[0]
        manga_url = "https://mangadex.org/api/manga/" + mangadex_id

        resp = scraper.get(manga_url)

        if resp.status_code >= 300:
            self.logger.warning("Unsuccessful request ({})"
                                .format(resp.status_code))
            self.logger.debug(resp.text)
            return []

        series_info = json.loads(resp.text)
        series_title = series_info["manga"]["title"]
        # The "chapter" key is missing entirely when a series
        # has no chapters yet, hence the .get() with a default
        chapter_list = series_info.get("chapter", {})

        if self.destination is None:
            destination = series_title
        else:
            destination = self.destination

        chapters = []

        for chapter_id, chapter in chapter_list.items():
            chapter_url = "https://mangadex.org/api/chapter/" + str(chapter_id)
            chapters.append(Chapter(
                chapter_url,
                chapter["lang_code"],
                series_title,
                chapter["chapter"],
                destination,
                self.format,
                self.get_image_pages,
                chapter["title"],
                chapter["group_name"]
            ))

        return chapters

    @staticmethod
    def get_image_pages(_self: Chapter, url: str) -> List[str]:
        """
        Callback method for the Chapter object.
        Loads the correct image URL for a page
        :param _self: The chapter that calls this method
        :param url: The base chapter URL (JSON API endpoint)
        :return: The page image URLs,
                 or an empty list if the API request failed
        """
        scraper = cfscrape.create_scraper()
        resp = scraper.get(url)

        if resp.status_code >= 300:
            _self.logger.warning("Unsuccessful request ({})"
                                 .format(resp.status_code))
            _self.logger.debug(resp.text)
            return []

        chapter_info = json.loads(resp.text)
        image_urls = []

        server = chapter_info["server"]
        if server == "/data/":
            # The CF! prefix marks the URL as Cloudflare protected
            # for downstream handling
            server = "CF!https://mangadex.org/data/"  # Cloudflare protected

        chapter_hash = chapter_info["hash"]
        base_url = server + chapter_hash + "/"

        for page in chapter_info["page_array"]:
            image_urls.append(base_url + page)

        return image_urls