1"""LICENSE 

2Copyright 2015 Hermann Krumrey <hermann@krumreyh.com> 

3 

4This file is part of manga-dl. 

5 

6manga-dl is free software: you can redistribute it and/or modify 

7it under the terms of the GNU General Public License as published by 

8the Free Software Foundation, either version 3 of the License, or 

9(at your option) any later version. 

10 

11manga-dl is distributed in the hope that it will be useful, 

12but WITHOUT ANY WARRANTY; without even the implied warranty of 

13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

14GNU General Public License for more details. 

15 

16You should have received a copy of the GNU General Public License 

17along with manga-dl. If not, see <http://www.gnu.org/licenses/>. 

18LICENSE""" 

19 

import re
import json
import cfscrape  # requests wrapper that bypasses Cloudflare's anti-bot page
from typing import List
from manga_dl.entities.Chapter import Chapter
from manga_dl.scrapers.Scraper import Scraper


class MangaDexScraper(Scraper):
    """
    Scraper for mangadex.org
    """

    @classmethod
    def name(cls) -> str:
        """
        :return: The name of the scraper
        """
        return "mangadex"

    @classmethod
    def url_matches(cls, url: str) -> bool:
        """
        Checks whether a URL matches this scraper
        :param url: The URL to check
        :return: Whether the URL is valid
        """
        return bool(re.match(r"^https://mangadex\.org/title/[0-9]+", url))

    def generate_url(self, _id: str) -> str:
        """
        Generates a URL based on an ID
        :param _id: The ID to use
        :return: The generated URL
        """
        return "https://mangadex.org/title/" + _id

    def _load_chapters(self, url: str) -> List[Chapter]:
        """
        Loads the chapters from mangadex.org
        :param url: The URL to scrape
        :return: The chapters found for the series
        """
        scraper = cfscrape.create_scraper()

        # The numeric series ID is the first path segment after /title/
        mangadex_id = url.split("https://mangadex.org/title/")[1].split("/")[0]
        manga_url = "https://mangadex.org/api/manga/" + str(mangadex_id)

        resp = scraper.get(manga_url)

        if resp.status_code >= 300:
            self.logger.warning("Unsuccessful request ({})"
                                .format(resp.status_code))
            self.logger.debug(resp.text)
            return []

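        # The manga API response is assumed to look roughly like this;
        # only the keys accessed below are shown:
        # {
        #     "manga": {"title": "Some Series"},
        #     "chapter": {
        #         "123456": {
        #             "lang_code": "gb",
        #             "chapter": "1",
        #             "title": "Some Chapter",
        #             "group_name": "Some Group"
        #         }
        #     }
        # }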

        series_info = json.loads(resp.text)
        series_title = series_info["manga"]["title"]
        chapter_list = series_info.get("chapter", {})

        # Fall back to the series title if no download destination was set
        if self.destination is None:
            destination = series_title
        else:
            destination = self.destination

        chapters = []

        for chapter_id, chapter in chapter_list.items():
            chapter_url = "https://mangadex.org/api/chapter/" + str(chapter_id)
            chapters.append(Chapter(
                chapter_url,
                chapter["lang_code"],
                series_title,
                chapter["chapter"],
                destination,
                self.format,
                self.get_image_pages,
                chapter["title"],
                chapter["group_name"]
            ))

        return chapters

    @staticmethod
    def get_image_pages(_self: Chapter, url: str) -> List[str]:
        """
        Callback method for the Chapter object.
        Loads the image URLs for the pages of a chapter
        :param _self: The chapter that calls this method
        :param url: The base chapter URL
        :return: The page image URLs
        """
        scraper = cfscrape.create_scraper()
        resp = scraper.get(url)

        if resp.status_code >= 300:
            _self.logger.warning("Unsuccessful request ({})"
                                 .format(resp.status_code))
            _self.logger.debug(resp.text)
            return []

        chapter_info = json.loads(resp.text)
        image_urls = []

        # A bare "/data/" means the images are hosted on mangadex.org itself
        server = chapter_info["server"]
        if server == "/data/":
            server = "CF!https://mangadex.org/data/"  # Cloudflare protected

        chapter_hash = chapter_info["hash"]
        base_url = server + chapter_hash + "/"

        # Each page entry is a filename appended to <server><hash>/
        for page in chapter_info["page_array"]:
            image_urls.append(base_url + page)

        return image_urls
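

# ----------------------------------------------------------------------------
# Usage sketch (not part of the original module): illustrates how the pieces
# above fit together. The no-argument constructor is an assumption; the real
# Scraper base class in manga_dl.scrapers.Scraper may require different setup,
# and would normally expose a public wrapper around _load_chapters().
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    url = "https://mangadex.org/title/34765"  # hypothetical series URL

    if MangaDexScraper.url_matches(url):
        scraper = MangaDexScraper()  # assumed: no-arg init is permitted
        # _load_chapters is called directly here for illustration only
        for chapter in scraper._load_chapters(url):
            print(chapter)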