1"""LICENSE 

2Copyright 2015 Hermann Krumrey <hermann@krumreyh.com> 

3 

4This file is part of manga-dl. 

5 

6manga-dl is free software: you can redistribute it and/or modify 

7it under the terms of the GNU General Public License as published by 

8the Free Software Foundation, either version 3 of the License, or 

9(at your option) any later version. 

10 

11manga-dl is distributed in the hope that it will be useful, 

12but WITHOUT ANY WARRANTY; without even the implied warranty of 

13MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

14GNU General Public License for more details. 

15 

16You should have received a copy of the GNU General Public License 

17along with manga-dl. If not, see <http://www.gnu.org/licenses/>. 

18LICENSE""" 

19 

import re
import json
import cfscrape  # requests wrapper that bypasses Cloudflare's anti-bot page
from typing import List
from manga_dl.entities.Chapter import Chapter
from manga_dl.scrapers.Scraper import Scraper


class MangaDexScraper(Scraper):
    """
    Scraper for mangadex.org
    """

    @classmethod
    def name(cls) -> str:
        """
        :return: The name of the scraper
        """
        return "mangadex"

    @classmethod
    def url_matches(cls, url: str) -> bool:
        """
        Checks whether a URL matches this scraper
        :param url: The URL to check
        :return: Whether the URL is valid
        """
        return bool(re.match(r"^https://mangadex\.org/title/[0-9]+", url))

    def generate_url(self, _id: str) -> str:
        """
        Generates a URL based on an ID
        :param _id: The ID to use
        :return: The generated URL
        """
        return "https://mangadex.org/title/" + _id

    def _load_chapters(self, url: str) -> List[Chapter]:
        """
        Loads the chapters from mangadex.org
        :param url: The URL to scrape
        :return: The chapters found for the series
        """
        scraper = cfscrape.create_scraper()

        # The numeric series ID is the first path segment after /title/
        mangadex_id = url.split("https://mangadex.org/title/")[1].split("/")[0]
        manga_url = "https://mangadex.org/api/manga/" + str(mangadex_id)

        resp = scraper.get(manga_url)

        if resp.status_code >= 300:
            self.logger.warning("Unsuccessful request ({})"
                                .format(resp.status_code))
            self.logger.debug(resp.text)
            return []

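        # The manga API response is assumed to look roughly like this;
        # only the keys accessed below are shown:
        # {
        #     "manga": {"title": "Some Series"},
        #     "chapter": {
        #         "123456": {
        #             "lang_code": "gb",
        #             "chapter": "1",
        #             "title": "Some Chapter",
        #             "group_name": "Some Group"
        #         }
        #     }
        # }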

        series_info = json.loads(resp.text)
        series_title = series_info["manga"]["title"]
        chapter_list = series_info.get("chapter", {})

        # Fall back to the series title if no download destination was set
        if self.destination is None:
            destination = series_title
        else:
            destination = self.destination

        chapters = []

        for chapter_id, chapter in chapter_list.items():
            chapter_url = "https://mangadex.org/api/chapter/" + str(chapter_id)
            chapters.append(Chapter(
                chapter_url,
                chapter["lang_code"],
                series_title,
                chapter["chapter"],
                destination,
                self.format,
                self.get_image_pages,
                chapter["title"],
                chapter["group_name"]
            ))

        return chapters

    @staticmethod
    def get_image_pages(_self: Chapter, url: str) -> List[str]:
        """
        Callback method for the Chapter object.
        Loads the image URLs for the pages of a chapter
        :param _self: The chapter that calls this method
        :param url: The base chapter URL
        :return: The page image URLs
        """
        scraper = cfscrape.create_scraper()
        resp = scraper.get(url)

        if resp.status_code >= 300:
            _self.logger.warning("Unsuccessful request ({})"
                                 .format(resp.status_code))
            _self.logger.debug(resp.text)
            return []

        chapter_info = json.loads(resp.text)
        image_urls = []

        # A bare "/data/" means the images are hosted on mangadex.org itself
        server = chapter_info["server"]
        if server == "/data/":
            server = "CF!https://mangadex.org/data/"  # Cloudflare protected

        chapter_hash = chapter_info["hash"]
        base_url = server + chapter_hash + "/"

        # Each page entry is a filename appended to <server><hash>/
        for page in chapter_info["page_array"]:
            image_urls.append(base_url + page)

        return image_urls
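

# ----------------------------------------------------------------------------
# Usage sketch (not part of the original module): illustrates how the pieces
# above fit together. The no-argument constructor is an assumption; the real
# Scraper base class in manga_dl.scrapers.Scraper may require different setup,
# and would normally expose a public wrapper around _load_chapters().
# ----------------------------------------------------------------------------
if __name__ == "__main__":
    url = "https://mangadex.org/title/34765"  # hypothetical series URL

    if MangaDexScraper.url_matches(url):
        scraper = MangaDexScraper()  # assumed: no-arg init is permitted
        # _load_chapters is called directly here for illustration only
        for chapter in scraper._load_chapters(url):
            print(chapter)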