Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""LICENSE
Copyright 2015 Hermann Krumrey <hermann@krumreyh.com>

This file is part of toktokkie.

toktokkie is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

toktokkie is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with toktokkie.  If not, see <http://www.gnu.org/licenses/>.
18LICENSE"""
20import os
21import json
22import time
23import logging
24from bs4 import BeautifulSoup
25from typing import List, Optional, Tuple, Dict, Any
26from puffotter.subprocess import execute_command
27from puffotter.requests import aggressive_request
28from puffotter.os import replace_illegal_ntfs_chars
29from anime_list_apis.api.AnilistApi import AnilistApi
30from anime_list_apis.models.attributes.MediaType import MediaType
class AniTheme:
    """
    Class that contains all relevant information for an anime theme song.
    Uses reddit and myanimelist to retrieve all the information
    # TODO Integrate AniDB, since it has way more info on artists etc
    """

    mal_cache = {}  # type: Dict[int, Dict[str, Any]]
    """
    Cache for repeated myanimelist requests
    """

    logger = logging.getLogger(__name__)
    """
    Logger for this class
    """

    def __init__(
            self,
            show_name: str,
            mal_id: int,
            theme_type: str,
            song_name: str,
            episodes: str,
            media_url: str
    ):
        """
        Initializes the anime theme. Loads missing data from the internet
        while initializing, so this might take up to a couple of seconds.
        :param show_name: The name of the show this song is a theme song for
        :param mal_id: The myanimelist ID of that show
        :param theme_type: The type of theme (example: OP, ED)
        :param song_name: The name of the song
        :param episodes: During which episodes the theme song played
        :param media_url: The URL to the video of this theme song
        """
        self.logger.info("Initializing {}".format(song_name))

        self.show_name = replace_illegal_ntfs_chars(show_name)
        self.mal_id = mal_id
        self.anilist_id = AnilistApi().get_anilist_id_from_mal_id(
            MediaType.ANIME, self.mal_id
        )

        # The raw theme type (e.g. "OP1 V2") is kept for parsing, while
        # self.theme_type only holds the normalized category
        self._theme_type = theme_type
        if "OP" in theme_type:
            self.theme_type = "OP"
        elif "ED" in theme_type:
            self.theme_type = "ED"
        else:
            self.theme_type = "Unknown"

        # Any version marker other than "v1" marks an alternate version
        lowered_type = self._theme_type.lower()
        self.alternate_version = \
            "v" in lowered_type and "v1" not in lowered_type

        self.song_name = replace_illegal_ntfs_chars(song_name)
        self.episodes = episodes
        self.media_url = media_url

        self.filename = "{} {} - {}".format(
            self.show_name, theme_type, self.song_name
        )

        self.temp_webm_file = os.path.join("/tmp", self.filename + ".webm")
        self.temp_mp3_file = os.path.join("/tmp", self.filename + ".mp3")
        self.temp_cover_file = os.path.join("/tmp", self.filename + ".png")

        self.mal_title, self.artist = self.__load_song_info()
        self.mal_title = replace_illegal_ntfs_chars(self.mal_title)
        self.artist = replace_illegal_ntfs_chars(self.artist)

        if song_name.lower() not in self.mal_title.lower():
            self.logger.warning("Song title mismatch: [{}!={}]".format(
                song_name, self.mal_title
            ))
        if self.artist == "Unknown" or self.artist.strip() == "":
            self.logger.warning("Unknown artist")

        self.logger.info(self)

    def __str__(self) -> str:
        """
        :return: A string representation of the object
        """
        return "{} {}, Title: \"{}\", Artist: \"{}\", Eps: \"{}\"".format(
            self.show_name,
            self._theme_type,
            self.song_name,
            self.artist,
            self.episodes
        )

    @classmethod
    def load_reddit_anithemes_wiki_info(
            cls,
            year: int,
            season: str,
            whitelist: Optional[List[str]] = None
    ) -> List["AniTheme"]:
        """
        Loads all theme songs for a specific season and year
        CAUTION: Currently season does nothing, it is recommended to use
                 the whitelist parameter to limit the requests done
        :param year: The year for which to fetch the theme songs
        :param season: The season for which to fetch the theme songs
        :param whitelist: If provided, will ignore any series that are not
                          contained in the whitelist
        :return: The fetched AniTheme objects. Empty if the wiki page
                 does not contain any listings
        """

        cls.logger.info("Loading theme info for {} {}".format(season, year))

        url = "https://old.reddit.com/r/AnimeThemes/wiki/" \
              "{}#wiki_{}_{}_season".format(year, year, season)
        response = aggressive_request(url)

        soup = BeautifulSoup(response, "html.parser")
        listings = soup.find("div", {"class": "md wiki"})
        if listings is None:
            cls.logger.warning(
                "No wiki listings found for {}".format(year)
            )
            return []

        current_title = ""
        current_mal_id = 0
        current_tables = []  # type: List[Any]
        themes = []  # type: List[AniTheme]

        for element in listings.children:
            if element.name == "h3":
                # A new series heading closes the previous series.
                # Anything collected before the first heading has an empty
                # title and is therefore discarded by the helper.
                themes += cls.__load_series_themes(
                    current_title, current_mal_id, current_tables, whitelist
                )

                current_title = element.text
                current_tables = []

                # The series ID is either the last or the second to last
                # path segment of the linked URL
                mal_url = element.find_all("a")[0]["href"]
                if mal_url.endswith("/"):
                    mal_url = mal_url[0:-1]
                try:
                    current_mal_id = int(mal_url.rsplit("/", 1)[1])
                except ValueError:
                    current_mal_id = int(mal_url.rsplit("/", 2)[1])

            elif element.name == "table":
                current_tables.append(element)

        # The last series on the page is not followed by another heading,
        # so it has to be processed explicitly
        themes += cls.__load_series_themes(
            current_title, current_mal_id, current_tables, whitelist
        )

        return themes

    @classmethod
    def __load_series_themes(
            cls,
            title: str,
            mal_id: int,
            tables: List[Any],
            whitelist: Optional[List[str]]
    ) -> List["AniTheme"]:
        """
        Generates the AniTheme objects for all tables of a single series
        :param title: The title of the series
        :param mal_id: The myanimelist ID of the series
        :param tables: The tables that were listed for the series
        :param whitelist: If provided, series not contained in it are skipped
        :return: The AniTheme objects of the series, or an empty list if
                 the title is empty or not whitelisted
        """
        if title == "" or (whitelist is not None and title not in whitelist):
            return []

        print("Loading themes for {}".format(title))

        themes = []  # type: List[AniTheme]
        for table in tables:
            themes += cls.__parse_reddit_wiki_table(title, mal_id, table)
        return themes

    @classmethod
    def __parse_reddit_wiki_table(
            cls,
            title: str,
            mal_id: int,
            table
    ) -> List["AniTheme"]:
        """
        Parses a table from reddit and generates AniTheme objects from them
        :param title: The title of the series associated with the table
        :param mal_id: The myanimelist ID of the series
        :param table: The table to parse
        :return: A list of AniTheme objects parsed from the content of
                 the table
        """
        data = []  # type: List[AniTheme]

        for row in table.find_all("tr"):
            columns = row.find_all("td")

            # Rows without cells and rows that lack the
            # description/link/episodes cells can't be used
            if len(columns) < 3:
                continue

            description = columns[0].text
            if not description:
                continue

            try:
                link = columns[1].find("a")["href"]
            except TypeError:  # Avoid missing links
                continue

            # The description is expected to look like: OP1 "Song Name"
            parts = description.split("\"", 1)
            if len(parts) < 2:
                cls.logger.warning(
                    "Skipping entry without song name: {}".format(description)
                )
                continue

            data.append(cls(
                show_name=title,
                theme_type=parts[0].strip(),
                song_name=parts[1].rsplit("\"", 1)[0],
                media_url=link,
                mal_id=mal_id,
                episodes=columns[2].text
            ))
        return data

    def __load_song_info(self) -> Tuple[str, str]:
        """
        Loads song information using myanimelist (like artist etc)
        The myanimelist data is cached per ID in AniTheme.mal_cache, a
        request is only sent if the ID is not in the cache yet.
        :return: A tuple consisting of the song title and the artist.
                 If it was not possible to figure out using myanimelist data,
                 both will be "Unknown"
        """
        self.logger.info("Loading song data using myanimelist")

        unknown = ("Unknown", "Unknown")

        if self.theme_type == "OP":
            prefix = "opening"
        elif self.theme_type == "ED":
            prefix = "ending"
        else:
            return unknown

        # "OP2 V3" -> "2"; a missing number means the first song
        song_number = self._theme_type\
            .lower()\
            .replace("op", "")\
            .replace("ed", "")\
            .split("v")[0]\
            .strip()

        if song_number == "":
            song_number = "1"
        try:
            song_index = int(song_number) - 1
        except ValueError:
            return unknown
        if song_index < 0:
            # A negative index would silently select a song from the end
            return unknown

        if self.mal_id in AniTheme.mal_cache:
            info = AniTheme.mal_cache[self.mal_id]
        else:
            url = "https://api.jikan.moe/v3/anime/{}".format(self.mal_id)
            info = json.loads(aggressive_request(url))
            AniTheme.mal_cache[self.mal_id] = info

        songs = info[prefix + "_themes"]
        self.logger.debug(songs)
        self.logger.debug("Using index {} ({})"
                          .format(song_index, self._theme_type))

        try:
            song_info = songs[song_index]
            self.logger.debug(song_info)

            # Expected form: [prefix]"<title>" by <artist>
            splitted = song_info.split("\"", 2)
            if len(splitted) == 2:
                # Only one quote present, fall back to splitting on " by "
                splitted = [""] + splitted[1].replace("\"", "").split(" by ")
            title = splitted[1]
            artist = splitted[2].replace("by ", "").strip()
            return title, artist
        except IndexError:
            return unknown

    def download_webm(self) -> None:
        """
        Downloads the .webm video of the anime theme song to the tmp directory
        Files that already exist and are larger than 1000 bytes are not
        downloaded again. Failed downloads are retried after 15 seconds.
        :return: None
        """
        if os.path.exists(self.temp_webm_file) \
                and os.path.getsize(self.temp_webm_file) > 1000:
            # Skip existing file
            return

        command = ["curl", "-o", self.temp_webm_file, self.media_url]

        retry_count = 0
        while execute_command(command) != 0:

            if retry_count > 3:
                self.logger.warning("File download failed")
                return
            retry_count += 1

            self.logger.warning("Couldn't download theme, retrying...")
            time.sleep(15)

    def convert_to_mp3(self) -> None:
        """
        Converts the previously downloaded webm file to mp3
        Does nothing if the mp3 file already exists.
        :return: None
        """
        command = [
            "ffmpeg",
            "-i", self.temp_webm_file,
            "-vn",
            "-ab", "160k",
            "-ar", "44100",
            "-y", self.temp_mp3_file
        ]

        if not os.path.exists(self.temp_mp3_file):
            execute_command(command)