// ThemesMoeParser.kt
/*
Copyright 2016 Hermann Krumrey <hermann@krumreyh.com>
This file is part of anitheme-dl.
anitheme-dl is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
anitheme-dl is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with anitheme-dl. If not, see <http://www.gnu.org/licenses/>.
*/
package net.namibsun.anitheme.dl.lib.parsing
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.jsoup.nodes.Element
import org.jsoup.parser.Parser
import java.util.logging.Logger
/**
* ThemesMoeParser is a class that parses [themes.moe](https://themes.moe).
*
* The parser can be configured in a variety of ways to deliver different results
* using a variety of optional parameters, which all default to true
*
* For example, the class may be called with a simple
*
* ThemesMoeParser()
*
*
* but say one wants to only include Openings
*
* ThemesMoeParser(includeOp = true,
* includeEd = false)
*
* or:
*
* ThemesMoeParser(includeEd = false)
*
* @param includeOp Specifies if opening themes should be included in the result
* @param includeEd Specifies if ending themes should be included in the result
*/
class ThemesMoeParser(
    val includeOp: Boolean = true,
    val includeEd: Boolean = true
) {

    /**
     * Logger used for all progress and debug output of this parser
     */
    val logger: Logger = Logger.getLogger("ThemesMoeParser")

    /**
     * The base URL for the [themes.moe](https://themes.moe) PHP API
     *
     * Used for all POST requests
     */
    val baseApiUrl = "https://themes.moe/includes"

    /**
     * Fetches all series for a user on one of the list services supported by
     * [themes.moe](https://themes.moe).
     *
     * A basic usage example would be:
     *
     *     ThemesMoeParser().fetchUserList("namboy94", MYANIMELIST)
     *
     * This will fetch all series for the user "namboy94" using the myanimelist
     * service of [themes.moe](https://themes.moe)
     *
     * The request is a blocking HTTP POST; failures raised by Jsoup are not caught here
     * and propagate to the caller.
     *
     * @param username The username for which to retrieve the list
     * @param listType The type of list to search for. Must be in the [ListTypes] enum
     * @return A List of [Series] objects found while parsing the result from [themes.moe](https://themes.moe)
     */
    fun fetchUserList(username: String, listType: ListTypes): List<Series> {
        this.logger.info("Fetching ${listType.name} list for user $username")
        val request = Jsoup.connect("${this.baseApiUrl}/get_list.php")
            .data("username", username)
            .data("list", listType.value)
            .post()
        return this.parseTable(request)
    }

    /**
     * Fetches all series in a generated [themes.moe](https://themes.moe) playlist
     *
     * The playlist is identified by the unique id that [themes.moe](https://themes.moe)
     * assigns to the playlist
     *
     * A basic usage example:
     *
     *     ThemesMoeParser().fetchPlayList(15214) // Fetches all series for the playlist 15214
     *
     * The request is a blocking HTTP POST; failures raised by Jsoup are not caught here
     * and propagate to the caller.
     *
     * @param playListId The unique Playlist ID
     * @return A List of [Series] objects found while parsing the result from [themes.moe](https://themes.moe)
     */
    fun fetchPlayList(playListId: Int): List<Series> {
        this.logger.info("Fetching Playlist $playListId.")
        val request = Jsoup.connect("${this.baseApiUrl}/create_playlist.php")
            .data("plist", "$playListId")
            .post()
        // This endpoint's response is run through normalizeTable before it is parsed
        return this.parseTable(normalizeTable(request))
    }

    /**
     * Fetches all series for a specified season of anime
     *
     * To do this, both a season and year parameter are required.
     * Of course, only seasonal lists that exist on [themes.moe](https://themes.moe)
     * can be fetched.
     *
     * A basic usage example:
     *
     *     ThemesMoeParser().fetchSeasonList(2017, Seasons.WINTER) // Fetches all series from the 2017 winter season
     *
     * The request is a blocking HTTP POST; failures raised by Jsoup are not caught here
     * and propagate to the caller.
     *
     * @param year The year for which to fetch the seasonal list
     * @param season The season for which to fetch the seasonal list
     * @return A List of [Series] objects found while parsing the result from [themes.moe](https://themes.moe)
     */
    fun fetchSeasonList(year: Int, season: Seasons): List<Series> {
        this.logger.info("Fetching Season ${season.name} for year $year")
        val request = Jsoup.connect("${this.baseApiUrl}/specific_list.php")
            .data("y", "$year")
            .data("s", season.value)
            .post()
        return this.parseTable(request)
    }

    /**
     * Fetches the currently popular series from the popular list on [themes.moe](https://themes.moe)
     *
     * This list is of course subject to change whenever the popularity of theme songs
     * on [themes.moe](https://themes.moe) changes.
     *
     * Basic usage example:
     *
     *     ThemesMoeParser().fetchPopularList() // Fetches all series from the popular list
     *
     * The request is a blocking HTTP POST; failures raised by Jsoup are not caught here
     * and propagate to the caller.
     *
     * @return A List of [Series] objects found while parsing the result from [themes.moe](https://themes.moe)
     */
    fun fetchPopularList(): List<Series> {
        this.logger.info("Fetching popular series.")
        val request = Jsoup.connect("${this.baseApiUrl}/specific_list.php")
            .data("id", "1")
            .post()
        return this.parseTable(request)
    }

    /**
     * Searches [themes.moe](https://themes.moe) for opening and ending songs
     *
     * This method emulates the search form on [themes.moe](https://themes.moe)
     *
     * A basic usage example:
     *
     *     ThemesMoeParser().search("One Punch Man") // Fetches all search results for 'One Punch Man'
     *
     * The request is a blocking HTTP POST; failures raised by Jsoup are not caught here
     * and propagate to the caller.
     *
     * @param query The search term to search for
     * @return A List of [Series] objects found while parsing the result from [themes.moe](https://themes.moe)
     */
    fun search(query: String): List<Series> {
        this.logger.info("Searching for: $query")
        val request = Jsoup.connect("${this.baseApiUrl}/anime_search.php")
            .data("search", "-1")
            .data("name", query)
            .post()
        // This endpoint's response is run through normalizeTable before it is parsed
        return this.parseTable(normalizeTable(request))
    }

    /**
     * Parses a result table from [themes.moe](https://themes.moe)
     *
     * Every 'tr' row inside a 'tbody' is treated as one series. The text of the row's
     * first 'td' cell is used as the series name, the second 'td' cell is handed to
     * [parseEntries] to collect the theme links.
     *
     * Rows are skipped (never turned into a [Series]) when
     *
     * - the row has no 'td' cell at all,
     * - the series name is empty or was already seen earlier in the same table,
     * - no theme survives the opening/ending filters.
     *
     * @param request The request to parse, no selects should be called before calling this method
     * @return A List of [Series] objects generated while parsing the table
     */
    private fun parseTable(request: Document): List<Series> {
        this.logger.fine("HTML Data to parse:\n$request")

        // Names that were already handled. Seeded with "" so that rows with an empty
        // name are treated like duplicates and skipped. A set keeps the lookup cheap.
        val history = mutableSetOf("")
        val series = mutableListOf<Series>()

        for (entry in request.select("tbody").select("tr")) {
            // Rows without any 'td' cell would otherwise abort the whole parse with an
            // IndexOutOfBoundsException
            val nameCell = entry.select("td").getOrNull(0)
            if (nameCell == null) {
                this.logger.info("Skipping table row without any cells")
                continue
            }

            val name = nameCell.text()
            this.logger.info("Parsing $name")

            if (name in history) {
                this.logger.info("Skipping Series, already in history.")
                continue
            }

            this.logger.info("Adding $name to history")
            history.add(name)

            val themes = parseEntries(entry)
            if (themes.isNotEmpty()) {
                series.add(Series(name, themes))
                this.logger.info("Adding series $name with ${themes.size} themes")
            } else {
                this.logger.info("Skipping Series $name. No valid themes found")
            }
        }
        return series
    }

    /**
     * Separately parses the links of a series
     *
     * This is done by parsing the second 'td' element and using the 'a' tags to retrieve the
     * description (link text) and video URL ('href' attribute) of every theme.
     *
     * Themes whose description starts with "OP" are dropped when [includeOp] is false,
     * themes whose description starts with "ED" are dropped when [includeEd] is false.
     * Both prefix checks ignore case.
     *
     * @param entry The 'tr' element of the series
     * @return A List of Theme objects generated by the parser, empty if the row has no second 'td' cell
     */
    private fun parseEntries(entry: Element): List<Theme> {
        // A row with fewer than two cells has no theme links to offer
        val linkCell = entry.select("td").getOrNull(1)
        if (linkCell == null) {
            this.logger.info("Row has no theme cell, no themes to parse")
            return emptyList()
        }

        val themes = mutableListOf<Theme>()
        for (theme in linkCell.select("a")) {
            val description = theme.text()
            val url = theme.attr("href")
            when {
                !this.includeOp && description.startsWith("OP", ignoreCase = true) ->
                    this.logger.info("Skipping $description because Openings are disabled")
                !this.includeEd && description.startsWith("ED", ignoreCase = true) ->
                    this.logger.info("Skipping $description because Endings are disabled")
                else -> {
                    this.logger.info("Adding Theme: {$description: $url}")
                    themes.add(Theme(description, url))
                }
            }
        }
        return themes
    }

    /**
     * OK, so this is a weird one.
     *
     * The 'create_playlist.php' and 'anime_search.php' API endpoints return some weird form of malformed HTML.
     * It contains '["', and everything after it is HTML-entity encoded.
     *
     * This method tries to format the POST request result by
     *
     * 1. unescaping the HTML entities of the whole document,
     * 2. turning the escaped slashes '\/' into plain '/',
     * 3. removing every escaped quote '\"',
     * 4. removing every '["' marker,
     *
     * and then parsing the resulting text as a fresh document.
     *
     * @param request The request document to normalize
     * @return The normalized request
     */
    private fun normalizeTable(request: Document): Document {
        val normalized = Parser.unescapeEntities(request.toString(), true)
            .replace("\\/", "/")
            .replace("\\\"", "")
            .replace("[\"", "")
        return Jsoup.parse(normalized)
    }
}