Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Improve hitomi.la support #3720

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions gallery_dl/extractor/hitomi.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,22 @@
import re


def get_nozomi_args(query):
    """Split a 'namespace:value' search term into nozomi lookup arguments.

    Returns a tuple ``(area, tag, language)``:

    * ``female:x`` / ``male:x``  ->  ``("tag", "female:x", "all")``
      (the complete term, namespace included, is used as the tag)
    * ``language:x``             ->  ``(None, "index", "x")``
    * any other ``ns:x``         ->  ``("ns", "x", "all")``

    Only the first ':' separates namespace and value, so values that
    contain a colon themselves are kept intact.

    Raises ValueError if 'query' contains no ':' at all.
    """
    term = query.strip()
    ns, sep, tag = term.partition(":")
    if not sep:
        raise ValueError(
            "expected '<namespace>:<value>', got {!r}".format(query))

    if ns in ("female", "male"):
        # use the stripped term so stray whitespace never ends up in a URL
        return "tag", term, "all"
    if ns == "language":
        return None, "index", tag
    return ns, tag, "all"


class HitomiGalleryExtractor(GalleryExtractor):
"""Extractor for image galleries from hitomi.la"""
category = "hitomi"
Expand Down Expand Up @@ -140,6 +156,53 @@ def images(self, _):
return result


class HitomiIndexExtractor(Extractor):
    """Extractor for galleries from index searches on hitomi.la"""
    category = "hitomi"
    subcategory = "index"
    root = "https://hitomi.la"
    pattern = (r"(?:https?://)?hitomi\.la/"
               r"([a-zA-Z0-9_]+)-([a-zA-Z0-9_]+)\.html")
    test = (
        ("https://hitomi.la/index-japanese.html", {
            "pattern": HitomiGalleryExtractor.pattern,
            "count": ">= 35",
        }),
    )

    def __init__(self, match):
        """Store the two URL components '<tag>-<language>.html'"""
        Extractor.__init__(self, match)
        self.tag, self.language = match.groups()

    def items(self):
        """Yield a Message.Queue entry for every gallery in the index

        The '.nozomi' file is requested in 100-byte slices via HTTP
        Range headers; each slice is passed to decode_nozomi() and the
        resulting IDs are turned into gallery URLs.
        """
        data = {"_extractor": HitomiGalleryExtractor}
        nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
            self.tag, self.language)
        headers = {
            "Origin": self.root,
            "Cache-Control": "max-age=0",
        }

        offset = 0
        total = None
        while True:
            # mimic the site's own paginated requests
            headers["Referer"] = "{}/{}-{}.html?page={}".format(
                self.root, self.tag, self.language, offset // 100 + 1)
            headers["Range"] = "bytes={}-{}".format(offset, offset+99)
            response = self.request(nozomi_url, headers=headers)

            for gallery_id in decode_nozomi(response.content):
                gallery_url = "{}/galleries/{}.html".format(
                    self.root, gallery_id)
                yield Message.Queue, gallery_url, data

            offset += 100
            if total is None:
                # total size is the part after '/' in 'Content-Range'
                content_range = response.headers.get("content-range")
                if not content_range:
                    # No Content-Range header: the Range request was
                    # presumably not honoured, so there is no total to
                    # paginate against. Stop instead of raising KeyError.
                    return
                total = text.parse_int(content_range.rpartition("/")[2])
            if offset >= total:
                return


class HitomiTagExtractor(Extractor):
"""Extractor for galleries from tag searches on hitomi.la"""
category = "hitomi"
Expand Down Expand Up @@ -198,6 +261,63 @@ def items(self):
return


class HitomiSearchExtractor(Extractor):
    """Extractor for galleries from multiple tag searches on hitomi.la"""
    category = "hitomi"
    subcategory = "search"
    root = "https://hitomi.la"
    # the '.' in 'search.html' is escaped so it only matches a literal dot
    pattern = (r"(?:https?://)?hitomi\.la/search\.html"
               r"\?([^/?#]+)")
    test = (
        ("https://hitomi.la/search.html"
         "?tag%3Ascreenshots%20language%3Ajapanese", {
             "pattern": HitomiGalleryExtractor.pattern,
             "count": ">= 35",
         }),
        ("https://hitomi.la/search.html"
         "?language%3Ajapanese%20artist%3Asumiya"),
        ("https://hitomi.la/search.html?group:initial_g"),
        ("https://hitomi.la/search.html?series:amnesia"),
        ("https://hitomi.la/search.html?type%3Adoujinshi"),
        ("https://hitomi.la/search.html?character%3Aa2"),
    )

    def __init__(self, match):
        """Store the raw query string and its individual search terms"""
        Extractor.__init__(self, match)
        self.query = match.group(1)
        # split() without arguments drops the empty terms that repeated,
        # leading or trailing spaces would otherwise produce
        self.tags = text.unquote(self.query).split()

    def get_nozomi_items(self, full_tag):
        """Return the set of gallery IDs listed for one search term

        'full_tag' is a single 'namespace:value' term; it is mapped to a
        '.nozomi' file URL with get_nozomi_args(), and the downloaded
        content is decoded with decode_nozomi().
        """
        area, tag, language = get_nozomi_args(full_tag)

        if area:
            referer_base = "{}/n/{}/{}-{}.html".format(
                self.root, area, tag, language)
            nozomi_url = "https://ltn.hitomi.la/{}/{}-{}.nozomi".format(
                area, tag, language)
        else:
            # no area (language-only term): file sits at the top level
            referer_base = "{}/n/{}-{}.html".format(
                self.root, tag, language)
            nozomi_url = "https://ltn.hitomi.la/{}-{}.nozomi".format(
                tag, language)

        headers = {
            "Origin": self.root,
            "Cache-Control": "max-age=0",
            "Referer": "{}/search.html?{}".format(
                referer_base, self.query),
        }
        response = self.request(nozomi_url, headers=headers)

        return set(decode_nozomi(response.content))

    def items(self):
        """Yield a Message.Queue entry for every gallery matching all terms

        Each term is looked up separately; only IDs present in every
        result set are kept and emitted in descending ID order.
        """
        data = {"_extractor": HitomiGalleryExtractor}

        results = [self.get_nozomi_items(tag) for tag in self.tags]
        if not results:
            # query consisted of whitespace only; set.intersection()
            # would raise TypeError when called without arguments
            return

        for gallery_id in sorted(set.intersection(*results), reverse=True):
            gallery_url = "{}/galleries/{}.html".format(
                self.root, gallery_id)
            yield Message.Queue, gallery_url, data


@memcache(maxage=1800)
def _parse_gg(extr):
page = extr.request("https://ltn.hitomi.la/gg.js").text
Expand Down