diff --git a/src/library/archive_entry.cr b/src/library/archive_entry.cr new file mode 100644 index 0000000..cd63f17 --- /dev/null +++ b/src/library/archive_entry.cr @@ -0,0 +1,111 @@ +require "yaml" + +require "./entry" + +class ArchiveEntry < Entry + include YAML::Serializable + + getter zip_path : String + + def initialize(@zip_path, @book) + storage = Storage.default + @path = @zip_path + @encoded_path = URI.encode @zip_path + @title = File.basename @zip_path, File.extname @zip_path + @encoded_title = URI.encode @title + @size = (File.size @zip_path).humanize_bytes + id = storage.get_entry_id @zip_path, File.signature(@zip_path) + if id.nil? + id = random_str + storage.insert_entry_id({ + path: @zip_path, + id: id, + signature: File.signature(@zip_path).to_s, + }) + end + @id = id + @mtime = File.info(@zip_path).modification_time + + unless File.readable? @zip_path + @err_msg = "File #{@zip_path} is not readable." + Logger.warn "#{@err_msg} Please make sure the " \ + "file permission is configured correctly." + return + end + + archive_exception = validate_archive @zip_path + unless archive_exception.nil? + @err_msg = "Archive error: #{archive_exception}" + Logger.warn "Unable to extract archive #{@zip_path}. " \ + "Ignoring it. #{@err_msg}" + return + end + + file = ArchiveFile.new @zip_path + @pages = file.entries.count do |e| + SUPPORTED_IMG_TYPES.includes? \ + MIME.from_filename? e.filename + end + file.close + end + + private def sorted_archive_entries + ArchiveFile.open @zip_path do |file| + entries = file.entries + .select { |e| + SUPPORTED_IMG_TYPES.includes? \ + MIME.from_filename? e.filename + } + .sort! { |a, b| + compare_numerically a.filename, b.filename + } + yield file, entries + end + end + + def read_page(page_num) + raise "Unreadble archive. 
#{@err_msg}" if @err_msg + img = nil + begin + sorted_archive_entries do |file, entries| + page = entries[page_num - 1] + data = file.read_entry page + if data + img = Image.new data, MIME.from_filename(page.filename), + page.filename, data.size + end + end + rescue e + Logger.warn "Unable to read page #{page_num} of #{@zip_path}. Error: #{e}" + end + img + end + + def page_dimensions + sizes = [] of Hash(String, Int32) + sorted_archive_entries do |file, entries| + entries.each_with_index do |e, i| + begin + data = file.read_entry(e).not_nil! + size = ImageSize.get data + sizes << { + "width" => size.width, + "height" => size.height, + } + rescue e + Logger.warn "Failed to read page #{i} of entry #{zip_path}. #{e}" + sizes << {"width" => 1000_i32, "height" => 1000_i32} + end + end + end + sizes + end + + def examine : Bool + File.exists? @zip_path + end + + def self.is_valid?(path : String) : Bool + is_supported_file path + end +end diff --git a/src/library/cache.cr b/src/library/cache.cr index 10e4f60..f35af8b 100644 --- a/src/library/cache.cr +++ b/src/library/cache.cr @@ -76,8 +76,8 @@ class SortedEntriesCacheEntry < CacheEntry(Array(String), Array(Entry)) entries : Array(Entry), opt : SortOptions?) entries_sig = Digest::SHA1.hexdigest (entries.map &.id).to_s user_context = opt && opt.method == SortMethod::Progress ? username : "" - sig = Digest::SHA1.hexdigest (book_id + entries_sig + user_context + - (opt ? opt.to_tuple.to_s : "nil")) + sig = Digest::SHA1.hexdigest(book_id + entries_sig + user_context + + (opt ? opt.to_tuple.to_s : "nil")) "#{sig}:sorted_entries" end end @@ -101,8 +101,8 @@ class SortedTitlesCacheEntry < CacheEntry(Array(String), Array(Title)) def self.gen_key(username : String, titles : Array(Title), opt : SortOptions?) titles_sig = Digest::SHA1.hexdigest (titles.map &.id).to_s user_context = opt && opt.method == SortMethod::Progress ? username : "" - sig = Digest::SHA1.hexdigest (titles_sig + user_context + - (opt ? 
opt.to_tuple.to_s : "nil")) + sig = Digest::SHA1.hexdigest(titles_sig + user_context + + (opt ? opt.to_tuple.to_s : "nil")) "#{sig}:sorted_titles" end end diff --git a/src/library/dir_entry.cr b/src/library/dir_entry.cr new file mode 100644 index 0000000..0ce4e71 --- /dev/null +++ b/src/library/dir_entry.cr @@ -0,0 +1,132 @@ +require "yaml" + +require "./entry" + +class DirEntry < Entry + include YAML::Serializable + + getter dir_path : String + + @[YAML::Field(ignore: true)] + @sorted_files : Array(String)? + + @signature : String + + def initialize(@dir_path, @book) + storage = Storage.default + @path = @dir_path + @encoded_path = URI.encode @dir_path + @title = File.basename @dir_path + @encoded_title = URI.encode @title + + unless File.readable? @dir_path + @err_msg = "Directory #{@dir_path} is not readable." + Logger.warn "#{@err_msg} Please make sure the " \ + "file permission is configured correctly." + return + end + + unless DirEntry.is_valid? @dir_path + @err_msg = "Directory #{@dir_path} is not valid directory entry." + Logger.warn "#{@err_msg} Please make sure the " \ + "directory has valid images." + return + end + + size_sum = 0 + sorted_files.each do |file_path| + size_sum += File.size file_path + end + @size = size_sum.humanize_bytes + + @signature = Dir.directory_entry_signature @dir_path + id = storage.get_entry_id @dir_path, @signature + if id.nil? + id = random_str + storage.insert_entry_id({ + path: @dir_path, + id: id, + signature: @signature, + }) + end + @id = id + + @mtime = sorted_files.map do |file_path| + File.info(file_path).modification_time + end.max + @pages = sorted_files.size + end + + def read_page(page_num) + img = nil + begin + files = sorted_files + file_path = files[page_num - 1] + data = File.read(file_path).to_slice + if data + img = Image.new data, MIME.from_filename(file_path), + File.basename(file_path), data.size + end + rescue e + Logger.warn "Unable to read page #{page_num} of #{@dir_path}. 
Error: #{e}" + end + img + end + + def page_dimensions + sizes = [] of Hash(String, Int32) + sorted_files.each_with_index do |path, i| + data = File.read(path).to_slice + begin + data.not_nil! + size = ImageSize.get data + sizes << { + "width" => size.width, + "height" => size.height, + } + rescue e + Logger.warn "Failed to read page #{i} of entry #{@dir_path}. #{e}" + sizes << {"width" => 1000_i32, "height" => 1000_i32} + end + end + sizes + end + + def examine : Bool + existence = File.exists? @dir_path + return false unless existence + files = DirEntry.image_files @dir_path + signature = Dir.directory_entry_signature @dir_path + existence = files.size > 0 && @signature == signature + @sorted_files = nil unless existence + + # For better efficiency, this could update the directory entry with its new + # properties and return true, like Title.examine + existence + end + + def sorted_files + cached_sorted_files = @sorted_files + return cached_sorted_files if cached_sorted_files + @sorted_files = DirEntry.sorted_image_files @dir_path + @sorted_files.not_nil! + end + + def self.image_files(dir_path) + Dir.entries(dir_path) + .reject(&.starts_with? ".") + .map { |fn| File.join dir_path, fn } + .select { |fn| is_supported_image_file fn } + .reject { |fn| File.directory? fn } + .select { |fn| File.readable? fn } + end + + def self.sorted_image_files(dir_path) + self.image_files(dir_path) + .sort { |a, b| compare_numerically a, b } + end + + def self.is_valid?(path : String) : Bool + image_files(path).size > 0 + end +end diff --git a/src/library/entry.cr b/src/library/entry.cr index dd50ed3..16666ea 100644 --- a/src/library/entry.cr +++ b/src/library/entry.cr @@ -1,66 +1,55 @@ require "image_size" -require "yaml" -class Entry - include YAML::Serializable +private def node_has_key(node : YAML::Nodes::Mapping, key : String) + node.nodes + .map_with_index { |n, i| {n, i} } + .select(&.[1].even?) + .map(&.[0]) + .select(YAML::Nodes::Scalar) + .map(&.as(YAML::Nodes::Scalar).value) + .includes?
key +end - getter zip_path : String, book : Title, title : String, - size : String, pages : Int32, id : String, encoded_path : String, - encoded_title : String, mtime : Time, err_msg : String? +abstract class Entry + getter id : String, book : Title, title : String, path : String, + size : String, pages : Int32, mtime : Time, + encoded_path : String, encoded_title : String, err_msg : String? - @[YAML::Field(ignore: true)] - @sort_title : String? + def initialize( + @id, @title, @book, @path, + @size, @pages, @mtime, + @encoded_path, @encoded_title, @err_msg + ) + end - def initialize(@zip_path, @book) - storage = Storage.default - @encoded_path = URI.encode @zip_path - @title = File.basename @zip_path, File.extname @zip_path - @encoded_title = URI.encode @title - @size = (File.size @zip_path).humanize_bytes - id = storage.get_entry_id @zip_path, File.signature(@zip_path) - if id.nil? - id = random_str - storage.insert_entry_id({ - path: @zip_path, - id: id, - signature: File.signature(@zip_path).to_s, - }) + def self.new(ctx : YAML::ParseContext, node : YAML::Nodes::Node) + unless node.is_a? YAML::Nodes::Mapping + raise "Unexpected node type in YAML" end - @id = id - @mtime = File.info(@zip_path).modification_time - - unless File.readable? @zip_path - @err_msg = "File #{@zip_path} is not readable." - Logger.warn "#{@err_msg} Please make sure the " \ - "file permission is configured correctly." - return + # Doing YAML::Any.new(ctx, node) here causes a weird error, so + # instead we are using a more hacky approach (see `node_has_key`). + # TODO: Use a more elegant approach + if node_has_key node, "zip_path" + ArchiveEntry.new ctx, node + elsif node_has_key node, "dir_path" + DirEntry.new ctx, node + else + raise "Unknown entry found in YAML cache. Try deleting the " \ + "`library.yml.gz` file" end - - archive_exception = validate_archive @zip_path - unless archive_exception.nil? 
- @err_msg = "Archive error: #{archive_exception}" - Logger.warn "Unable to extract archive #{@zip_path}. " \ - "Ignoring it. #{@err_msg}" - return - end - - file = ArchiveFile.new @zip_path - @pages = file.entries.count do |e| - SUPPORTED_IMG_TYPES.includes? \ - MIME.from_filename? e.filename - end - file.close end def build_json(*, slim = false) JSON.build do |json| json.object do - {% for str in %w(zip_path title size id) %} - json.field {{str}}, @{{str.id}} + {% for str in %w(path title size id) %} + json.field {{str}}, {{str.id}} {% end %} if err_msg json.field "err_msg", err_msg end + json.field "zip_path", path # for API backward compatibility + json.field "path", path json.field "title_id", @book.id json.field "title_title", @book.title json.field "sort_title", sort_title @@ -74,6 +63,9 @@ class Entry end end + @[YAML::Field(ignore: true)] + @sort_title : String? + def sort_title sort_title_cached = @sort_title return sort_title_cached if sort_title_cached @@ -131,58 +123,6 @@ class Entry url end - private def sorted_archive_entries - ArchiveFile.open @zip_path do |file| - entries = file.entries - .select { |e| - SUPPORTED_IMG_TYPES.includes? \ - MIME.from_filename? e.filename - } - .sort! { |a, b| - compare_numerically a.filename, b.filename - } - yield file, entries - end - end - - def read_page(page_num) - raise "Unreadble archive. #{@err_msg}" if @err_msg - img = nil - begin - sorted_archive_entries do |file, entries| - page = entries[page_num - 1] - data = file.read_entry page - if data - img = Image.new data, MIME.from_filename(page.filename), - page.filename, data.size - end - end - rescue e - Logger.warn "Unable to read page #{page_num} of #{@zip_path}. Error: #{e}" - end - img - end - - def page_dimensions - sizes = [] of Hash(String, Int32) - sorted_archive_entries do |file, entries| - entries.each_with_index do |e, i| - begin - data = file.read_entry(e).not_nil!
- size = ImageSize.get data - sizes << { - "width" => size.width, - "height" => size.height, - } - rescue e - Logger.warn "Failed to read page #{i} of entry #{zip_path}. #{e}" - sizes << {"width" => 1000_i32, "height" => 1000_i32} - end - end - end - sizes - end - def next_entry(username) entries = @book.sorted_entries username idx = entries.index self @@ -197,20 +137,6 @@ class Entry entries[idx - 1] end - def date_added - date_added = nil - TitleInfo.new @book.dir do |info| - info_da = info.date_added[@title]? - if info_da.nil? - date_added = info.date_added[@title] = ctime @zip_path - info.save - else - date_added = info_da - end - end - date_added.not_nil! # is it ok to set not_nil! here? - end - # For backward backward compatibility with v0.1.0, we save entry titles # instead of IDs in info.json def save_progress(username, page) @@ -290,7 +216,7 @@ class Entry end Storage.default.save_thumbnail @id, img rescue e - Logger.warn "Failed to generate thumbnail for file #{@zip_path}. #{e}" + Logger.warn "Failed to generate thumbnail for file #{path}. #{e}" end img @@ -299,4 +225,34 @@ class Entry def get_thumbnail : Image? Storage.default.get_thumbnail @id end + + def date_added : Time + date_added = Time::UNIX_EPOCH + TitleInfo.new @book.dir do |info| + info_da = info.date_added[@title]? + if info_da.nil? + date_added = info.date_added[@title] = ctime path + info.save + else + date_added = info_da + end + end + date_added + end + + # Hack to have abstract class methods + # https://github.com/crystal-lang/crystal/issues/5956 + private module ClassMethods + abstract def is_valid?(path : String) : Bool + end + + macro inherited + extend ClassMethods + end + + abstract def read_page(page_num) + + abstract def page_dimensions + + abstract def examine : Bool? 
end diff --git a/src/library/title.cr b/src/library/title.cr index e3d79d5..e9873f2 100644 --- a/src/library/title.cr +++ b/src/library/title.cr @@ -49,13 +49,18 @@ class Title path = File.join dir, fn if File.directory? path title = Title.new path, @id, cache - next if title.entries.size == 0 && title.titles.size == 0 - Library.default.title_hash[title.id] = title - @title_ids << title.id + unless title.entries.size == 0 && title.titles.size == 0 + Library.default.title_hash[title.id] = title + @title_ids << title.id + end + if DirEntry.is_valid? path + entry = DirEntry.new path, self + @entries << entry if entry.pages > 0 || entry.err_msg + end next end if is_supported_file path - entry = Entry.new path, self + entry = ArchiveEntry.new path, self @entries << entry if entry.pages > 0 || entry.err_msg end end @@ -127,12 +132,12 @@ class Title previous_entries_size = @entries.size @entries.select! do |entry| - existence = File.exists? entry.zip_path + existence = entry.examine Fiber.yield context["deleted_entry_ids"] << entry.id unless existence existence end - remained_entry_zip_paths = @entries.map &.zip_path + remained_entry_paths = @entries.map &.path is_titles_added = false is_entries_added = false @@ -140,29 +145,43 @@ class Title next if fn.starts_with? "." path = File.join dir, fn if File.directory? path + unless remained_entry_paths.includes? path + if DirEntry.is_valid? path + entry = DirEntry.new path, self + if entry.pages > 0 || entry.err_msg + @entries << entry + is_entries_added = true + context["deleted_entry_ids"].select! do |deleted_entry_id| + entry.id != deleted_entry_id + end + end + end + end + next if remained_title_dirs.includes? 
path title = Title.new path, @id, context["cached_contents_signature"] - next if title.entries.size == 0 && title.titles.size == 0 - Library.default.title_hash[title.id] = title - @title_ids << title.id - is_titles_added = true + unless title.entries.size == 0 && title.titles.size == 0 + Library.default.title_hash[title.id] = title + @title_ids << title.id + is_titles_added = true - # We think they are removed, but they are here! - # Cancel reserved jobs - revival_title_ids = [title.id] + title.deep_titles.map &.id - context["deleted_title_ids"].select! do |deleted_title_id| - !(revival_title_ids.includes? deleted_title_id) - end - revival_entry_ids = title.deep_entries.map &.id - context["deleted_entry_ids"].select! do |deleted_entry_id| - !(revival_entry_ids.includes? deleted_entry_id) + # We think they are removed, but they are here! + # Cancel reserved jobs + revival_title_ids = [title.id] + title.deep_titles.map &.id + context["deleted_title_ids"].select! do |deleted_title_id| + !(revival_title_ids.includes? deleted_title_id) + end + revival_entry_ids = title.deep_entries.map &.id + context["deleted_entry_ids"].select! do |deleted_entry_id| + !(revival_entry_ids.includes? deleted_entry_id) + end end next end if is_supported_file path - next if remained_entry_zip_paths.includes? path - entry = Entry.new path, self + next if remained_entry_paths.includes? path + entry = ArchiveEntry.new path, self if entry.pages > 0 || entry.err_msg @entries << entry is_entries_added = true @@ -627,7 +646,7 @@ class Title @entries.each do |e| next if da.has_key? 
e.title - da[e.title] = ctime e.zip_path + da[e.title] = ctime e.path end TitleInfo.new @dir do |info| diff --git a/src/library/types.cr b/src/library/types.cr index 973aa5e..d6a014f 100644 --- a/src/library/types.cr +++ b/src/library/types.cr @@ -1,13 +1,3 @@ -SUPPORTED_IMG_TYPES = %w( - image/jpeg - image/png - image/webp - image/apng - image/avif - image/gif - image/svg+xml -) - enum SortMethod Auto Title diff --git a/src/routes/api.cr b/src/routes/api.cr index e664b28..89b4a30 100644 --- a/src/routes/api.cr +++ b/src/routes/api.cr @@ -40,7 +40,7 @@ struct APIRouter Koa.schema "entry", { "pages" => Int32, "mtime" => Int64, - }.merge(s %w(zip_path title size id title_id display_name cover_url)), + }.merge(s %w(zip_path path title size id title_id display_name cover_url)), desc: "An entry in a book" Koa.schema "title", { @@ -142,8 +142,13 @@ struct APIRouter env.response.status_code = 304 "" else + if entry.is_a? DirEntry + cache_control = "no-cache, max-age=86400" + else + cache_control = "public, max-age=86400" + end env.response.headers["ETag"] = e_tag - env.response.headers["Cache-Control"] = "public, max-age=86400" + env.response.headers["Cache-Control"] = cache_control send_img env, img end rescue e @@ -1138,15 +1143,24 @@ struct APIRouter entry = title.get_entry eid raise "Entry ID `#{eid}` of `#{title.title}` not found" if entry.nil? - file_hash = Digest::SHA1.hexdigest (entry.zip_path + entry.mtime.to_s) + if entry.is_a? DirEntry + file_hash = Digest::SHA1.hexdigest(entry.path + entry.mtime.to_s + entry.size) + else + file_hash = Digest::SHA1.hexdigest(entry.path + entry.mtime.to_s) + end e_tag = "W/#{file_hash}" if e_tag == prev_e_tag env.response.status_code = 304 send_text env, "" else sizes = entry.page_dimensions + if entry.is_a? 
DirEntry + cache_control = "no-cache, max-age=86400" + else + cache_control = "public, max-age=86400" + end env.response.headers["ETag"] = e_tag - env.response.headers["Cache-Control"] = "public, max-age=86400" + env.response.headers["Cache-Control"] = cache_control send_json env, { "success" => true, "dimensions" => sizes, @@ -1172,7 +1186,7 @@ struct APIRouter title = (Library.default.get_title env.params.url["tid"]).not_nil! entry = (title.get_entry env.params.url["eid"]).not_nil! - send_attachment env, entry.zip_path + send_attachment env, entry.path rescue e Logger.error e env.response.status_code = 404 diff --git a/src/routes/reader.cr b/src/routes/reader.cr index 40b86aa..f76dc2d 100644 --- a/src/routes/reader.cr +++ b/src/routes/reader.cr @@ -53,6 +53,7 @@ struct ReaderRouter render "src/views/reader.html.ecr" rescue e Logger.error e + Logger.debug e.backtrace? env.response.status_code = 404 end end diff --git a/src/util/signature.cr b/src/util/signature.cr index 5ca3e14..8d2b961 100644 --- a/src/util/signature.cr +++ b/src/util/signature.cr @@ -19,7 +19,7 @@ class File # information as long as the above changes do not happen together with # a file/folder rename, with no library scan in between. def self.signature(filename) : UInt64 - if is_supported_file filename + if ArchiveEntry.is_valid?(filename) || is_supported_image_file(filename) File.info(filename).inode else 0u64 @@ -64,10 +64,11 @@ class Dir path = File.join dirname, fn if File.directory? path signatures << Dir.contents_signature path, cache + signatures << fn if DirEntry.is_valid? path else # Only add its signature value to `signatures` when it is a # supported file - signatures << fn if is_supported_file fn + signatures << fn if ArchiveEntry.is_valid? fn end Fiber.yield end @@ -76,4 +77,19 @@ class Dir cache[dirname] = hash hash end + + def self.directory_entry_signature(dirname, cache = {} of String => String) + return cache[dirname + "?entry"] if cache[dirname + "?entry"]? 
+ Fiber.yield + signatures = [] of String + image_files = DirEntry.sorted_image_files dirname + if image_files.size > 0 + image_files.each do |path| + signatures << File.signature(path).to_s + end + end + hash = Digest::SHA1.hexdigest(signatures.join) + cache[dirname + "?entry"] = hash + hash + end end diff --git a/src/util/util.cr b/src/util/util.cr index e7b1b1a..e08bd9d 100644 --- a/src/util/util.cr +++ b/src/util/util.cr @@ -3,6 +3,15 @@ ENTRIES_IN_HOME_SECTIONS = 8 UPLOAD_URL_PREFIX = "/uploads" STATIC_DIRS = %w(/css /js /img /webfonts /favicon.ico /robots.txt) SUPPORTED_FILE_EXTNAMES = [".zip", ".cbz", ".rar", ".cbr"] +SUPPORTED_IMG_TYPES = %w( + image/jpeg + image/png + image/webp + image/apng + image/avif + image/gif + image/svg+xml +) def random_str UUID.random.to_s.gsub "-", "" @@ -49,6 +58,10 @@ def is_supported_file(path) SUPPORTED_FILE_EXTNAMES.includes? File.extname(path).downcase end +def is_supported_image_file(path) + SUPPORTED_IMG_TYPES.includes? MIME.from_filename? path +end + struct Int def or(other : Int) if self == 0 diff --git a/src/views/opds/title.xml.ecr b/src/views/opds/title.xml.ecr index b159687..1d82490 100644 --- a/src/views/opds/title.xml.ecr +++ b/src/views/opds/title.xml.ecr @@ -29,7 +29,7 @@ - + diff --git a/src/views/reader-error.html.ecr b/src/views/reader-error.html.ecr index 62a80fc..ad3580f 100644 --- a/src/views/reader-error.html.ecr +++ b/src/views/reader-error.html.ecr @@ -5,7 +5,7 @@