Add fb2 parser

This commit is contained in:
2025-09-08 20:33:23 +03:00
parent 96f15ab51e
commit 7037f6f316
6 changed files with 359 additions and 17 deletions

191
Cargo.lock generated
View File

@@ -17,6 +17,21 @@ version = "2.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
[[package]]
name = "android_system_properties"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
dependencies = [
"libc",
]
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "backtrace"
version = "0.3.75"
@@ -44,12 +59,36 @@ version = "3.19.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
[[package]]
name = "cc"
version = "1.2.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5252b3d2648e5eedbc1a6f501e3c795e07025c1e93bbf8bbdd6eef7f447a6d54"
dependencies = [
"find-msvc-tools",
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9"
[[package]]
name = "chrono"
version = "0.4.42"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
dependencies = [
"iana-time-zone",
"js-sys",
"num-traits",
"serde",
"wasm-bindgen",
"windows-link 0.2.0",
]
[[package]]
name = "convert_case"
version = "0.8.0"
@@ -59,6 +98,12 @@ dependencies = [
"unicode-segmentation",
]
[[package]]
name = "core-foundation-sys"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "displaydoc"
version = "0.2.5"
@@ -102,6 +147,24 @@ dependencies = [
"syn",
]
[[package]]
name = "fb2"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2d27bbfaf8178bef57815d3b6169100928316099922e419668ee50f61e87e7a"
dependencies = [
"chrono",
"language-tags",
"quick-xml 0.30.0",
"serde",
]
[[package]]
name = "find-msvc-tools"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fd99930f64d146689264c637b5af2f0233a933bef0d8570e2526bf9e083192d"
[[package]]
name = "form_urlencoded"
version = "1.2.2"
@@ -135,6 +198,30 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "iana-time-zone"
version = "0.1.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
dependencies = [
"android_system_properties",
"core-foundation-sys",
"iana-time-zone-haiku",
"js-sys",
"log",
"wasm-bindgen",
"windows-core",
]
[[package]]
name = "iana-time-zone-haiku"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
dependencies = [
"cc",
]
[[package]]
name = "icu_collections"
version = "2.0.0"
@@ -285,6 +372,15 @@ dependencies = [
"wasm-bindgen",
]
[[package]]
name = "language-tags"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4345964bb142484797b161f473a503a434de77149dd8c7427788c6e13379388"
dependencies = [
"serde",
]
[[package]]
name = "libc"
version = "0.2.175"
@@ -329,6 +425,15 @@ dependencies = [
"windows-sys",
]
[[package]]
name = "num-traits"
version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
]
[[package]]
name = "object"
version = "0.36.7"
@@ -348,9 +453,12 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
name = "opds"
version = "0.1.0"
dependencies = [
"chrono",
"envman",
"fb2",
"inotify",
"quick-xml",
"log",
"quick-xml 0.38.3",
"serde",
"url",
"uuid",
@@ -395,6 +503,16 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "quick-xml"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "quick-xml"
version = "0.38.3"
@@ -487,6 +605,12 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbfa15b3dddfee50a0fff136974b3e1bde555604ba463834a7eb7deb6417705d"
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "slab"
version = "0.4.11"
@@ -713,6 +837,71 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "windows-core"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
dependencies = [
"windows-implement",
"windows-interface",
"windows-link 0.1.3",
"windows-result",
"windows-strings",
]
[[package]]
name = "windows-implement"
version = "0.60.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-interface"
version = "0.59.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "windows-link"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
[[package]]
name = "windows-link"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "45e46c0661abb7180e7b9c281db115305d49ca1709ab8242adf09666d2173c65"
[[package]]
name = "windows-result"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows-strings"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
dependencies = [
"windows-link 0.1.3",
]
[[package]]
name = "windows-sys"
version = "0.59.0"

View File

@@ -8,6 +8,8 @@ url = "2"
inotify = "0.11.0"
serde = { version = "1.0.219", features = ["derive"] }
envman = "2.0.0"
fb2 = "0.4.4"
chrono = "0.4.42"
uuid = { version = "1.18.1", features = [
"v4",
"v7",
@@ -18,6 +20,7 @@ uuid = { version = "1.18.1", features = [
"rng",
] }
quick-xml = { version = "0.38.3", features = ["serialize", "serde"] }
log = "0.4.28"
[profile.release]
lto = "fat"

View File

@@ -0,0 +1,145 @@
use quick_xml::events::Event;
use quick_xml::Reader;
use uuid::Uuid;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use crate::domain::author;
use crate::domain::book::Book;
pub fn parse(path: &Path) -> Result<Book, String> {
let file = File::open(path).map_err(|e| e.to_string())?;
let mut reader = Reader::from_reader(BufReader::new(file));
let mut buf = Vec::new();
let mut title = String::new();
let mut lang = String::new();
let mut description = String::new();
let mut keywords = Vec::new();
let mut authors = Vec::new();
let mut published_at = String::new();
let mut publisher = String::new();
let mut in_title = false;
let mut in_lang = false;
let mut in_keywords = false;
let mut in_description = false;
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.name().as_ref() {
b"book-title" => in_title = true,
b"lang" => in_lang = true,
b"keywords" => in_keywords = true,
b"annotation" => { in_description = true; description.clear(); },
b"author" => {
let mut buf_author = Vec::new();
let mut first_name = String::new();
let mut last_name = None;
let mut middle_name = None;
let mut nickname = None;
loop {
match reader.read_event_into(&mut buf_author) {
Ok(Event::Start(c)) => match c.name().as_ref() {
b"first-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
first_name = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"last-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { last_name = Some(val); }
}
}
b"middle-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { middle_name = Some(val); }
}
}
b"nickname" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { nickname = Some(val); }
}
}
_ => {}
}
Ok(Event::End(c)) if c.name().as_ref() == b"author" => break,
Ok(Event::Eof) => break,
_ => {}
}
buf_author.clear();
}
let mut author = author::Author::new();
if let Some(nick) = nickname {
author.first_name = nick;
} else {
author.first_name = first_name.clone();
author.last_name = last_name.clone();
author.middle_name = middle_name.clone();
}
authors.push(author);
}
b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"publisher" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
_ => {}
},
Ok(Event::Text(t)) if in_title => {
title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
Ok(Event::Text(t)) if in_lang => {
lang = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
Ok(Event::Text(t)) if in_keywords => {
let raw = t.xml_content().map_err(|e| e.to_string())?;
for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
keywords.push(tag.to_string());
}
},
Ok(Event::Text(t)) if in_description => {
let txt = t.xml_content().map_err(|e| e.to_string())?;
if !txt.trim().is_empty() {
if !description.is_empty() {
description.push(' ');
}
description.push_str(&txt);
}
},
Ok(Event::End(e)) => match e.name().as_ref() {
b"book-title" => in_title = false,
b"lang" => in_lang = false,
b"keywords" => in_keywords = false,
b"annotation" => in_description = false,
_ => {}
},
Ok(Event::Eof) => break,
Err(e) => return Err(e.to_string()),
_ => {}
}
buf.clear();
}
Ok(Book{
id: Uuid::new_v4(),
title,
author: authors,
language: lang,
description,
tags: keywords,
published_at,
publisher,
updated: chrono::Utc::now().to_rfc3339(),
})
}

View File

@@ -2,6 +2,7 @@ use crate::domain::book::Book;
use std::path::PathBuf;
mod rs;
mod fb2;
#[derive(Debug)]
pub enum Error {
@@ -11,6 +12,7 @@ pub enum Error {
/// Dispatches `path` to the parser that handles its file extension.
///
/// The extension is compared case-insensitively, so `book.FB2` is handled the
/// same way as `book.fb2`.
///
/// # Errors
///
/// * [`Error::ParseError`] wraps the message returned by the selected parser.
/// * [`Error::NotSupported`] is returned when the path has no extension, the
///   extension is not valid UTF-8, or no parser handles it.
pub fn parse(path: &PathBuf) -> Result<Book, Error> {
    // Normalise once so the match arms can stay plain lowercase literals.
    let extension = path
        .extension()
        .and_then(|ext| ext.to_str())
        .map(str::to_ascii_lowercase);

    match extension.as_deref() {
        Some("rs") => rs::parse(path).map_err(Error::ParseError),
        Some("fb2") => fb2::parse(path).map_err(Error::ParseError),
        Some(_) | None => Err(Error::NotSupported),
    }
}

View File

@@ -160,10 +160,12 @@ impl Repository<book::Book, BookFilter> for BookRepository {
fn filter(&self, f: BookFilter) -> Box<dyn Iterator<Item = book::Book>> {
let mut author_ids: Vec<String> = vec![];
let mut use_author = false;
if let Some(author) = f.author {
if let Some(id) = author.id {
author_ids.push(id);
use_author = true;
}
if let Some(name) = author.name {
@@ -175,12 +177,13 @@ impl Repository<book::Book, BookFilter> for BookRepository {
&& author.clone().middle_name.unwrap().contains(&name))
{
author_ids.push(id.to_string());
use_author = true;
}
}
}
}
if author_ids.is_empty() {
if author_ids.is_empty() && use_author {
return Box::new(std::iter::empty::<book::Book>())
}

View File

@@ -11,9 +11,9 @@ fn main() {
let filter = BookFilter {
author: Some(AuthorFilter{
id: None,
name: Some("rs".to_string()),
name: None, //Some("rs".to_string()),
}),
title: Some("service".to_string()),
title: Some("Сборник".to_string()),
language: None,
description: None,
tags: None,
@@ -30,17 +30,17 @@ fn main() {
println!("{:?}", book.unwrap().author);
}
sleep(Duration::new(10, 0));
let filter = BookFilter {
author: None,
title: Some("foo".to_string()),
language: None,
description: None,
tags: None,
published_at: None,
publisher: None,
updated: None,
};
println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
// sleep(Duration::new(10, 0));
//
// let filter = BookFilter {
// author: None,
// title: Some("foo".to_string()),
// language: None,
// description: None,
// tags: None,
// published_at: None,
// publisher: None,
// updated: None,
// };
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
}