Add fb2 parser

This commit is contained in:
2025-09-08 20:33:23 +03:00
parent 96f15ab51e
commit 7037f6f316
6 changed files with 359 additions and 17 deletions

View File

@@ -0,0 +1,145 @@
use quick_xml::events::Event;
use quick_xml::Reader;
use uuid::Uuid;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use crate::domain::author;
use crate::domain::book::Book;
pub fn parse(path: &Path) -> Result<Book, String> {
let file = File::open(path).map_err(|e| e.to_string())?;
let mut reader = Reader::from_reader(BufReader::new(file));
let mut buf = Vec::new();
let mut title = String::new();
let mut lang = String::new();
let mut description = String::new();
let mut keywords = Vec::new();
let mut authors = Vec::new();
let mut published_at = String::new();
let mut publisher = String::new();
let mut in_title = false;
let mut in_lang = false;
let mut in_keywords = false;
let mut in_description = false;
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.name().as_ref() {
b"book-title" => in_title = true,
b"lang" => in_lang = true,
b"keywords" => in_keywords = true,
b"annotation" => { in_description = true; description.clear(); },
b"author" => {
let mut buf_author = Vec::new();
let mut first_name = String::new();
let mut last_name = None;
let mut middle_name = None;
let mut nickname = None;
loop {
match reader.read_event_into(&mut buf_author) {
Ok(Event::Start(c)) => match c.name().as_ref() {
b"first-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
first_name = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"last-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { last_name = Some(val); }
}
}
b"middle-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { middle_name = Some(val); }
}
}
b"nickname" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { nickname = Some(val); }
}
}
_ => {}
}
Ok(Event::End(c)) if c.name().as_ref() == b"author" => break,
Ok(Event::Eof) => break,
_ => {}
}
buf_author.clear();
}
let mut author = author::Author::new();
if let Some(nick) = nickname {
author.first_name = nick;
} else {
author.first_name = first_name.clone();
author.last_name = last_name.clone();
author.middle_name = middle_name.clone();
}
authors.push(author);
}
b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"publisher" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
_ => {}
},
Ok(Event::Text(t)) if in_title => {
title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
Ok(Event::Text(t)) if in_lang => {
lang = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
Ok(Event::Text(t)) if in_keywords => {
let raw = t.xml_content().map_err(|e| e.to_string())?;
for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
keywords.push(tag.to_string());
}
},
Ok(Event::Text(t)) if in_description => {
let txt = t.xml_content().map_err(|e| e.to_string())?;
if !txt.trim().is_empty() {
if !description.is_empty() {
description.push(' ');
}
description.push_str(&txt);
}
},
Ok(Event::End(e)) => match e.name().as_ref() {
b"book-title" => in_title = false,
b"lang" => in_lang = false,
b"keywords" => in_keywords = false,
b"annotation" => in_description = false,
_ => {}
},
Ok(Event::Eof) => break,
Err(e) => return Err(e.to_string()),
_ => {}
}
buf.clear();
}
Ok(Book{
id: Uuid::new_v4(),
title,
author: authors,
language: lang,
description,
tags: keywords,
published_at,
publisher,
updated: chrono::Utc::now().to_rfc3339(),
})
}

View File

@@ -2,6 +2,7 @@ use crate::domain::book::Book;
use std::path::PathBuf;
mod rs;
mod fb2;
#[derive(Debug)]
pub enum Error {
@@ -11,6 +12,7 @@ pub enum Error {
pub fn parse(path: &PathBuf) -> Result<Book, Error> {
match path.extension().and_then(|s| s.to_str()) {
Some("rs") => rs::parse(path).map_err(Error::ParseError),
Some("fb2") => fb2::parse(path).map_err(Error::ParseError),
Some(_) | None => Err(Error::NotSupported),
}
}

View File

@@ -160,10 +160,12 @@ impl Repository<book::Book, BookFilter> for BookRepository {
fn filter(&self, f: BookFilter) -> Box<dyn Iterator<Item = book::Book>> {
let mut author_ids: Vec<String> = vec![];
let mut use_author = false;
if let Some(author) = f.author {
if let Some(id) = author.id {
author_ids.push(id);
use_author = true;
}
if let Some(name) = author.name {
@@ -175,12 +177,13 @@ impl Repository<book::Book, BookFilter> for BookRepository {
&& author.clone().middle_name.unwrap().contains(&name))
{
author_ids.push(id.to_string());
use_author = true;
}
}
}
}
if author_ids.is_empty() {
if author_ids.is_empty() && use_author {
return Box::new(std::iter::empty::<book::Book>())
}

View File

@@ -11,9 +11,9 @@ fn main() {
let filter = BookFilter {
author: Some(AuthorFilter{
id: None,
name: Some("rs".to_string()),
name: None, //Some("rs".to_string()),
}),
title: Some("service".to_string()),
title: Some("Сборник".to_string()),
language: None,
description: None,
tags: None,
@@ -30,17 +30,17 @@ fn main() {
println!("{:?}", book.unwrap().author);
}
sleep(Duration::new(10, 0));
let filter = BookFilter {
author: None,
title: Some("foo".to_string()),
language: None,
description: None,
tags: None,
published_at: None,
publisher: None,
updated: None,
};
println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
// sleep(Duration::new(10, 0));
//
// let filter = BookFilter {
// author: None,
// title: Some("foo".to_string()),
// language: None,
// description: None,
// tags: None,
// published_at: None,
// publisher: None,
// updated: None,
// };
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
}