Compare commits

..

2 Commits

Author SHA1 Message Date
18fa38120b ref: Optimize fb2 parser 2025-09-09 17:42:38 +03:00
bf81b5d8f4 ref: Improve Book instantiation 2025-09-09 17:39:59 +03:00
4 changed files with 66 additions and 53 deletions

View File

@@ -1,11 +1,10 @@
use crate::domain::author;
use crate::domain::book::Book;
use quick_xml::events::Event; use quick_xml::events::Event;
use quick_xml::Reader; use quick_xml::Reader;
use uuid::Uuid;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use std::path::Path; use std::path::Path;
use crate::domain::author;
use crate::domain::book::Book;
/// Parses an XML file located at the given path and extracts information about books. /// Parses an XML file located at the given path and extracts information about books.
/// ///
@@ -82,26 +81,23 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let mut reader = Reader::from_reader(BufReader::new(file)); let mut reader = Reader::from_reader(BufReader::new(file));
let mut buf = Vec::new(); let mut buf = Vec::new();
let mut title = String::new();
let mut lang = String::new();
let mut description = String::new();
let mut keywords = Vec::new();
let mut authors = Vec::new();
let mut published_at = String::new();
let mut publisher = String::new();
let mut in_title = false; let mut in_title = false;
let mut in_lang = false; let mut in_lang = false;
let mut in_keywords = false; let mut in_keywords = false;
let mut in_description = false; let mut in_description = false;
let mut book = Book::new();
loop { loop {
match reader.read_event_into(&mut buf) { match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.name().as_ref() { Ok(Event::Start(e)) => match e.name().as_ref() {
b"book-title" => in_title = true, b"book-title" => in_title = true,
b"lang" => in_lang = true, b"lang" => in_lang = true,
b"keywords" => in_keywords = true, b"keywords" => in_keywords = true,
b"annotation" => { in_description = true; description.clear(); }, b"annotation" => {
in_description = true;
book.description.clear();
}
b"author" => { b"author" => {
let mut buf_author = Vec::new(); let mut buf_author = Vec::new();
let mut first_name = String::new(); let mut first_name = String::new();
@@ -113,30 +109,56 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
match reader.read_event_into(&mut buf_author) { match reader.read_event_into(&mut buf_author) {
Ok(Event::Start(c)) => match c.name().as_ref() { Ok(Event::Start(c)) => match c.name().as_ref() {
b"first-name" => { b"first-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
first_name = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
{
first_name = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
} }
} }
b"last-name" => { b"last-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { last_name = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
last_name = Some(val);
}
} }
} }
b"middle-name" => { b"middle-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { middle_name = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
middle_name = Some(val);
}
} }
} }
b"nickname" => { b"nickname" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { nickname = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
nickname = Some(val);
}
} }
} }
_ => {} _ => {}
} },
Ok(Event::End(c)) if c.name().as_ref() == b"author" => break, Ok(Event::End(c)) if c.name().as_ref() == b"author" => break,
Ok(Event::Eof) => break, Ok(Event::Eof) => break,
_ => {} _ => {}
@@ -152,41 +174,41 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
author.last_name = last_name.clone(); author.last_name = last_name.clone();
author.middle_name = middle_name.clone(); author.middle_name = middle_name.clone();
} }
authors.push(author); book.author.push(author);
} }
b"year" => { b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) { if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
} }
} }
b"publisher" => { b"publisher" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) { if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
} }
} }
_ => {} _ => {}
}, },
Ok(Event::Text(t)) if in_title => { Ok(Event::Text(t)) if in_title => {
title = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}, }
Ok(Event::Text(t)) if in_lang => { Ok(Event::Text(t)) if in_lang => {
lang = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.language = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}, }
Ok(Event::Text(t)) if in_keywords => { Ok(Event::Text(t)) if in_keywords => {
let raw = t.xml_content().map_err(|e| e.to_string())?; let raw = t.xml_content().map_err(|e| e.to_string())?;
for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) { for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
keywords.push(tag.to_string()); book.tags.push(tag.to_string());
} }
}, }
Ok(Event::Text(t)) if in_description => { Ok(Event::Text(t)) if in_description => {
let txt = t.xml_content().map_err(|e| e.to_string())?; let txt = t.xml_content().map_err(|e| e.to_string())?;
if !txt.trim().is_empty() { if !txt.trim().is_empty() {
if !description.is_empty() { if !book.description.is_empty() {
description.push(' '); book.description.push(' ');
} }
description.push_str(&txt); book.description.push_str(&txt);
} }
}, }
Ok(Event::End(e)) => match e.name().as_ref() { Ok(Event::End(e)) => match e.name().as_ref() {
b"book-title" => in_title = false, b"book-title" => in_title = false,
b"lang" => in_lang = false, b"lang" => in_lang = false,
@@ -201,15 +223,5 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
buf.clear(); buf.clear();
} }
Ok(vec![Book{ Ok(vec![book])
id: Uuid::new_v4(),
title,
author: authors,
language: lang,
description,
tags: keywords,
published_at,
publisher,
updated: chrono::Utc::now().to_rfc3339(),
}])
} }

View File

@@ -1,5 +1,6 @@
use crate::domain::author; use crate::domain::author;
use std::fmt; use std::fmt;
use chrono::{DateTime, Utc};
use uuid::Uuid; use uuid::Uuid;
#[derive(Clone, PartialEq, Eq)] #[derive(Clone, PartialEq, Eq)]
@@ -12,7 +13,7 @@ pub struct Book {
pub tags: Vec<String>, pub tags: Vec<String>,
pub published_at: String, pub published_at: String,
pub publisher: String, pub publisher: String,
pub updated: String, pub updated: DateTime<Utc>,
} }
impl Book { impl Book {
@@ -26,7 +27,7 @@ impl Book {
tags: vec![], tags: vec![],
published_at: "".to_string(), published_at: "".to_string(),
publisher: "".to_string(), publisher: "".to_string(),
updated: "".to_string(), updated: chrono::Utc::now(),
} }
} }

View File

@@ -90,7 +90,7 @@ impl From<&Book> for Entry {
Entry{ Entry{
title: book.title.clone(), title: book.title.clone(),
id: book.id.to_string().clone(), id: book.id.to_string().clone(),
updated: book.updated.clone(), updated: book.updated.to_rfc3339(),
author: book.author.clone().into_iter().map(|a| a.into()).collect(), author: book.author.clone().into_iter().map(|a| a.into()).collect(),
language: (!book.language.is_empty()).then(|| book.language.clone()), language: (!book.language.is_empty()).then(|| book.language.clone()),
issued: (!book.published_at.is_empty()).then(|| book.published_at.clone()), issued: (!book.published_at.is_empty()).then(|| book.published_at.clone()),

View File

@@ -87,7 +87,7 @@ impl From<book::Book> for Book {
tags: book.tags, tags: book.tags,
published_at: book.published_at, published_at: book.published_at,
publisher: book.publisher, publisher: book.publisher,
updated: book.updated, updated: book.updated.to_rfc3339(),
} }
} }
} }
@@ -113,7 +113,7 @@ impl Into<book::Book> for Book {
tags: self.tags, tags: self.tags,
published_at: self.published_at, published_at: self.published_at,
publisher: self.publisher, publisher: self.publisher,
updated: self.updated, updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
} }
} }
} }