Compare commits

..

2 Commits

Author SHA1 Message Date
18fa38120b ref: Optimize fb2 parser 2025-09-09 17:42:38 +03:00
bf81b5d8f4 ref: Improve Book instantiation 2025-09-09 17:39:59 +03:00
4 changed files with 66 additions and 53 deletions

View File

@@ -1,11 +1,10 @@
use crate::domain::author;
use crate::domain::book::Book;
use quick_xml::events::Event;
use quick_xml::Reader;
use uuid::Uuid;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use crate::domain::author;
use crate::domain::book::Book;
/// Parses an XML file located at the given path and extracts information about books.
///
@@ -82,26 +81,23 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let mut reader = Reader::from_reader(BufReader::new(file));
let mut buf = Vec::new();
let mut title = String::new();
let mut lang = String::new();
let mut description = String::new();
let mut keywords = Vec::new();
let mut authors = Vec::new();
let mut published_at = String::new();
let mut publisher = String::new();
let mut in_title = false;
let mut in_lang = false;
let mut in_keywords = false;
let mut in_description = false;
let mut book = Book::new();
loop {
match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.name().as_ref() {
b"book-title" => in_title = true,
b"lang" => in_lang = true,
b"keywords" => in_keywords = true,
b"annotation" => { in_description = true; description.clear(); },
b"annotation" => {
in_description = true;
book.description.clear();
}
b"author" => {
let mut buf_author = Vec::new();
let mut first_name = String::new();
@@ -113,30 +109,56 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
match reader.read_event_into(&mut buf_author) {
Ok(Event::Start(c)) => match c.name().as_ref() {
b"first-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
first_name = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if let Ok(Event::Text(t)) =
reader.read_event_into(&mut buf_author)
{
first_name = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
}
}
b"last-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { last_name = Some(val); }
if let Ok(Event::Text(t)) =
reader.read_event_into(&mut buf_author)
{
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
last_name = Some(val);
}
}
}
b"middle-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { middle_name = Some(val); }
if let Ok(Event::Text(t)) =
reader.read_event_into(&mut buf_author)
{
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
middle_name = Some(val);
}
}
}
b"nickname" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) {
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned();
if !val.is_empty() { nickname = Some(val); }
if let Ok(Event::Text(t)) =
reader.read_event_into(&mut buf_author)
{
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
nickname = Some(val);
}
}
}
_ => {}
}
},
Ok(Event::End(c)) if c.name().as_ref() == b"author" => break,
Ok(Event::Eof) => break,
_ => {}
@@ -152,41 +174,41 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
author.last_name = last_name.clone();
author.middle_name = middle_name.clone();
}
authors.push(author);
book.author.push(author);
}
b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"publisher" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
book.publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
_ => {}
},
Ok(Event::Text(t)) if in_title => {
title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
book.title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
Ok(Event::Text(t)) if in_lang => {
lang = t.xml_content().map_err(|e| e.to_string())?.into_owned();
},
book.language = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
Ok(Event::Text(t)) if in_keywords => {
let raw = t.xml_content().map_err(|e| e.to_string())?;
for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
keywords.push(tag.to_string());
book.tags.push(tag.to_string());
}
}
},
Ok(Event::Text(t)) if in_description => {
let txt = t.xml_content().map_err(|e| e.to_string())?;
if !txt.trim().is_empty() {
if !description.is_empty() {
description.push(' ');
if !book.description.is_empty() {
book.description.push(' ');
}
book.description.push_str(&txt);
}
description.push_str(&txt);
}
},
Ok(Event::End(e)) => match e.name().as_ref() {
b"book-title" => in_title = false,
b"lang" => in_lang = false,
@@ -201,15 +223,5 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
buf.clear();
}
Ok(vec![Book{
id: Uuid::new_v4(),
title,
author: authors,
language: lang,
description,
tags: keywords,
published_at,
publisher,
updated: chrono::Utc::now().to_rfc3339(),
}])
Ok(vec![book])
}

View File

@@ -1,5 +1,6 @@
use crate::domain::author;
use std::fmt;
use chrono::{DateTime, Utc};
use uuid::Uuid;
#[derive(Clone, PartialEq, Eq)]
@@ -12,7 +13,7 @@ pub struct Book {
pub tags: Vec<String>,
pub published_at: String,
pub publisher: String,
pub updated: String,
pub updated: DateTime<Utc>,
}
impl Book {
@@ -26,7 +27,7 @@ impl Book {
tags: vec![],
published_at: "".to_string(),
publisher: "".to_string(),
updated: "".to_string(),
updated: chrono::Utc::now(),
}
}

View File

@@ -90,7 +90,7 @@ impl From<&Book> for Entry {
Entry{
title: book.title.clone(),
id: book.id.to_string().clone(),
updated: book.updated.clone(),
updated: book.updated.to_rfc3339(),
author: book.author.clone().into_iter().map(|a| a.into()).collect(),
language: (!book.language.is_empty()).then(|| book.language.clone()),
issued: (!book.published_at.is_empty()).then(|| book.published_at.clone()),

View File

@@ -87,7 +87,7 @@ impl From<book::Book> for Book {
tags: book.tags,
published_at: book.published_at,
publisher: book.publisher,
updated: book.updated,
updated: book.updated.to_rfc3339(),
}
}
}
@@ -113,7 +113,7 @@ impl Into<book::Book> for Book {
tags: self.tags,
published_at: self.published_at,
publisher: self.publisher,
updated: self.updated,
updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
}
}
}