feat: Save book file path, refactoring

Close #1
This commit is contained in:
2025-09-09 21:16:14 +03:00
parent 18fa38120b
commit 00dc0e39b9
9 changed files with 85 additions and 195 deletions

View File

@@ -11,6 +11,7 @@ use crate::domain::book::Book;
use std::collections::VecDeque;
use std::fs;
use std::path::PathBuf;
use crate::application::parsers::Source;
pub struct Loader {
root: PathBuf,
@@ -76,7 +77,7 @@ impl Iterator for LoaderIter {
impl LoaderIter {
fn parse_path(path: &PathBuf) -> Option<Vec<Book>> {
match parsers::parse(&path) {
match parsers::parse(Source{ path: (*path).clone(), reader: None}) {
Ok(books) => return Some(books),
Err(err) => {
match err {

View File

@@ -8,6 +8,7 @@ use std::collections::VecDeque;
use std::ffi::OsStr;
use std::io;
use std::path::PathBuf;
use crate::application::parsers::Source;
const BUFFER_SIZE: usize = 4096;
@@ -72,7 +73,7 @@ impl<'a> LoaderIter<'a> {
return None;
}
match parsers::parse(&path) {
match parsers::parse(Source{path: path.clone(), reader: None}) {
Ok(books) => {
for book in &books {
println!("{}", book);

View File

@@ -3,82 +3,18 @@ use crate::domain::book::Book;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::fs::File;
use std::io::BufReader;
use std::io::{BufReader, Read, BufRead};
use std::path::Path;
/// Parses an XML file located at the given path and extracts information about books.
///
/// This function processes the XML structure using a streaming XML reader to extract details
/// about books, including:
/// - Title
/// - Language
/// - Keywords
/// - Authors (including optional details like first name, last name, middle name, and nickname)
/// - Publication year
/// - Publisher
/// - Description
///
/// # Arguments
///
/// * `path` - A reference to the file path (`&Path`) of the XML file to parse.
///
/// # Returns
///
/// Returns a `Result` where:
/// - `Ok(Vec<Book>)` contains a vector of `Book` objects constructed from the parsed XML.
/// - `Err(String)` contains an error message if the parsing fails at any stage.
///
/// # Errors
///
/// Returns an error in the following scenarios:
/// - Unable to open the file specified by `path`.
/// - Malformed XML data in the file.
/// - Issues during data extraction, such as reading incomplete or invalid values.
///
/// # Example
///
/// ```ignore
/// use std::path::Path;
/// let path = Path::new("books.xml");
/// match parse(&path) {
/// Ok(books) => {
/// for book in books {
/// println!("Book Title: {}", book.title);
/// }
/// },
/// Err(err) => eprintln!("Failed to parse XML file: {}", err),
/// }
/// ```
///
/// # XML Structure
///
/// The XML should follow a specific schema with the following relevant elements:
/// - `<book-title>`: Title of the book.
/// - `<lang>`: Language of the book.
/// - `<keywords>`: A comma-separated list of keywords/tags.
/// - `<author>`: Contains subfields `<first-name>`, `<last-name>`, `<middle-name>`, or `<nickname>`.
/// - `<year>`: Year of publication.
/// - `<publisher>`: Publisher's name.
/// - `<annotation>`: Description or annotation of the book.
///
/// # Notes
///
/// - Author data is flexible; if a nickname exists, it will override other name details.
/// - The resulting `Vec<Book>` contains just one book object, as indicated in the implementation.
///
/// # Dependencies
///
/// This function depends on the following crates:
/// - `quick-xml`: For fast XML parsing.
/// - `uuid`: To generate a unique identifier for each book.
/// - `chrono`: To serialize the current timestamp as an RFC3339 string.
///
/// # See Also
///
/// `Book` structure, which represents the parsed data for an individual book.
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
    // I/O errors are flattened to their message because this function's
    // error type is a plain `String`.
    let file = File::open(path).map_err(|e| e.to_string())?;
    // `parse_with_reader` is generic over `BufRead`, so the buffered file is
    // handed over directly (no boxing needed); `path` is forwarded so the
    // parsed book can record where it came from.
    parse_with_reader(BufReader::new(file), path)
}
pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
let mut reader = Reader::from_reader(reader);
let mut buf = Vec::new();
let mut in_title = false;
@@ -87,6 +23,7 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let mut in_description = false;
let mut book = Book::new();
book.source = path.into();
loop {
match reader.read_event_into(&mut buf) {

View File

@@ -1,17 +1,13 @@
use std::fmt;
use crate::domain::book::Book;
use std::fmt;
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::path::PathBuf;
mod rs;
mod fb2;
mod rs;
mod zip;
/// Error enumeration representing possible errors that can occur when parsing files.
///
/// This enumeration has the following variants:
/// - `NotSupported`: Indicates that the file format or extension is not supported.
/// - `ParseError`: Contains a `String` representing the error message when a parsing process fails.
#[derive(Debug)]
pub enum Error {
NotSupported,
@@ -27,53 +23,28 @@ impl fmt::Display for Error {
}
}
/// Parses a file at the given path and attempts to convert its contents into a vector of `Book` objects.
///
/// This function determines the file type based on its extension and delegates the parsing duties
/// to the appropriate module. Supported file extensions are:
/// - `.rs`: Processed by the `rs` module.
/// - `.fb2`: Processed by the `fb2` module.
/// - `.zip`: Processed by the `zip` module.
///
/// If the file's extension is unsupported or missing, this function returns a `NotSupported` error.
///
/// # Arguments
///
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
///
/// # Returns
///
/// * `Ok(Vec<Book>)` - A vector of `Book` objects if the file was successfully parsed.
/// * `Err(Error)` - An error if the file could not be parsed, the parsing process encountered
/// an issue, or the file extension is not supported.
///
/// # Errors
///
/// - `Error::ParseError` - If the file parsing fails.
/// - `Error::NotSupported` - If the file's extension is unsupported or missing.
///
/// # Examples
///
/// ```ignore
/// use std::path::PathBuf;
///
/// let path = PathBuf::from("example.rs");
/// let books = parse(&path);
/// match books {
/// Ok(book_list) => println!("Parsed {} books.", book_list.len()),
/// Err(e) => println!("Failed to parse file: {:?}", e),
/// }
/// ```
///
/// # Notes
///
/// Ensure that the appropriate parsers (`rs`, `fb2`, `zip`) are properly implemented
/// and handle all required logic for their respective file types to avoid unexpected errors.
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, Error> {
/// Input handed to `parse`: the location of a book file and, optionally, an
/// already-open reader over its contents.
pub struct Source {
/// Path of the file; `parse` selects the parser from its extension.
pub path: PathBuf,
/// Optional already-open reader over the contents. For `fb2` sources `parse`
/// falls back to opening `path` itself when this is `None`.
pub reader: Option<Box<dyn BufRead>>, // reader is optional
}
/// Parses `source` into books, choosing the parser from the extension of
/// `source.path`.
///
/// * `rs`  - delegated to `rs::parse` on the path.
/// * `fb2` - read from `source.reader` when one is supplied, otherwise from
///   the file at `source.path`.
/// * `zip` - always opened from `source.path`; `source.reader` is not used.
///
/// # Errors
///
/// * `Error::NotSupported` - the extension is missing, is not valid UTF-8, or
///   is none of the above (the comparison is case-sensitive).
/// * `Error::ParseError` - opening the file or parsing its contents failed;
///   the payload is the underlying error message.
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
    let path = &source.path;
    match path.extension().and_then(|s| s.to_str()) {
        Some("rs") => rs::parse(path).map_err(Error::ParseError),
        // Use the supplied reader if there is one, otherwise open the file.
        Some("fb2") => match source.reader {
            Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
            None => fb2::parse(path).map_err(Error::ParseError),
        },
        Some("zip") => {
            // ZIP always opens the file itself: the archive reader needs
            // `Seek`, which a `Box<dyn BufRead>` cannot provide.
            let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
            zip::parse_direct(file, path).map_err(Error::ParseError)
        }
        Some(_) | None => Err(Error::NotSupported),
    }
}

View File

@@ -1,72 +1,44 @@
use crate::application::parsers;
use crate::application::parsers::{parse as parse_source, Source};
use crate::domain::book::Book;
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::io::{BufRead, Cursor, Read};
use std::path::Path;
use zip::ZipArchive;
/// Parses a ZIP archive to extract a collection of `Book` objects.
///
/// This function takes a path to a ZIP archive file, reads its contents, and processes
/// each file within the archive to extract `Book` objects using a custom parser. If any
/// errors occur during file access, archive extraction, or parsing, they are returned as
/// a `String`. On success, it returns a vector of `Book` objects contained in the archive.
///
/// # Arguments
///
/// * `path` - A reference to a `Path` representing the file system path to the ZIP archive.
///
/// # Returns
///
/// * `Ok(Vec<Book>)` - A vector containing the `Book` objects successfully parsed
/// from the files in the archive.
/// * `Err(String)` - An error message if any step in opening the file, reading the archive,
/// or parsing the files fails.
///
/// # Errors
///
/// This function returns an error in the following cases:
/// * If the ZIP file cannot be opened.
/// * If the ZIP archive cannot be read.
/// * If an individual file within the archive cannot be accessed.
/// * If the parsing of a file fails.
///
/// # Example
///
/// ```ignore
/// use std::path::Path;
/// use your_crate::parse;
///
/// let path = Path::new("books_archive.zip");
/// match parse(&path) {
/// Ok(books) => {
/// for book in books {
/// println!("Parsed book: {:?}", book);
/// }
/// }
/// Err(e) => eprintln!("Failed to parse books: {}", e),
/// }
/// ```
///
/// # Dependencies
///
/// This function relies on the `ZipArchive` for working with ZIP files and a `parsers`
/// module for custom file parsing logic.
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let file = File::open(path).map_err(|e| e.to_string())?;
let reader = BufReader::new(file);
let mut archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
pub fn parse_direct<R: Read + std::io::Seek + 'static>(
reader: R,
path: &Path,
) -> Result<Vec<Book>, String> {
let archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
let mut books: Vec<Book> = Vec::new();
for i in 0..archive.len() {
let file = archive.by_index(i).map_err(|e| e.to_string())?;
let name = file.name().to_string();
// We need the archive's path so that the files inside it can be opened
let archive_path = path.to_path_buf();
match parsers::parse(&PathBuf::from(name.to_lowercase())) {
for i in 0..archive.len() {
// Re-open the archive for every entry, because ZipArchive consumes files
let file = File::open(&archive_path).map_err(|e| e.to_string())?;
let mut archive = ZipArchive::new(file).map_err(|e| e.to_string())?;
let mut zip_file = archive.by_index(i).map_err(|e| e.to_string())?;
let name = zip_file.name().to_string();
let file_path = path.to_path_buf().join("#").join(&name);
// Read the entry's contents into memory and wrap them in an in-memory reader (Cursor)
let mut contents = Vec::new();
zip_file
.read_to_end(&mut contents)
.map_err(|e| e.to_string())?;
let file_reader: Box<dyn BufRead> = Box::new(Cursor::new(contents)); // Изменили на BufRead
let source = Source {
path: file_path,
reader: Some(file_reader),
};
match parse_source(source) {
Ok(new_books) => books.extend(new_books),
Err(e) => return Err(e.to_string()),
Err(e) => return Err(format!("Error parsing {}: {}", name, e)),
}
}

View File

@@ -68,13 +68,14 @@ impl Books {
}
pub fn add_books_from_path(&mut self) {
let iter = fs::Loader::new(PathBuf::from(&self.root));
let books = fs::Loader::new(PathBuf::from(&self.root))
.into_iter()
.map(|mut book| {book.source = book.source.strip_prefix(&self.root).unwrap().into(); book})
.collect();
match self.repo.lock() {
Ok(mut repo) => {
for book in iter {
repo.add(book);
}
repo.bulk_add(books);
}
Err(err) => eprintln!("{}", err),
}

View File

@@ -1,5 +1,6 @@
use crate::domain::author;
use std::fmt;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use uuid::Uuid;
@@ -14,6 +15,7 @@ pub struct Book {
pub published_at: String,
pub publisher: String,
pub updated: DateTime<Utc>,
pub source: PathBuf
}
impl Book {
@@ -28,6 +30,7 @@ impl Book {
published_at: "".to_string(),
publisher: "".to_string(),
updated: chrono::Utc::now(),
source: PathBuf::new()
}
}
@@ -60,7 +63,7 @@ impl fmt::Display for Book {
.collect::<Vec<_>>()
.join(";");
write!(f, "{} by {}", self.title, authors)
write!(f, "{} by {} at {}", self.title, authors, self.source.to_str().unwrap())
}
}

View File

@@ -74,6 +74,7 @@ struct Book {
published_at: String,
publisher: String,
updated: String,
source: String,
}
impl From<book::Book> for Book {
@@ -88,6 +89,7 @@ impl From<book::Book> for Book {
published_at: book.published_at,
publisher: book.publisher,
updated: book.updated.to_rfc3339(),
source: book.source.as_os_str().to_str().unwrap().to_string(),
}
}
}
@@ -114,6 +116,7 @@ impl Into<book::Book> for Book {
published_at: self.published_at,
publisher: self.publisher,
updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
source: self.source.into(),
}
}
}

View File

@@ -13,7 +13,7 @@ fn main() {
id: None,
name: None, //Some("rs".to_string()),
}),
title: Some("Сборник".to_string()),
title: Some("пов".to_string()),
language: None,
description: None,
tags: None,
@@ -27,7 +27,8 @@ fn main() {
if let Some(book) = res.entry.iter().next() {
let book = app.repo.lock().unwrap().get(book.id.to_string().clone());
println!("{:?}", book.unwrap().author);
println!("{:?}", book.clone().unwrap().author);
println!("{}", book.unwrap());
}
// sleep(Duration::new(10, 0));