From 00dc0e39b9167dd01ee477f163c658b0aa658328 Mon Sep 17 00:00:00 2001 From: derfenix Date: Tue, 9 Sep 2025 21:16:14 +0300 Subject: [PATCH] feat: Save book file path, refactoring Close #1 --- src/application/loaders/fs.rs | 3 +- src/application/loaders/inotify.rs | 3 +- src/application/parsers/fb2.rs | 81 ++--------------- src/application/parsers/mod.rs | 77 +++++----------- src/application/parsers/zip.rs | 94 +++++++------------- src/application/services/books.rs | 9 +- src/domain/book.rs | 5 +- src/infrastructure/repository/inmem/books.rs | 3 + src/main.rs | 5 +- 9 files changed, 85 insertions(+), 195 deletions(-) diff --git a/src/application/loaders/fs.rs b/src/application/loaders/fs.rs index 31a1e1f..efa4ec3 100644 --- a/src/application/loaders/fs.rs +++ b/src/application/loaders/fs.rs @@ -11,6 +11,7 @@ use crate::domain::book::Book; use std::collections::VecDeque; use std::fs; use std::path::PathBuf; +use crate::application::parsers::Source; pub struct Loader { root: PathBuf, @@ -76,7 +77,7 @@ impl Iterator for LoaderIter { impl LoaderIter { fn parse_path(path: &PathBuf) -> Option> { - match parsers::parse(&path) { + match parsers::parse(Source{ path: (*path).clone(), reader: None}) { Ok(books) => return Some(books), Err(err) => { match err { diff --git a/src/application/loaders/inotify.rs b/src/application/loaders/inotify.rs index ef9b4cd..2f2baec 100644 --- a/src/application/loaders/inotify.rs +++ b/src/application/loaders/inotify.rs @@ -8,6 +8,7 @@ use std::collections::VecDeque; use std::ffi::OsStr; use std::io; use std::path::PathBuf; +use crate::application::parsers::Source; const BUFFER_SIZE: usize = 4096; @@ -72,7 +73,7 @@ impl<'a> LoaderIter<'a> { return None; } - match parsers::parse(&path) { + match parsers::parse(Source{path: path.clone(), reader: None}) { Ok(books) => { for book in &books { println!("{}", book); diff --git a/src/application/parsers/fb2.rs b/src/application/parsers/fb2.rs index 5c12d71..a92887a 100644 --- a/src/application/parsers/fb2.rs +++ b/src/application/parsers/fb2.rs @@ -3,82 +3,18 @@ use crate::domain::book::Book; use quick_xml::events::Event; use quick_xml::Reader; use std::fs::File; -use std::io::BufReader; +use std::io::{BufReader, Read, BufRead}; use std::path::Path; -/// Parses an XML file located at the given path and extracts information about books. -/// -/// This function processes the XML structure using a streaming XML reader to extract details -/// about books, including: -/// - Title -/// - Language -/// - Keywords -/// - Authors (including optional details like first name, last name, middle name, and nickname) -/// - Publication year -/// - Publisher -/// - Description -/// -/// # Arguments -/// -/// * `path` - A reference to the file path (`&Path`) of the XML file to parse. -/// -/// # Returns -/// -/// Returns a `Result` where: -/// - `Ok(Vec)` contains a vector of `Book` objects constructed from the parsed XML. -/// - `Err(String)` contains an error message if the parsing fails at any stage. -/// -/// # Errors -/// -/// Returns an error in the following scenarios: -/// - Unable to open the file specified by `path`. -/// - Malformed XML data in the file. -/// - Issues during data extraction, such as reading incomplete or invalid values. -/// -/// # Example -/// -/// ```ignore -/// use std::path::Path; -/// let path = Path::new("books.xml"); -/// match parse(&path) { -/// Ok(books) => { -/// for book in books { -/// println!("Book Title: {}", book.title); -/// } -/// }, -/// Err(err) => eprintln!("Failed to parse XML file: {}", err), -/// } -/// ``` -/// -/// # XML Structure -/// -/// The XML should follow a specific schema with the following relevant elements: -/// - ``: Title of the book. -/// - ``: Language of the book. -/// - ``: A comma-separated list of keywords/tags. -/// - ``: Contains subfields ``, ``, ``, or ``. -/// - ``: Year of publication. -/// - ``: Publisher's name. -/// - ``: Description or annotation of the book. -/// -/// # Notes -/// -/// - Author data is flexible; if a nickname exists, it will override other name details. -/// - The resulting `Vec` contains just one book object, as indicated in the implementation. -/// -/// # Dependencies -/// -/// This function depends on the following crates: -/// - `quick-xml`: For fast XML parsing. -/// - `uuid`: To generate a unique identifier for each book. -/// - `chrono`: To serialize the current timestamp as an RFC3339 string. -/// -/// # See Also -/// -/// `Book` structure, which represents the parsed data for an individual book. pub fn parse(path: &Path) -> Result, String> { let file = File::open(path).map_err(|e| e.to_string())?; - let mut reader = Reader::from_reader(BufReader::new(file)); + let reader = BufReader::new(file); + parse_with_reader(Box::new(reader), path) +} + + +pub fn parse_with_reader(reader: R, path: &Path) -> Result, String> { + let mut reader = Reader::from_reader(reader); let mut buf = Vec::new(); let mut in_title = false; @@ -87,6 +23,7 @@ pub fn parse(path: &Path) -> Result, String> { let mut in_description = false; let mut book = Book::new(); + book.source = path.into(); loop { match reader.read_event_into(&mut buf) { diff --git a/src/application/parsers/mod.rs b/src/application/parsers/mod.rs index 640bb74..77b51aa 100644 --- a/src/application/parsers/mod.rs +++ b/src/application/parsers/mod.rs @@ -1,17 +1,13 @@ -use std::fmt; use crate::domain::book::Book; +use std::fmt; +use std::fs::File; +use std::io::{BufReader, BufRead}; use std::path::PathBuf; -mod rs; mod fb2; +mod rs; mod zip; - -/// Error enumeration representing possible errors that can occur when parsing files. -/// -/// This enumeration has the following variants: -/// - `NotSupported`: Indicates that the file format or extension is not supported. -/// - `ParseError`: Contains a `String` representing the error message when a parsing process fails. #[derive(Debug)] pub enum Error { NotSupported, @@ -27,53 +23,28 @@ impl fmt::Display for Error { } } -/// Parses a file at the given path and attempts to convert its contents into a vector of `Book` objects. -/// -/// This function determines the file type based on its extension and delegates the parsing duties -/// to the appropriate module. Supported file extensions are: -/// - `.rs`: Processed by the `rs` module. -/// - `.fb2`: Processed by the `fb2` module. -/// - `.zip`: Processed by the `zip` module. -/// -/// If the file's extension is unsupported or missing, this function returns a `NotSupported` error. -/// -/// # Arguments -/// -/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed. -/// -/// # Returns -/// -/// * `Ok(Vec)` - A vector of `Book` objects if the file was successfully parsed. -/// * `Err(Error)` - An error if the file could not be parsed, the parsing process encountered -/// an issue, or the file extension is not supported. -/// -/// # Errors -/// -/// - `Error::ParseError` - If the file parsing fails. -/// - `Error::NotSupported` - If the file's extension is unsupported or missing. -/// -/// # Examples -/// -/// ```ignore -/// use std::path::PathBuf; -/// -/// let path = PathBuf::from("example.rs"); -/// let books = parse(&path); -/// match books { -/// Ok(book_list) => println!("Parsed {} books.", book_list.len()), -/// Err(e) => println!("Failed to parse file: {:?}", e), -/// } -/// ``` -/// -/// # Notes -/// -/// Ensure that the appropriate parsers (`rs`, `fb2`, `zip`) are properly implemented -/// and handle all required logic for their respective file types to avoid unexpected errors. -pub fn parse(path: &PathBuf) -> Result, Error> { +pub struct Source { + pub path: PathBuf, + pub reader: Option>, // Сделал reader опциональным +} + +pub fn parse(source: Source) -> Result, Error> { + let path = &source.path; + match path.extension().and_then(|s| s.to_str()) { Some("rs") => rs::parse(path).map_err(Error::ParseError), - Some("fb2") => fb2::parse(path).map_err(Error::ParseError), - Some("zip") => zip::parse(path).map_err(Error::ParseError), + Some("fb2") => { + // Если reader предоставлен, используем его, иначе открываем файл + match source.reader { + Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError), + None => fb2::parse(path).map_err(Error::ParseError), + } + }, + Some("zip") => { + // ZIP всегда открывает файлы сам, так как нужен Seek + let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?; + zip::parse_direct(file, path).map_err(Error::ParseError) + }, Some(_) | None => Err(Error::NotSupported), } } diff --git a/src/application/parsers/zip.rs b/src/application/parsers/zip.rs index 54adbce..14ec5ad 100644 --- a/src/application/parsers/zip.rs +++ b/src/application/parsers/zip.rs @@ -1,72 +1,44 @@ -use crate::application::parsers; +use crate::application::parsers::{parse as parse_source, Source}; use crate::domain::book::Book; use std::fs::File; -use std::io::BufReader; -use std::path::{Path, PathBuf}; +use std::io::{BufRead, Cursor, Read}; +use std::path::Path; use zip::ZipArchive; - -/// Parses a ZIP archive to extract a collection of `Book` objects. -/// -/// This function takes a path to a ZIP archive file, reads its contents, and processes -/// each file within the archive to extract `Book` objects using a custom parser. If any -/// errors occur during file access, archive extraction, or parsing, they are returned as -/// a `String`. On success, it returns a vector of `Book` objects contained in the archive. -/// -/// # Arguments -/// -/// * `path` - A reference to a `Path` representing the file system path to the ZIP archive. -/// -/// # Returns -/// -/// * `Ok(Vec)` - A vector containing the `Book` objects successfully parsed -/// from the files in the archive. -/// * `Err(String)` - An error message if any step in opening the file, reading the archive, -/// or parsing the files fails. -/// -/// # Errors -/// -/// This function returns an error in the following cases: -/// * If the ZIP file cannot be opened. -/// * If the ZIP archive cannot be read. -/// * If an individual file within the archive cannot be accessed. -/// * If the parsing of a file fails. -/// -/// # Example -/// -/// ```ignore -/// use std::path::Path; -/// use your_crate::parse; -/// -/// let path = Path::new("books_archive.zip"); -/// match parse(&path) { -/// Ok(books) => { -/// for book in books { -/// println!("Parsed book: {:?}", book); -/// } -/// } -/// Err(e) => eprintln!("Failed to parse books: {}", e), -/// } -/// ``` -/// -/// # Dependencies -/// -/// This function relies on the `ZipArchive` for working with ZIP files and a `parsers` -/// module for custom file parsing logic. -pub fn parse(path: &Path) -> Result, String> { - let file = File::open(path).map_err(|e| e.to_string())?; - let reader = BufReader::new(file); - let mut archive = ZipArchive::new(reader).map_err(|e| e.to_string())?; - +pub fn parse_direct( + reader: R, + path: &Path, +) -> Result, String> { + let archive = ZipArchive::new(reader).map_err(|e| e.to_string())?; let mut books: Vec = Vec::new(); - for i in 0..archive.len() { - let file = archive.by_index(i).map_err(|e| e.to_string())?; - let name = file.name().to_string(); + // Нам нужно знать путь к архиву для открытия файлов внутри него + let archive_path = path.to_path_buf(); - match parsers::parse(&PathBuf::from(name.to_lowercase())) { + for i in 0..archive.len() { + // Открываем архив заново для каждого файла, так как ZipArchive consumes files + let file = File::open(&archive_path).map_err(|e| e.to_string())?; + let mut archive = ZipArchive::new(file).map_err(|e| e.to_string())?; + let mut zip_file = archive.by_index(i).map_err(|e| e.to_string())?; + + let name = zip_file.name().to_string(); + let file_path = path.to_path_buf().join("#").join(&name); + + // Читаем содержимое файла в память и оборачиваем в BufReader + let mut contents = Vec::new(); + zip_file + .read_to_end(&mut contents) + .map_err(|e| e.to_string())?; + + let file_reader: Box = Box::new(Cursor::new(contents)); // Изменили на BufRead + let source = Source { + path: file_path, + reader: Some(file_reader), + }; + + match parse_source(source) { Ok(new_books) => books.extend(new_books), - Err(e) => return Err(e.to_string()), + Err(e) => return Err(format!("Error parsing {}: {}", name, e)), } } diff --git a/src/application/services/books.rs b/src/application/services/books.rs index 8243ddf..3805832 100644 --- a/src/application/services/books.rs +++ b/src/application/services/books.rs @@ -68,13 +68,14 @@ impl Books { } pub fn add_books_from_path(&mut self) { - let iter = fs::Loader::new(PathBuf::from(&self.root)); + let books = fs::Loader::new(PathBuf::from(&self.root)) + .into_iter() + .map(|mut book| {book.source = book.source.strip_prefix(&self.root).unwrap().into(); book}) + .collect(); match self.repo.lock() { Ok(mut repo) => { - for book in iter { - repo.add(book); - } + repo.bulk_add(books); } Err(err) => eprintln!("{}", err), } diff --git a/src/domain/book.rs b/src/domain/book.rs index 4c938da..95e8725 100644 --- a/src/domain/book.rs +++ b/src/domain/book.rs @@ -1,5 +1,6 @@ use crate::domain::author; use std::fmt; +use std::path::PathBuf; use chrono::{DateTime, Utc}; use uuid::Uuid; @@ -14,6 +15,7 @@ pub struct Book { pub published_at: String, pub publisher: String, pub updated: DateTime, + pub source: PathBuf } impl Book { @@ -28,6 +30,7 @@ impl Book { published_at: "".to_string(), publisher: "".to_string(), updated: chrono::Utc::now(), + source: PathBuf::new() } } @@ -60,7 +63,7 @@ impl fmt::Display for Book { .collect::>() .join(";"); - write!(f, "{} by {}", self.title, authors) + write!(f, "{} by {} at {}", self.title, authors, self.source.to_str().unwrap()) } } diff --git a/src/infrastructure/repository/inmem/books.rs b/src/infrastructure/repository/inmem/books.rs index c998064..bcb66ce 100644 --- a/src/infrastructure/repository/inmem/books.rs +++ b/src/infrastructure/repository/inmem/books.rs @@ -74,6 +74,7 @@ struct Book { published_at: String, publisher: String, updated: String, + source: String, } impl From for Book { @@ -88,6 +89,7 @@ impl From for Book { published_at: book.published_at, publisher: book.publisher, updated: book.updated.to_rfc3339(), + source: book.source.as_os_str().to_str().unwrap().to_string(), } } } @@ -114,6 +116,7 @@ impl Into for Book { published_at: self.published_at, publisher: self.publisher, updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(), + source: self.source.into(), } } } diff --git a/src/main.rs b/src/main.rs index b9cbc15..c6a7f4b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ fn main() { id: None, name: None, //Some("rs".to_string()), }), - title: Some("Сборник".to_string()), + title: Some("пов".to_string()), language: None, description: None, tags: None, @@ -27,7 +27,8 @@ fn main() { if let Some(book) = res.entry.iter().next() { let book = app.repo.lock().unwrap().get(book.id.to_string().clone()); - println!("{:?}", book.unwrap().author); + println!("{:?}", book.clone().unwrap().author); + println!("{}", book.unwrap()); } // sleep(Duration::new(10, 0));