Compare commits

...

6 Commits

Author SHA1 Message Date
ec8cc12bf8 fix: Minor bugfix 2025-09-09 22:50:42 +03:00
4a7ed0974c feat: Deduplicate fb2 authors 2025-09-09 22:50:31 +03:00
5dc2cb112d doc: Regenerate AI docs for parsers, cleanup 2025-09-09 21:25:31 +03:00
00dc0e39b9 feat: Save book file path, refactoring
Close #1
2025-09-09 21:16:14 +03:00
18fa38120b ref: Optimize fb2 parser 2025-09-09 17:42:38 +03:00
bf81b5d8f4 ref: Improve Book instantiation 2025-09-09 17:39:59 +03:00
11 changed files with 368 additions and 259 deletions

View File

@@ -11,6 +11,7 @@ use crate::domain::book::Book;
use std::collections::VecDeque; use std::collections::VecDeque;
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
use crate::application::parsers::Source;
pub struct Loader { pub struct Loader {
root: PathBuf, root: PathBuf,
@@ -76,7 +77,7 @@ impl Iterator for LoaderIter {
impl LoaderIter { impl LoaderIter {
fn parse_path(path: &PathBuf) -> Option<Vec<Book>> { fn parse_path(path: &PathBuf) -> Option<Vec<Book>> {
match parsers::parse(&path) { match parsers::parse(Source{ path: (*path).clone(), reader: None}) {
Ok(books) => return Some(books), Ok(books) => return Some(books),
Err(err) => { Err(err) => {
match err { match err {

View File

@@ -8,6 +8,7 @@ use std::collections::VecDeque;
use std::ffi::OsStr; use std::ffi::OsStr;
use std::io; use std::io;
use std::path::PathBuf; use std::path::PathBuf;
use crate::application::parsers::Source;
const BUFFER_SIZE: usize = 4096; const BUFFER_SIZE: usize = 4096;
@@ -72,7 +73,7 @@ impl<'a> LoaderIter<'a> {
return None; return None;
} }
match parsers::parse(&path) { match parsers::parse(Source{path: path.clone(), reader: None}) {
Ok(books) => { Ok(books) => {
for book in &books { for book in &books {
println!("{}", book); println!("{}", book);

View File

@@ -1,107 +1,134 @@
use quick_xml::events::Event;
use quick_xml::Reader;
use uuid::Uuid;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use crate::domain::author; use crate::domain::author;
use crate::domain::book::Book; use crate::domain::book::Book;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Parses an XML file located at the given path and extracts information about books. /// Parses the content of a file at the given path to extract a list of `Book` objects.
///
/// This function processes the XML structure using a streaming XML reader to extract details
/// about books, including:
/// - Title
/// - Language
/// - Keywords
/// - Authors (including optional details like first name, last name, middle name, and nickname)
/// - Publication year
/// - Publisher
/// - Description
/// ///
/// # Arguments /// # Arguments
/// /// * `path` - A reference to a `Path` representing the location of the file to be parsed.
/// * `path` - A reference to the file path (`&Path`) of the XML file to parse.
/// ///
/// # Returns /// # Returns
/// /// * `Ok(Vec<Book>)` - A vector containing the list of `Book` objects on successful parsing.
/// Returns a `Result` where: /// * `Err(String)` - A string containing an error message if the file could not be opened or parsing failed.
/// - `Ok(Vec<Book>)` contains a vector of `Book` objects constructed from the parsed XML.
/// - `Err(String)` contains an error message if the parsing fails at any stage.
/// ///
/// # Errors /// # Errors
/// /// This function returns an error in the following cases:
/// Returns an error in the following scenarios: /// * If the file at the specified path cannot be opened, the corresponding IO error is converted to a string and returned.
/// - Unable to open the file specified by `path`. /// * If the parsing logic inside `parse_with_reader` fails, the returned error from that function is propagated.
/// - Malformed XML data in the file.
/// - Issues during data extraction, such as reading incomplete or invalid values.
/// ///
/// # Example /// # Example
///
/// ```ignore /// ```ignore
/// use std::path::Path; /// use std::path::Path;
///
/// let path = Path::new("books.txt");
/// let result = parse(path);
///
/// match result {
/// Ok(books) => println!("Parsed {} books", books.len()),
/// Err(err) => eprintln!("Error parsing books: {}", err),
/// }
/// ```
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let file = File::open(path).map_err(|e| e.to_string())?;
let reader = BufReader::new(file);
parse_with_reader(Box::new(reader), path)
}
/// Parses an XML document using a buffered reader and extracts book information.
///
/// This function reads an XML document containing book details and maps it to a `Vec<Book>`.
/// It supports parsing various elements such as title, language, keywords, author details,
/// publisher, description, and publication year. Errors during parsing are propagated as strings.
///
/// # Type Parameters
/// - `R`: A type that implements the [`BufRead`](std::io::BufRead) trait for buffered input.
///
/// # Arguments
/// - `reader`: A buffered reader instance (e.g., [`std::io::BufReader`]) to read the XML content.
/// - `path`: A reference to a [`Path`](std::path::Path) representing the source file or data for the book.
///
/// # Returns
/// - `Ok(Vec<Book>)` if parsing was successful, containing a vector of parsed `Book` results.
/// - `Err(String)` if an error occurred during parsing, containing a string description of the error.
///
/// # XML Parsing Details
/// The following XML tags are processed:
/// - `<book-title>`: Extracted as the book's title.
/// - `<lang>`: Extracted as the book's language.
/// - `<keywords>`: Split by commas into individual tags for the book.
/// - `<annotation>`: Extracted into a multiline description for the book.
/// - `<author>`: Processes child elements such as `<first-name>`, `<last-name>`, `<middle-name>`,
/// and `<nickname>` to construct an author's full details.
/// - `<year>`: Extracted as the book's publication year.
/// - `<publisher>`: Extracted as the book's publisher.
///
/// # Behavior
/// - Multiple authors are supported through repeated `<author>` tags.
/// - Whitespace is trimmed from all parsed text.
/// - If no content is available for certain fields (e.g., `<last-name>`), they may remain `None`
/// or their equivalent default.
///
/// # Errors
/// This function returns an error in the following cases:
/// - The XML data contains malformed or invalid content.
/// - Unexpected EOF is encountered during XML parsing.
/// - Any other IO or XML parsing errors occur.
///
/// # Example
/// ```ignore
/// use std::fs::File;
/// use std::io::BufReader;
/// use std::path::Path;
///
/// let file = File::open("books.xml").expect("Failed to open file");
/// let reader = BufReader::new(file);
/// let path = Path::new("books.xml"); /// let path = Path::new("books.xml");
/// match parse(&path) { ///
/// let result = parse_with_reader(reader, &path);
/// match result {
/// Ok(books) => { /// Ok(books) => {
/// for book in books { /// for book in books {
/// println!("Book Title: {}", book.title); /// println!("Book title: {}", book.title);
/// } /// }
/// }, /// }
/// Err(err) => eprintln!("Failed to parse XML file: {}", err), /// Err(e) => eprintln!("Error parsing XML: {}", e),
/// } /// }
/// ``` /// ```
/// ///
/// # XML Structure /// # Dependencies
/// /// This function relies on an external XML parsing library capable of handling streaming XML,
/// The XML should follow a specific schema with the following relevant elements: /// such as `quick-xml`. The `Book` and `author::Author` structs, along with their methods (e.g., `new`),
/// - `<book-title>`: Title of the book. /// must be defined elsewhere in the codebase.
/// - `<lang>`: Language of the book.
/// - `<keywords>`: A comma-separated list of keywords/tags.
/// - `<author>`: Contains subfields `<first-name>`, `<last-name>`, `<middle-name>`, or `<nickname>`.
/// - `<year>`: Year of publication.
/// - `<publisher>`: Publisher's name.
/// - `<annotation>`: Description or annotation of the book.
/// ///
/// # Notes /// # Notes
/// /// - The function assumes that the XML tags match the expected structure. Unrecognized tags are ignored.
/// - Author data is flexible; if a nickname exists, it will override other name details. /// - It is assumed that the user will extend support for future XML element types as needed.
/// - The resulting `Vec<Book>` contains just one book object, as indicated in the implementation. pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
/// let mut reader = Reader::from_reader(reader);
/// # Dependencies
///
/// This function depends on the following crates:
/// - `quick-xml`: For fast XML parsing.
/// - `uuid`: To generate a unique identifier for each book.
/// - `chrono`: To serialize the current timestamp as an RFC3339 string.
///
/// # See Also
///
/// `Book` structure, which represents the parsed data for an individual book.
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
let file = File::open(path).map_err(|e| e.to_string())?;
let mut reader = Reader::from_reader(BufReader::new(file));
let mut buf = Vec::new(); let mut buf = Vec::new();
let mut title = String::new();
let mut lang = String::new();
let mut description = String::new();
let mut keywords = Vec::new();
let mut authors = Vec::new();
let mut published_at = String::new();
let mut publisher = String::new();
let mut in_title = false; let mut in_title = false;
let mut in_lang = false; let mut in_lang = false;
let mut in_keywords = false; let mut in_keywords = false;
let mut in_description = false; let mut in_description = false;
let mut book = Book::new();
book.source = path.into();
loop { loop {
match reader.read_event_into(&mut buf) { match reader.read_event_into(&mut buf) {
Ok(Event::Start(e)) => match e.name().as_ref() { Ok(Event::Start(e)) => match e.name().as_ref() {
b"book-title" => in_title = true, b"book-title" => in_title = true,
b"lang" => in_lang = true, b"lang" => in_lang = true,
b"keywords" => in_keywords = true, b"keywords" => in_keywords = true,
b"annotation" => { in_description = true; description.clear(); }, b"annotation" => {
in_description = true;
book.description.clear();
}
b"author" => { b"author" => {
let mut buf_author = Vec::new(); let mut buf_author = Vec::new();
let mut first_name = String::new(); let mut first_name = String::new();
@@ -113,30 +140,56 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
match reader.read_event_into(&mut buf_author) { match reader.read_event_into(&mut buf_author) {
Ok(Event::Start(c)) => match c.name().as_ref() { Ok(Event::Start(c)) => match c.name().as_ref() {
b"first-name" => { b"first-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
first_name = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
{
first_name = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
} }
} }
b"last-name" => { b"last-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { last_name = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
last_name = Some(val);
}
} }
} }
b"middle-name" => { b"middle-name" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { middle_name = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
middle_name = Some(val);
}
} }
} }
b"nickname" => { b"nickname" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf_author) { if let Ok(Event::Text(t)) =
let val = t.xml_content().map_err(|e| e.to_string())?.into_owned(); reader.read_event_into(&mut buf_author)
if !val.is_empty() { nickname = Some(val); } {
let val = t
.xml_content()
.map_err(|e| e.to_string())?
.into_owned();
if !val.is_empty() {
nickname = Some(val);
}
} }
} }
_ => {} _ => {}
} },
Ok(Event::End(c)) if c.name().as_ref() == b"author" => break, Ok(Event::End(c)) if c.name().as_ref() == b"author" => break,
Ok(Event::Eof) => break, Ok(Event::Eof) => break,
_ => {} _ => {}
@@ -152,41 +205,42 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
author.last_name = last_name.clone(); author.last_name = last_name.clone();
author.middle_name = middle_name.clone(); author.middle_name = middle_name.clone();
} }
authors.push(author); book.author.push(author);
} }
b"year" => { b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) { if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.published_at =
t.xml_content().map_err(|e| e.to_string())?.into_owned();
} }
} }
b"publisher" => { b"publisher" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) { if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.publisher = t.xml_content().map_err(|e| e.to_string())?.into_owned();
} }
} }
_ => {} _ => {}
}, },
Ok(Event::Text(t)) if in_title => { Ok(Event::Text(t)) if in_title => {
title = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.title = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}, }
Ok(Event::Text(t)) if in_lang => { Ok(Event::Text(t)) if in_lang => {
lang = t.xml_content().map_err(|e| e.to_string())?.into_owned(); book.language = t.xml_content().map_err(|e| e.to_string())?.into_owned();
}, }
Ok(Event::Text(t)) if in_keywords => { Ok(Event::Text(t)) if in_keywords => {
let raw = t.xml_content().map_err(|e| e.to_string())?; let raw = t.xml_content().map_err(|e| e.to_string())?;
for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) { for tag in raw.split(',').map(str::trim).filter(|s| !s.is_empty()) {
keywords.push(tag.to_string()); book.tags.push(tag.to_string());
}
} }
},
Ok(Event::Text(t)) if in_description => { Ok(Event::Text(t)) if in_description => {
let txt = t.xml_content().map_err(|e| e.to_string())?; let txt = t.xml_content().map_err(|e| e.to_string())?;
if !txt.trim().is_empty() { if !txt.trim().is_empty() {
if !description.is_empty() { if !book.description.is_empty() {
description.push(' '); book.description.push(' ');
}
book.description.push_str(&txt);
} }
description.push_str(&txt);
} }
},
Ok(Event::End(e)) => match e.name().as_ref() { Ok(Event::End(e)) => match e.name().as_ref() {
b"book-title" => in_title = false, b"book-title" => in_title = false,
b"lang" => in_lang = false, b"lang" => in_lang = false,
@@ -201,15 +255,7 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
buf.clear(); buf.clear();
} }
Ok(vec![Book{ book.author.dedup_by(|a, b| a.uniq_id() == b.uniq_id());
id: Uuid::new_v4(),
title, Ok(vec![book])
author: authors,
language: lang,
description,
tags: keywords,
published_at,
publisher,
updated: chrono::Utc::now().to_rfc3339(),
}])
} }

View File

@@ -1,17 +1,13 @@
use std::fmt;
use crate::domain::book::Book; use crate::domain::book::Book;
use std::fmt;
use std::fs::File;
use std::io::BufRead;
use std::path::PathBuf; use std::path::PathBuf;
mod rs;
mod fb2; mod fb2;
mod rs;
mod zip; mod zip;
/// Error enumeration representing possible errors that can occur when parsing files.
///
/// This enumeration has the following variants:
/// - `NotSupported`: Indicates that the file format or extension is not supported.
/// - `ParseError`: Contains a `String` representing the error message when a parsing process fails.
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
NotSupported, NotSupported,
@@ -27,53 +23,129 @@ impl fmt::Display for Error {
} }
} }
/// Parses a file at the given path and attempts to convert its contents into a vector of `Book` objects. /// Represents a source file or location that can be read from.
/// ///
/// This function determines the file type based on its extension and delegates the parsing duties /// The `Source` struct encapsulates information about a file or source location, including its
/// to the appropriate module. Supported file extensions are: /// path and an optional reader for reading the source's contents. This allows for flexibility
/// - `.rs`: Processed by the `rs` module. /// in creating `Source` instances where the reading mechanism may be deferred or not immediately available.
/// - `.fb2`: Processed by the `fb2` module.
/// - `.zip`: Processed by the `zip` module.
/// ///
/// If the file's extension is unsupported or missing, this function returns a `NotSupported` error. /// # Fields
/// ///
/// # Arguments /// * `path` - A `PathBuf` representing the file system path to the source. This path must be valid.
/// /// * `reader` - An optional boxed dynamic trait object (`Option<Box<dyn BufRead>>`) that represents
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed. /// a buffered reader capable of reading the contents of the source. If `None`, no reader
/// /// is currently defined or available.
/// # Returns
///
/// * `Ok(Vec<Book>)` - A vector of `Book` objects if the file was successfully parsed.
/// * `Err(Error)` - An error if the file could not be parsed, the parsing process encountered
/// an issue, or the file extension is not supported.
///
/// # Errors
///
/// - `Error::ParseError` - If the file parsing fails.
/// - `Error::NotSupported` - If the file's extension is unsupported or missing.
/// ///
/// # Examples /// # Examples
/// ///
/// ```ignore /// ```ignore
/// use std::fs::File;
/// use std::io::{BufReader, BufRead};
/// use std::path::PathBuf; /// use std::path::PathBuf;
/// ///
/// let path = PathBuf::from("example.rs"); /// let path = PathBuf::from("example.txt");
/// let books = parse(&path); /// let file = File::open(&path).expect("Failed to open file");
/// match books { /// let reader = BufReader::new(file);
/// Ok(book_list) => println!("Parsed {} books.", book_list.len()), ///
/// Err(e) => println!("Failed to parse file: {:?}", e), /// let source = Source {
/// path,
/// reader: Some(Box::new(reader)),
/// };
/// ```
///
/// In the above example, a `Source` instance is created with a valid path and a buffered reader.
///
/// ```ignore
/// use std::path::PathBuf;
///
/// let source = Source {
/// path: PathBuf::from("example.txt"),
/// reader: None,
/// };
/// ```
///
/// In this example, a `Source` instance is created with a path but no reader. This might be useful
/// for cases where reading is deferred to a later point in time.
pub struct Source {
pub path: PathBuf,
pub reader: Option<Box<dyn BufRead>>, // Сделал reader опциональным
}
/// Parses a `Source` to extract a vector of `Book` objects.
///
/// This function handles different file types based on the file's extension.
/// Supported file types and their parsing strategies include:
/// - `.rs`: Uses the `rs::parse` function to handle Rust source files.
/// - `.fb2`: Handles FictionBook 2 (FB2) files. This supports:
/// - Using a provided `reader` (if `source.reader` is given).
/// - Opening and parsing directly from the file otherwise.
/// - `.zip`: Handles ZIP archives. Always opens the file itself as ZIP parsing
/// requires the file to implement the `Seek` trait.
///
/// If the file extension is unsupported or undefined, the function returns
/// an `Error::NotSupported`.
///
/// # Parameters
///
/// - `source`: A `Source` object containing the file path and an optional `reader`
/// for parsing.
///
/// # Returns
///
/// - `Ok(Vec<Book>)`: If the parsing is successful, returns a vector of `Book` objects.
/// - `Err(Error)`: If an error occurs during parsing or if the file type is unsupported.
///
/// # Errors
///
/// - Returns `Error::ParseError` if there is an issue reading or parsing the file.
/// - Returns `Error::NotSupported` if the file type is unsupported.
///
/// # Examples
///
/// ```ignore
/// let source = Source {
/// path: PathBuf::from("example.fb2"),
/// reader: None,
/// };
/// let result = parse(source);
/// match result {
/// Ok(books) => println!("Parsed successfully: {:?}", books),
/// Err(error) => eprintln!("Failed to parse: {:?}", error),
/// } /// }
/// ``` /// ```
/// ///
/// # Notes /// # Notes
/// ///
/// Ensure that the appropriate parsers (`rs`, `fb2`, `zip`) are properly implemented /// - For `.fb2` files, if a `reader` is provided in the `Source` object, it will
/// and handle all required logic for their respective file types to avoid unexpected errors. /// be used for parsing. Otherwise, the function will open the file and parse it.
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, Error> { /// - `.zip` files require the file to support the `Seek` trait and will always
/// be opened directly from the file system.
///
/// # File Types
///
/// - `.rs`: Rust source files.
/// - `.fb2`: FictionBook 2 files.
/// - `.zip`: ZIP archives.
///
/// # See Also
///
/// - [`rs::parse`](#)
/// - [`fb2::parse`](#)
/// - [`fb2::parse_with_reader`](#)
/// - [`zip::parse`](#)
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
let path = &source.path;
match path.extension().and_then(|s| s.to_str()) { match path.extension().and_then(|s| s.to_str()) {
Some("rs") => rs::parse(path).map_err(Error::ParseError), Some("rs") => rs::parse(path).map_err(Error::ParseError),
Some("fb2") => fb2::parse(path).map_err(Error::ParseError), Some("fb2") => match source.reader {
Some("zip") => zip::parse(path).map_err(Error::ParseError), Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
None => fb2::parse(path).map_err(Error::ParseError),
},
Some("zip") => {
let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
zip::parse(file, path).map_err(Error::ParseError)
}
Some(_) | None => Err(Error::NotSupported), Some(_) | None => Err(Error::NotSupported),
} }
} }

View File

@@ -2,44 +2,6 @@ use crate::domain::author::Author;
use crate::domain::book::Book; use crate::domain::book::Book;
use std::path::PathBuf; use std::path::PathBuf;
/// Parses a given file path into a vector containing a `Book` object.
///
/// # Arguments
///
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
///
/// # Returns
///
/// * `Result<Vec<Book>, String>` -
/// - On success, returns a `Vec<Book>` with a single `Book` object populated based on the input path.
/// - On failure, returns an error `String` describing the issue.
///
/// The function performs the following steps:
///
/// 1. Creates a new instance of `Book`.
/// 2. Sets the `title` of the `Book` to the string representation of the input path.
/// 3. Creates a new instance of `Author`.
/// 4. Sets the `first_name` of the `Author` to the string representation of the file extension of `path`.
/// 5. Pushes the `Author` into the `author` vector of the `Book`.
/// 6. Returns a `Vec<Book>` containing the newly created `Book`.
///
/// # Panics
///
/// The function will panic if the input path does not contain a file extension
/// (i.e., when `path.extension()` returns `None`).
///
/// # Example
///
/// ```ignore
/// use std::path::PathBuf;
///
/// let path = PathBuf::from("example.txt");
/// let books = parse(&path).unwrap();
///
/// assert_eq!(books.len(), 1);
/// assert_eq!(books[0].title, "example.txt");
/// assert_eq!(books[0].author[0].first_name, "txt");
/// ```
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> { pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
let mut book = Book::new(); let mut book = Book::new();
@@ -48,6 +10,7 @@ pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
let mut author = Author::new(); let mut author = Author::new();
author.first_name = path.extension().unwrap().to_string_lossy().to_string(); author.first_name = path.extension().unwrap().to_string_lossy().to_string();
book.author.push(author); book.author.push(author);
book.source = path.into();
return Ok(vec![ book]); return Ok(vec![ book]);
} }

View File

@@ -1,72 +1,102 @@
use crate::application::parsers; use crate::application::parsers::{parse as parse_source, Source};
use crate::domain::book::Book; use crate::domain::book::Book;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::{BufRead, Cursor, Read};
use std::path::{Path, PathBuf}; use std::path::Path;
use zip::ZipArchive; use zip::ZipArchive;
/// Parses a ZIP archive to extract and process book data.
/// Parses a ZIP archive to extract a collection of `Book` objects.
/// ///
/// This function takes a path to a ZIP archive file, reads its contents, and processes /// This function reads a given ZIP archive, processes each file inside, and attempts to parse
/// each file within the archive to extract `Book` objects using a custom parser. If any /// them into a vector of `Book` objects. Each file in the archive is expected to be in a format
/// errors occur during file access, archive extraction, or parsing, they are returned as /// compatible with the `parse_source` function.
/// a `String`. On success, it returns a vector of `Book` objects contained in the archive. ///
/// # Type Parameters
/// - `R`: A type that implements both [`Read`](std::io::Read) and [`Seek`](std::io::Seek), which allows
/// reading and seeking operations on the input file.
/// ///
/// # Arguments /// # Arguments
/// /// - `reader`: A reader implementing `Read` and `Seek`, used to access the ZIP archive.
/// * `path` - A reference to a `Path` representing the file system path to the ZIP archive. /// - `path`: A reference to a [`Path`](std::path::Path) representing the file system path to the ZIP archive.
/// ///
/// # Returns /// # Returns
/// /// - `Ok(Vec<Book>)`: If parsing is successful, returns a vector of `Book` objects extracted from
/// * `Ok(Vec<Book>)` - A vector containing the `Book` objects successfully parsed /// the ZIP archive.
/// from the files in the archive. /// - `Err(String)`: If an error occurs, returns a descriptive error message as a `String`.
/// * `Err(String)` - An error message if any step in opening the file, reading the archive,
/// or parsing the files fails.
/// ///
/// # Errors /// # Errors
/// /// The function may return an error in the following cases:
/// This function returns an error in the following cases: /// - If the ZIP archive cannot be opened.
/// * If the ZIP file cannot be opened. /// - If the function encounters issues reading from the ZIP archive or its files.
/// * If the ZIP archive cannot be read. /// - If a file within the archive cannot be parsed into `Book` objects.
/// * If an individual file within the archive cannot be accessed.
/// * If the parsing of a file fails.
/// ///
/// # Example /// # Example
///
/// ```ignore /// ```ignore
/// use std::fs::File;
/// use std::path::Path; /// use std::path::Path;
/// use your_crate::parse; /// use your_crate::parse_direct;
/// ///
/// let path = Path::new("books_archive.zip"); /// let file = File::open("test_books.zip").expect("Failed to open file");
/// match parse(&path) { /// let path = Path::new("test_books.zip");
/// Ok(books) => { ///
/// for book in books { /// match parse_direct(file, &path) {
/// println!("Parsed book: {:?}", book); /// Ok(books) => println!("Parsed {} books", books.len()),
/// } /// Err(e) => eprintln!("Error parsing archive: {}", e),
/// }
/// Err(e) => eprintln!("Failed to parse books: {}", e),
/// } /// }
/// ``` /// ```
/// ///
/// # Dependencies /// # Implementation Details
/// - The function processes each file inside the archive by reopening the archive for each iteration. This
/// ensures the `ZipArchive` does not consume other files during iteration.
/// - File contents are read into memory as raw bytes and wrapped in a [`BufReader`](std::io::BufRead) for further processing.
/// - Each file's path is dynamically constructed to include a placeholder `#` directory and its original name.
/// - The main parsing is delegated to the `parse_source` function, which returns either parsed books or an error.
/// ///
/// This function relies on the `ZipArchive` for working with ZIP files and a `parsers` /// # Notes
/// module for custom file parsing logic. /// - As the ZIP archive is re-opened for each file, the performance may be impacted for large archives.
pub fn parse(path: &Path) -> Result<Vec<Book>, String> { /// - The `parse_source` function is expected to be defined elsewhere in the codebase to handle the specific parsing logic.
let file = File::open(path).map_err(|e| e.to_string())?; ///
let reader = BufReader::new(file); /// # Dependencies
let mut archive = ZipArchive::new(reader).map_err(|e| e.to_string())?; /// - This function depends on the `zip` crate to handle ZIP file operations and a custom `Book` and `Source` structure
/// for processing individual entries.
///
/// # See Also
/// - [`ZipArchive`](zip::read::ZipArchive): Used to interact with the ZIP archive.
/// - `parse_source`: Required function that processes individual entries to extract `Book` data.
pub fn parse<R: Read + std::io::Seek + 'static>(
reader: R,
path: &Path,
) -> Result<Vec<Book>, String> {
let archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
let mut books: Vec<Book> = Vec::new(); let mut books: Vec<Book> = Vec::new();
for i in 0..archive.len() { // Нам нужно знать путь к архиву для открытия файлов внутри него
let file = archive.by_index(i).map_err(|e| e.to_string())?; let archive_path = path.to_path_buf();
let name = file.name().to_string();
match parsers::parse(&PathBuf::from(name.to_lowercase())) { for i in 0..archive.len() {
// Открываем архив заново для каждого файла, так как ZipArchive consumes files
let file = File::open(&archive_path).map_err(|e| e.to_string())?;
let mut archive = ZipArchive::new(file).map_err(|e| e.to_string())?;
let mut zip_file = archive.by_index(i).map_err(|e| e.to_string())?;
let name = zip_file.name().to_string();
let file_path = path.to_path_buf().join("#").join(&name);
// Читаем содержимое файла в память и оборачиваем в BufReader
let mut contents = Vec::new();
zip_file
.read_to_end(&mut contents)
.map_err(|e| e.to_string())?;
let file_reader: Box<dyn BufRead> = Box::new(Cursor::new(contents)); // Изменили на BufRead
let source = Source {
path: file_path,
reader: Some(file_reader),
};
match parse_source(source) {
Ok(new_books) => books.extend(new_books), Ok(new_books) => books.extend(new_books),
Err(e) => return Err(e.to_string()), Err(e) => return Err(format!("Error parsing {}: {}", name, e)),
} }
} }

View File

@@ -68,13 +68,17 @@ impl Books {
} }
pub fn add_books_from_path(&mut self) { pub fn add_books_from_path(&mut self) {
let iter = fs::Loader::new(PathBuf::from(&self.root)); let books = fs::Loader::new(PathBuf::from(&self.root))
.into_iter()
.map(|mut book| {match book.source.strip_prefix(&self.root) {
Ok(path) => book.source = path.to_path_buf(),
Err(err) => eprintln!("strip source prefix: {}", err)
}; book})
.collect();
match self.repo.lock() { match self.repo.lock() {
Ok(mut repo) => { Ok(mut repo) => {
for book in iter { repo.bulk_add(books);
repo.add(book);
}
} }
Err(err) => eprintln!("{}", err), Err(err) => eprintln!("{}", err),
} }

View File

@@ -1,5 +1,7 @@
use crate::domain::author; use crate::domain::author;
use std::fmt; use std::fmt;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use uuid::Uuid; use uuid::Uuid;
#[derive(Clone, PartialEq, Eq)] #[derive(Clone, PartialEq, Eq)]
@@ -12,7 +14,8 @@ pub struct Book {
pub tags: Vec<String>, pub tags: Vec<String>,
pub published_at: String, pub published_at: String,
pub publisher: String, pub publisher: String,
pub updated: String, pub updated: DateTime<Utc>,
pub source: PathBuf
} }
impl Book { impl Book {
@@ -26,7 +29,8 @@ impl Book {
tags: vec![], tags: vec![],
published_at: "".to_string(), published_at: "".to_string(),
publisher: "".to_string(), publisher: "".to_string(),
updated: "".to_string(), updated: chrono::Utc::now(),
source: PathBuf::new()
} }
} }
@@ -59,7 +63,7 @@ impl fmt::Display for Book {
.collect::<Vec<_>>() .collect::<Vec<_>>()
.join(";"); .join(";");
write!(f, "{} by {}", self.title, authors) write!(f, "{} by {} at {}", self.title, authors, self.source.to_str().unwrap())
} }
} }

View File

@@ -90,7 +90,7 @@ impl From<&Book> for Entry {
Entry{ Entry{
title: book.title.clone(), title: book.title.clone(),
id: book.id.to_string().clone(), id: book.id.to_string().clone(),
updated: book.updated.clone(), updated: book.updated.to_rfc3339(),
author: book.author.clone().into_iter().map(|a| a.into()).collect(), author: book.author.clone().into_iter().map(|a| a.into()).collect(),
language: (!book.language.is_empty()).then(|| book.language.clone()), language: (!book.language.is_empty()).then(|| book.language.clone()),
issued: (!book.published_at.is_empty()).then(|| book.published_at.clone()), issued: (!book.published_at.is_empty()).then(|| book.published_at.clone()),

View File

@@ -74,6 +74,7 @@ struct Book {
published_at: String, published_at: String,
publisher: String, publisher: String,
updated: String, updated: String,
source: String,
} }
impl From<book::Book> for Book { impl From<book::Book> for Book {
@@ -87,7 +88,8 @@ impl From<book::Book> for Book {
tags: book.tags, tags: book.tags,
published_at: book.published_at, published_at: book.published_at,
publisher: book.publisher, publisher: book.publisher,
updated: book.updated, updated: book.updated.to_rfc3339(),
source: book.source.as_os_str().to_str().unwrap().to_string(),
} }
} }
} }
@@ -113,7 +115,8 @@ impl Into<book::Book> for Book {
tags: self.tags, tags: self.tags,
published_at: self.published_at, published_at: self.published_at,
publisher: self.publisher, publisher: self.publisher,
updated: self.updated, updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
source: self.source.into(),
} }
} }
} }

View File

@@ -1,9 +1,7 @@
use opds::demo; use opds::demo;
use opds::domain::repository::{AuthorFilter, Repository};
use opds::domain::repository::BookFilter; use opds::domain::repository::BookFilter;
use opds::domain::repository::{AuthorFilter};
use quick_xml::se::to_string as to_xml_string; use quick_xml::se::to_string as to_xml_string;
use std::thread::sleep;
use std::time::Duration;
fn main() { fn main() {
let app = demo(); let app = demo();
@@ -13,7 +11,7 @@ fn main() {
id: None, id: None,
name: None, //Some("rs".to_string()), name: None, //Some("rs".to_string()),
}), }),
title: Some("Сборник".to_string()), title: Some("пов".to_string()),
language: None, language: None,
description: None, description: None,
tags: None, tags: None,
@@ -27,20 +25,7 @@ fn main() {
if let Some(book) = res.entry.iter().next() { if let Some(book) = res.entry.iter().next() {
let book = app.repo.lock().unwrap().get(book.id.to_string().clone()); let book = app.repo.lock().unwrap().get(book.id.to_string().clone());
println!("{:?}", book.unwrap().author); println!("{:?}", book.clone().unwrap().author);
println!("{}", book.unwrap());
} }
// sleep(Duration::new(10, 0));
//
// let filter = BookFilter {
// author: None,
// title: Some("foo".to_string()),
// language: None,
// description: None,
// tags: None,
// published_at: None,
// publisher: None,
// updated: None,
// };
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
} }