Compare commits
4 Commits
18fa38120b
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
ec8cc12bf8
|
|||
|
4a7ed0974c
|
|||
|
5dc2cb112d
|
|||
|
00dc0e39b9
|
@@ -11,6 +11,7 @@ use crate::domain::book::Book;
|
|||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
use std::fs;
|
use std::fs;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use crate::application::parsers::Source;
|
||||||
|
|
||||||
pub struct Loader {
|
pub struct Loader {
|
||||||
root: PathBuf,
|
root: PathBuf,
|
||||||
@@ -76,7 +77,7 @@ impl Iterator for LoaderIter {
|
|||||||
|
|
||||||
impl LoaderIter {
|
impl LoaderIter {
|
||||||
fn parse_path(path: &PathBuf) -> Option<Vec<Book>> {
|
fn parse_path(path: &PathBuf) -> Option<Vec<Book>> {
|
||||||
match parsers::parse(&path) {
|
match parsers::parse(Source{ path: (*path).clone(), reader: None}) {
|
||||||
Ok(books) => return Some(books),
|
Ok(books) => return Some(books),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
match err {
|
match err {
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ use std::collections::VecDeque;
|
|||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
use crate::application::parsers::Source;
|
||||||
|
|
||||||
const BUFFER_SIZE: usize = 4096;
|
const BUFFER_SIZE: usize = 4096;
|
||||||
|
|
||||||
@@ -72,7 +73,7 @@ impl<'a> LoaderIter<'a> {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
match parsers::parse(&path) {
|
match parsers::parse(Source{path: path.clone(), reader: None}) {
|
||||||
Ok(books) => {
|
Ok(books) => {
|
||||||
for book in &books {
|
for book in &books {
|
||||||
println!("{}", book);
|
println!("{}", book);
|
||||||
|
|||||||
@@ -3,82 +3,112 @@ use crate::domain::book::Book;
|
|||||||
use quick_xml::events::Event;
|
use quick_xml::events::Event;
|
||||||
use quick_xml::Reader;
|
use quick_xml::Reader;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::{BufRead, BufReader};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
/// Parses an XML file located at the given path and extracts information about books.
|
/// Parses the content of a file at the given path to extract a list of `Book` objects.
|
||||||
///
|
|
||||||
/// This function processes the XML structure using a streaming XML reader to extract details
|
|
||||||
/// about books, including:
|
|
||||||
/// - Title
|
|
||||||
/// - Language
|
|
||||||
/// - Keywords
|
|
||||||
/// - Authors (including optional details like first name, last name, middle name, and nickname)
|
|
||||||
/// - Publication year
|
|
||||||
/// - Publisher
|
|
||||||
/// - Description
|
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
/// * `path` - A reference to a `Path` representing the location of the file to be parsed.
|
||||||
/// * `path` - A reference to the file path (`&Path`) of the XML file to parse.
|
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
/// * `Ok(Vec<Book>)` - A vector containing the list of `Book` objects on successful parsing.
|
||||||
/// Returns a `Result` where:
|
/// * `Err(String)` - A string containing an error message if the file could not be opened or parsing failed.
|
||||||
/// - `Ok(Vec<Book>)` contains a vector of `Book` objects constructed from the parsed XML.
|
|
||||||
/// - `Err(String)` contains an error message if the parsing fails at any stage.
|
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
/// This function returns an error in the following cases:
|
||||||
/// Returns an error in the following scenarios:
|
/// * If the file at the specified path cannot be opened, the corresponding IO error is converted to a string and returned.
|
||||||
/// - Unable to open the file specified by `path`.
|
/// * If the parsing logic inside `parse_with_reader` fails, the returned error from that function is propagated.
|
||||||
/// - Malformed XML data in the file.
|
|
||||||
/// - Issues during data extraction, such as reading incomplete or invalid values.
|
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
|
||||||
/// ```ignore
|
/// ```ignore
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
|
///
|
||||||
|
/// let path = Path::new("books.fb2");
|
||||||
|
/// let result = parse(path);
|
||||||
|
///
|
||||||
|
/// match result {
|
||||||
|
/// Ok(books) => println!("Parsed {} books", books.len()),
|
||||||
|
/// Err(err) => eprintln!("Error parsing books: {}", err),
|
||||||
|
/// }
|
||||||
|
/// ```
|
||||||
|
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
||||||
|
let file = File::open(path).map_err(|e| e.to_string())?;
|
||||||
|
let reader = BufReader::new(file);
|
||||||
|
parse_with_reader(Box::new(reader), path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses an XML document using a buffered reader and extracts book information.
|
||||||
|
///
|
||||||
|
/// This function reads an XML document containing book details and maps it to a `Vec<Book>`.
|
||||||
|
/// It supports parsing various elements such as title, language, keywords, author details,
|
||||||
|
/// publisher, description, and publication year. Errors during parsing are propagated as strings.
|
||||||
|
///
|
||||||
|
/// # Type Parameters
|
||||||
|
/// - `R`: A type that implements the [`BufRead`](std::io::BufRead) trait for buffered input.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// - `reader`: A buffered reader instance (e.g., [`std::io::BufReader`]) to read the XML content.
|
||||||
|
/// - `path`: A reference to a [`Path`](std::path::Path) representing the source file or data for the book.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
/// - `Ok(Vec<Book>)` if parsing was successful, containing a vector of parsed `Book` results.
|
||||||
|
/// - `Err(String)` if an error occurred during parsing, containing a string description of the error.
|
||||||
|
///
|
||||||
|
/// # XML Parsing Details
|
||||||
|
/// The following XML tags are processed:
|
||||||
|
/// - `<book-title>`: Extracted as the book's title.
|
||||||
|
/// - `<lang>`: Extracted as the book's language.
|
||||||
|
/// - `<keywords>`: Split by commas into individual tags for the book.
|
||||||
|
/// - `<annotation>`: Extracted into a multiline description for the book.
|
||||||
|
/// - `<author>`: Processes child elements such as `<first-name>`, `<last-name>`, `<middle-name>`,
|
||||||
|
/// and `<nickname>` to construct an author's full details.
|
||||||
|
/// - `<year>`: Extracted as the book's publication year.
|
||||||
|
/// - `<publisher>`: Extracted as the book's publisher.
|
||||||
|
///
|
||||||
|
/// # Behavior
|
||||||
|
/// - Multiple authors are supported through repeated `<author>` tags.
|
||||||
|
/// - Whitespace is trimmed from all parsed text.
|
||||||
|
/// - If no content is available for certain fields (e.g., `<last-name>`), they may remain `None`
|
||||||
|
/// or their equivalent default.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
/// This function returns an error in the following cases:
|
||||||
|
/// - The XML data contains malformed or invalid content.
|
||||||
|
/// - Unexpected EOF is encountered during XML parsing.
|
||||||
|
/// - Any other IO or XML parsing errors occur.
|
||||||
|
///
|
||||||
|
/// # Example
|
||||||
|
/// ```ignore
|
||||||
|
/// use std::fs::File;
|
||||||
|
/// use std::io::BufReader;
|
||||||
|
/// use std::path::Path;
|
||||||
|
///
|
||||||
|
/// let file = File::open("books.xml").expect("Failed to open file");
|
||||||
|
/// let reader = BufReader::new(file);
|
||||||
/// let path = Path::new("books.xml");
|
/// let path = Path::new("books.xml");
|
||||||
/// match parse(&path) {
|
///
|
||||||
|
/// let result = parse_with_reader(reader, &path);
|
||||||
|
/// match result {
|
||||||
/// Ok(books) => {
|
/// Ok(books) => {
|
||||||
/// for book in books {
|
/// for book in books {
|
||||||
/// println!("Book Title: {}", book.title);
|
/// println!("Book title: {}", book.title);
|
||||||
/// }
|
/// }
|
||||||
/// },
|
/// }
|
||||||
/// Err(err) => eprintln!("Failed to parse XML file: {}", err),
|
/// Err(e) => eprintln!("Error parsing XML: {}", e),
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// # XML Structure
|
/// # Dependencies
|
||||||
///
|
/// This function relies on an external XML parsing library capable of handling streaming XML,
|
||||||
/// The XML should follow a specific schema with the following relevant elements:
|
/// such as `quick-xml`. The `Book` and `author::Author` structs, along with their methods (e.g., `new`),
|
||||||
/// - `<book-title>`: Title of the book.
|
/// must be defined elsewhere in the codebase.
|
||||||
/// - `<lang>`: Language of the book.
|
|
||||||
/// - `<keywords>`: A comma-separated list of keywords/tags.
|
|
||||||
/// - `<author>`: Contains subfields `<first-name>`, `<last-name>`, `<middle-name>`, or `<nickname>`.
|
|
||||||
/// - `<year>`: Year of publication.
|
|
||||||
/// - `<publisher>`: Publisher's name.
|
|
||||||
/// - `<annotation>`: Description or annotation of the book.
|
|
||||||
///
|
///
|
||||||
/// # Notes
|
/// # Notes
|
||||||
///
|
/// - The function assumes that the XML tags match the expected structure. Unrecognized tags are ignored.
|
||||||
/// - Author data is flexible; if a nickname exists, it will override other name details.
|
/// - It is assumed that the user will extend support for future XML element types as needed.
|
||||||
/// - The resulting `Vec<Book>` contains just one book object, as indicated in the implementation.
|
pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
|
||||||
///
|
let mut reader = Reader::from_reader(reader);
|
||||||
/// # Dependencies
|
|
||||||
///
|
|
||||||
/// This function depends on the following crates:
|
|
||||||
/// - `quick-xml`: For fast XML parsing.
|
|
||||||
/// - `uuid`: To generate a unique identifier for each book.
|
|
||||||
/// - `chrono`: To serialize the current timestamp as an RFC3339 string.
|
|
||||||
///
|
|
||||||
/// # See Also
|
|
||||||
///
|
|
||||||
/// `Book` structure, which represents the parsed data for an individual book.
|
|
||||||
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
|
||||||
let file = File::open(path).map_err(|e| e.to_string())?;
|
|
||||||
let mut reader = Reader::from_reader(BufReader::new(file));
|
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
|
|
||||||
let mut in_title = false;
|
let mut in_title = false;
|
||||||
@@ -87,6 +117,7 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
|||||||
let mut in_description = false;
|
let mut in_description = false;
|
||||||
|
|
||||||
let mut book = Book::new();
|
let mut book = Book::new();
|
||||||
|
book.source = path.into();
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match reader.read_event_into(&mut buf) {
|
match reader.read_event_into(&mut buf) {
|
||||||
@@ -178,7 +209,8 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
|||||||
}
|
}
|
||||||
b"year" => {
|
b"year" => {
|
||||||
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
|
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
|
||||||
book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
|
book.published_at =
|
||||||
|
t.xml_content().map_err(|e| e.to_string())?.into_owned();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
b"publisher" => {
|
b"publisher" => {
|
||||||
@@ -223,5 +255,7 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
|||||||
buf.clear();
|
buf.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
book.author.dedup_by(|a, b| a.uniq_id() == b.uniq_id());
|
||||||
|
|
||||||
Ok(vec![book])
|
Ok(vec![book])
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,17 +1,13 @@
|
|||||||
use std::fmt;
|
|
||||||
use crate::domain::book::Book;
|
use crate::domain::book::Book;
|
||||||
|
use std::fmt;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::BufRead;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
mod rs;
|
|
||||||
mod fb2;
|
mod fb2;
|
||||||
|
mod rs;
|
||||||
mod zip;
|
mod zip;
|
||||||
|
|
||||||
|
|
||||||
/// Error enumeration representing possible errors that can occur when parsing files.
|
|
||||||
///
|
|
||||||
/// This enumeration has the following variants:
|
|
||||||
/// - `NotSupported`: Indicates that the file format or extension is not supported.
|
|
||||||
/// - `ParseError`: Contains a `String` representing the error message when a parsing process fails.
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum Error {
|
pub enum Error {
|
||||||
NotSupported,
|
NotSupported,
|
||||||
@@ -27,53 +23,129 @@ impl fmt::Display for Error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parses a file at the given path and attempts to convert its contents into a vector of `Book` objects.
|
/// Represents a source file or location that can be read from.
|
||||||
///
|
///
|
||||||
/// This function determines the file type based on its extension and delegates the parsing duties
|
/// The `Source` struct encapsulates information about a file or source location, including its
|
||||||
/// to the appropriate module. Supported file extensions are:
|
/// path and an optional reader for reading the source's contents. This allows for flexibility
|
||||||
/// - `.rs`: Processed by the `rs` module.
|
/// in creating `Source` instances where the reading mechanism may be deferred or not immediately available.
|
||||||
/// - `.fb2`: Processed by the `fb2` module.
|
|
||||||
/// - `.zip`: Processed by the `zip` module.
|
|
||||||
///
|
///
|
||||||
/// If the file's extension is unsupported or missing, this function returns a `NotSupported` error.
|
/// # Fields
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// * `path` - A `PathBuf` representing the file system path to the source. This path must be valid.
|
||||||
///
|
/// * `reader` - An optional boxed dynamic trait object (`Option<Box<dyn BufRead>>`) that represents
|
||||||
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
|
/// a buffered reader capable of reading the contents of the source. If `None`, no reader
|
||||||
///
|
/// is currently defined or available.
|
||||||
/// # Returns
|
|
||||||
///
|
|
||||||
/// * `Ok(Vec<Book>)` - A vector of `Book` objects if the file was successfully parsed.
|
|
||||||
/// * `Err(Error)` - An error if the file could not be parsed, the parsing process encountered
|
|
||||||
/// an issue, or the file extension is not supported.
|
|
||||||
///
|
|
||||||
/// # Errors
|
|
||||||
///
|
|
||||||
/// - `Error::ParseError` - If the file parsing fails.
|
|
||||||
/// - `Error::NotSupported` - If the file's extension is unsupported or missing.
|
|
||||||
///
|
///
|
||||||
/// # Examples
|
/// # Examples
|
||||||
///
|
///
|
||||||
/// ```ignore
|
/// ```ignore
|
||||||
|
/// use std::fs::File;
|
||||||
|
/// use std::io::{BufReader, BufRead};
|
||||||
/// use std::path::PathBuf;
|
/// use std::path::PathBuf;
|
||||||
///
|
///
|
||||||
/// let path = PathBuf::from("example.rs");
|
/// let path = PathBuf::from("example.txt");
|
||||||
/// let books = parse(&path);
|
/// let file = File::open(&path).expect("Failed to open file");
|
||||||
/// match books {
|
/// let reader = BufReader::new(file);
|
||||||
/// Ok(book_list) => println!("Parsed {} books.", book_list.len()),
|
///
|
||||||
/// Err(e) => println!("Failed to parse file: {:?}", e),
|
/// let source = Source {
|
||||||
|
/// path,
|
||||||
|
/// reader: Some(Box::new(reader)),
|
||||||
|
/// };
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// In the above example, a `Source` instance is created with a valid path and a buffered reader.
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// use std::path::PathBuf;
|
||||||
|
///
|
||||||
|
/// let source = Source {
|
||||||
|
/// path: PathBuf::from("example.txt"),
|
||||||
|
/// reader: None,
|
||||||
|
/// };
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// In this example, a `Source` instance is created with a path but no reader. This might be useful
|
||||||
|
/// for cases where reading is deferred to a later point in time.
|
||||||
|
pub struct Source {
|
||||||
|
pub path: PathBuf,
|
||||||
|
pub reader: Option<Box<dyn BufRead>>, // The reader is optional
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parses a `Source` to extract a vector of `Book` objects.
|
||||||
|
///
|
||||||
|
/// This function handles different file types based on the file's extension.
|
||||||
|
/// Supported file types and their parsing strategies include:
|
||||||
|
/// - `.rs`: Uses the `rs::parse` function to handle Rust source files.
|
||||||
|
/// - `.fb2`: Handles FictionBook 2 (FB2) files. This supports:
|
||||||
|
/// - Using a provided `reader` (if `source.reader` is given).
|
||||||
|
/// - Opening and parsing directly from the file otherwise.
|
||||||
|
/// - `.zip`: Handles ZIP archives. Always opens the file itself as ZIP parsing
|
||||||
|
/// requires the file to implement the `Seek` trait.
|
||||||
|
///
|
||||||
|
/// If the file extension is unsupported or undefined, the function returns
|
||||||
|
/// an `Error::NotSupported`.
|
||||||
|
///
|
||||||
|
/// # Parameters
|
||||||
|
///
|
||||||
|
/// - `source`: A `Source` object containing the file path and an optional `reader`
|
||||||
|
/// for parsing.
|
||||||
|
///
|
||||||
|
/// # Returns
|
||||||
|
///
|
||||||
|
/// - `Ok(Vec<Book>)`: If the parsing is successful, returns a vector of `Book` objects.
|
||||||
|
/// - `Err(Error)`: If an error occurs during parsing or if the file type is unsupported.
|
||||||
|
///
|
||||||
|
/// # Errors
|
||||||
|
///
|
||||||
|
/// - Returns `Error::ParseError` if there is an issue reading or parsing the file.
|
||||||
|
/// - Returns `Error::NotSupported` if the file type is unsupported.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// ```ignore
|
||||||
|
/// let source = Source {
|
||||||
|
/// path: PathBuf::from("example.fb2"),
|
||||||
|
/// reader: None,
|
||||||
|
/// };
|
||||||
|
/// let result = parse(source);
|
||||||
|
/// match result {
|
||||||
|
/// Ok(books) => println!("Parsed successfully: {:?}", books),
|
||||||
|
/// Err(error) => eprintln!("Failed to parse: {:?}", error),
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// # Notes
|
/// # Notes
|
||||||
///
|
///
|
||||||
/// Ensure that the appropriate parsers (`rs`, `fb2`, `zip`) are properly implemented
|
/// - For `.fb2` files, if a `reader` is provided in the `Source` object, it will
|
||||||
/// and handle all required logic for their respective file types to avoid unexpected errors.
|
/// be used for parsing. Otherwise, the function will open the file and parse it.
|
||||||
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, Error> {
|
/// - `.zip` files require the file to support the `Seek` trait and will always
|
||||||
|
/// be opened directly from the file system.
|
||||||
|
///
|
||||||
|
/// # File Types
|
||||||
|
///
|
||||||
|
/// - `.rs`: Rust source files.
|
||||||
|
/// - `.fb2`: FictionBook 2 files.
|
||||||
|
/// - `.zip`: ZIP archives.
|
||||||
|
///
|
||||||
|
/// # See Also
|
||||||
|
///
|
||||||
|
/// - [`rs::parse`](#)
|
||||||
|
/// - [`fb2::parse`](#)
|
||||||
|
/// - [`fb2::parse_with_reader`](#)
|
||||||
|
/// - [`zip::parse`](#)
|
||||||
|
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
|
||||||
|
let path = &source.path;
|
||||||
|
|
||||||
match path.extension().and_then(|s| s.to_str()) {
|
match path.extension().and_then(|s| s.to_str()) {
|
||||||
Some("rs") => rs::parse(path).map_err(Error::ParseError),
|
Some("rs") => rs::parse(path).map_err(Error::ParseError),
|
||||||
Some("fb2") => fb2::parse(path).map_err(Error::ParseError),
|
Some("fb2") => match source.reader {
|
||||||
Some("zip") => zip::parse(path).map_err(Error::ParseError),
|
Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
|
||||||
|
None => fb2::parse(path).map_err(Error::ParseError),
|
||||||
|
},
|
||||||
|
Some("zip") => {
|
||||||
|
let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
|
||||||
|
zip::parse(file, path).map_err(Error::ParseError)
|
||||||
|
}
|
||||||
Some(_) | None => Err(Error::NotSupported),
|
Some(_) | None => Err(Error::NotSupported),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,44 +2,6 @@ use crate::domain::author::Author;
|
|||||||
use crate::domain::book::Book;
|
use crate::domain::book::Book;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
/// Parses a given file path into a vector containing a `Book` object.
|
|
||||||
///
|
|
||||||
/// # Arguments
|
|
||||||
///
|
|
||||||
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
|
|
||||||
///
|
|
||||||
/// # Returns
|
|
||||||
///
|
|
||||||
/// * `Result<Vec<Book>, String>` -
|
|
||||||
/// - On success, returns a `Vec<Book>` with a single `Book` object populated based on the input path.
|
|
||||||
/// - On failure, returns an error `String` describing the issue.
|
|
||||||
///
|
|
||||||
/// The function performs the following steps:
|
|
||||||
///
|
|
||||||
/// 1. Creates a new instance of `Book`.
|
|
||||||
/// 2. Sets the `title` of the `Book` to the string representation of the input path.
|
|
||||||
/// 3. Creates a new instance of `Author`.
|
|
||||||
/// 4. Sets the `first_name` of the `Author` to the string representation of the file extension of `path`.
|
|
||||||
/// 5. Pushes the `Author` into the `author` vector of the `Book`.
|
|
||||||
/// 6. Returns a `Vec<Book>` containing the newly created `Book`.
|
|
||||||
///
|
|
||||||
/// # Panics
|
|
||||||
///
|
|
||||||
/// The function will panic if the input path does not contain a file extension
|
|
||||||
/// (i.e., when `path.extension()` returns `None`).
|
|
||||||
///
|
|
||||||
/// # Example
|
|
||||||
///
|
|
||||||
/// ```ignore
|
|
||||||
/// use std::path::PathBuf;
|
|
||||||
///
|
|
||||||
/// let path = PathBuf::from("example.txt");
|
|
||||||
/// let books = parse(&path).unwrap();
|
|
||||||
///
|
|
||||||
/// assert_eq!(books.len(), 1);
|
|
||||||
/// assert_eq!(books[0].title, "example.txt");
|
|
||||||
/// assert_eq!(books[0].author[0].first_name, "txt");
|
|
||||||
/// ```
|
|
||||||
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
|
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
|
||||||
let mut book = Book::new();
|
let mut book = Book::new();
|
||||||
|
|
||||||
@@ -48,6 +10,7 @@ pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
|
|||||||
let mut author = Author::new();
|
let mut author = Author::new();
|
||||||
author.first_name = path.extension().unwrap().to_string_lossy().to_string();
|
author.first_name = path.extension().unwrap().to_string_lossy().to_string();
|
||||||
book.author.push(author);
|
book.author.push(author);
|
||||||
|
book.source = path.into();
|
||||||
|
|
||||||
return Ok(vec![ book]);
|
return Ok(vec![ book]);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,72 +1,102 @@
|
|||||||
use crate::application::parsers;
|
use crate::application::parsers::{parse as parse_source, Source};
|
||||||
use crate::domain::book::Book;
|
use crate::domain::book::Book;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::BufReader;
|
use std::io::{BufRead, Cursor, Read};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::Path;
|
||||||
use zip::ZipArchive;
|
use zip::ZipArchive;
|
||||||
|
|
||||||
|
/// Parses a ZIP archive to extract and process book data.
|
||||||
/// Parses a ZIP archive to extract a collection of `Book` objects.
|
|
||||||
///
|
///
|
||||||
/// This function takes a path to a ZIP archive file, reads its contents, and processes
|
/// This function reads a given ZIP archive, processes each file inside, and attempts to parse
|
||||||
/// each file within the archive to extract `Book` objects using a custom parser. If any
|
/// them into a vector of `Book` objects. Each file in the archive is expected to be in a format
|
||||||
/// errors occur during file access, archive extraction, or parsing, they are returned as
|
/// compatible with the `parse_source` function.
|
||||||
/// a `String`. On success, it returns a vector of `Book` objects contained in the archive.
|
///
|
||||||
|
/// # Type Parameters
|
||||||
|
/// - `R`: A type that implements both [`Read`](std::io::Read) and [`Seek`](std::io::Seek), which allows
|
||||||
|
/// reading and seeking operations on the input file.
|
||||||
///
|
///
|
||||||
/// # Arguments
|
/// # Arguments
|
||||||
///
|
/// - `reader`: A reader implementing `Read` and `Seek`, used to access the ZIP archive.
|
||||||
/// * `path` - A reference to a `Path` representing the file system path to the ZIP archive.
|
/// - `path`: A reference to a [`Path`](std::path::Path) representing the file system path to the ZIP archive.
|
||||||
///
|
///
|
||||||
/// # Returns
|
/// # Returns
|
||||||
///
|
/// - `Ok(Vec<Book>)`: If parsing is successful, returns a vector of `Book` objects extracted from
|
||||||
/// * `Ok(Vec<Book>)` - A vector containing the `Book` objects successfully parsed
|
/// the ZIP archive.
|
||||||
/// from the files in the archive.
|
/// - `Err(String)`: If an error occurs, returns a descriptive error message as a `String`.
|
||||||
/// * `Err(String)` - An error message if any step in opening the file, reading the archive,
|
|
||||||
/// or parsing the files fails.
|
|
||||||
///
|
///
|
||||||
/// # Errors
|
/// # Errors
|
||||||
///
|
/// The function may return an error in the following cases:
|
||||||
/// This function returns an error in the following cases:
|
/// - If the ZIP archive cannot be opened.
|
||||||
/// * If the ZIP file cannot be opened.
|
/// - If the function encounters issues reading from the ZIP archive or its files.
|
||||||
/// * If the ZIP archive cannot be read.
|
/// - If a file within the archive cannot be parsed into `Book` objects.
|
||||||
/// * If an individual file within the archive cannot be accessed.
|
|
||||||
/// * If the parsing of a file fails.
|
|
||||||
///
|
///
|
||||||
/// # Example
|
/// # Example
|
||||||
///
|
|
||||||
/// ```ignore
|
/// ```ignore
|
||||||
|
/// use std::fs::File;
|
||||||
/// use std::path::Path;
|
/// use std::path::Path;
|
||||||
/// use your_crate::parse;
|
/// use your_crate::parse;
|
||||||
///
|
///
|
||||||
/// let path = Path::new("books_archive.zip");
|
/// let file = File::open("test_books.zip").expect("Failed to open file");
|
||||||
/// match parse(&path) {
|
/// let path = Path::new("test_books.zip");
|
||||||
/// Ok(books) => {
|
///
|
||||||
/// for book in books {
|
/// match parse(file, &path) {
|
||||||
/// println!("Parsed book: {:?}", book);
|
/// Ok(books) => println!("Parsed {} books", books.len()),
|
||||||
/// }
|
/// Err(e) => eprintln!("Error parsing archive: {}", e),
|
||||||
/// }
|
|
||||||
/// Err(e) => eprintln!("Failed to parse books: {}", e),
|
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// # Dependencies
|
/// # Implementation Details
|
||||||
|
/// - The function processes each file inside the archive by reopening the archive for each iteration. This
|
||||||
|
/// ensures the `ZipArchive` does not consume other files during iteration.
|
||||||
|
/// - File contents are read into memory as raw bytes and wrapped in a [`Cursor`](std::io::Cursor), boxed as `dyn BufRead`, for further processing.
|
||||||
|
/// - Each file's path is dynamically constructed to include a placeholder `#` directory and its original name.
|
||||||
|
/// - The main parsing is delegated to the `parse_source` function, which returns either parsed books or an error.
|
||||||
///
|
///
|
||||||
/// This function relies on the `ZipArchive` for working with ZIP files and a `parsers`
|
/// # Notes
|
||||||
/// module for custom file parsing logic.
|
/// - As the ZIP archive is re-opened for each file, the performance may be impacted for large archives.
|
||||||
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
/// - The `parse_source` function is expected to be defined elsewhere in the codebase to handle the specific parsing logic.
|
||||||
let file = File::open(path).map_err(|e| e.to_string())?;
|
///
|
||||||
let reader = BufReader::new(file);
|
/// # Dependencies
|
||||||
let mut archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
|
/// - This function depends on the `zip` crate to handle ZIP file operations and a custom `Book` and `Source` structure
|
||||||
|
/// for processing individual entries.
|
||||||
|
///
|
||||||
|
/// # See Also
|
||||||
|
/// - [`ZipArchive`](zip::read::ZipArchive): Used to interact with the ZIP archive.
|
||||||
|
/// - `parse_source`: Required function that processes individual entries to extract `Book` data.
|
||||||
|
pub fn parse<R: Read + std::io::Seek + 'static>(
|
||||||
|
reader: R,
|
||||||
|
path: &Path,
|
||||||
|
) -> Result<Vec<Book>, String> {
|
||||||
|
let archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
|
||||||
let mut books: Vec<Book> = Vec::new();
|
let mut books: Vec<Book> = Vec::new();
|
||||||
|
|
||||||
for i in 0..archive.len() {
|
// We need the archive's path in order to open the files inside it
|
||||||
let file = archive.by_index(i).map_err(|e| e.to_string())?;
|
let archive_path = path.to_path_buf();
|
||||||
let name = file.name().to_string();
|
|
||||||
|
|
||||||
match parsers::parse(&PathBuf::from(name.to_lowercase())) {
|
for i in 0..archive.len() {
|
||||||
|
// Re-open the archive for each file, because ZipArchive consumes files
|
||||||
|
let file = File::open(&archive_path).map_err(|e| e.to_string())?;
|
||||||
|
let mut archive = ZipArchive::new(file).map_err(|e| e.to_string())?;
|
||||||
|
let mut zip_file = archive.by_index(i).map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
let name = zip_file.name().to_string();
|
||||||
|
let file_path = path.to_path_buf().join("#").join(&name);
|
||||||
|
|
||||||
|
// Read the file contents into memory and wrap them in a Cursor (which implements BufRead)
|
||||||
|
let mut contents = Vec::new();
|
||||||
|
zip_file
|
||||||
|
.read_to_end(&mut contents)
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
|
||||||
|
let file_reader: Box<dyn BufRead> = Box::new(Cursor::new(contents)); // Changed to BufRead
|
||||||
|
let source = Source {
|
||||||
|
path: file_path,
|
||||||
|
reader: Some(file_reader),
|
||||||
|
};
|
||||||
|
|
||||||
|
match parse_source(source) {
|
||||||
Ok(new_books) => books.extend(new_books),
|
Ok(new_books) => books.extend(new_books),
|
||||||
Err(e) => return Err(e.to_string()),
|
Err(e) => return Err(format!("Error parsing {}: {}", name, e)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -68,13 +68,17 @@ impl Books {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn add_books_from_path(&mut self) {
|
pub fn add_books_from_path(&mut self) {
|
||||||
let iter = fs::Loader::new(PathBuf::from(&self.root));
|
let books = fs::Loader::new(PathBuf::from(&self.root))
|
||||||
|
.into_iter()
|
||||||
|
.map(|mut book| {match book.source.strip_prefix(&self.root) {
|
||||||
|
Ok(path) => book.source = path.to_path_buf(),
|
||||||
|
Err(err) => eprintln!("strip source prefix: {}", err)
|
||||||
|
}; book})
|
||||||
|
.collect();
|
||||||
|
|
||||||
match self.repo.lock() {
|
match self.repo.lock() {
|
||||||
Ok(mut repo) => {
|
Ok(mut repo) => {
|
||||||
for book in iter {
|
repo.bulk_add(books);
|
||||||
repo.add(book);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Err(err) => eprintln!("{}", err),
|
Err(err) => eprintln!("{}", err),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
use crate::domain::author;
|
use crate::domain::author;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
use std::path::PathBuf;
|
||||||
use chrono::{DateTime, Utc};
|
use chrono::{DateTime, Utc};
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
@@ -14,6 +15,7 @@ pub struct Book {
|
|||||||
pub published_at: String,
|
pub published_at: String,
|
||||||
pub publisher: String,
|
pub publisher: String,
|
||||||
pub updated: DateTime<Utc>,
|
pub updated: DateTime<Utc>,
|
||||||
|
pub source: PathBuf
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Book {
|
impl Book {
|
||||||
@@ -28,6 +30,7 @@ impl Book {
|
|||||||
published_at: "".to_string(),
|
published_at: "".to_string(),
|
||||||
publisher: "".to_string(),
|
publisher: "".to_string(),
|
||||||
updated: chrono::Utc::now(),
|
updated: chrono::Utc::now(),
|
||||||
|
source: PathBuf::new()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -60,7 +63,7 @@ impl fmt::Display for Book {
|
|||||||
.collect::<Vec<_>>()
|
.collect::<Vec<_>>()
|
||||||
.join(";");
|
.join(";");
|
||||||
|
|
||||||
write!(f, "{} by {}", self.title, authors)
|
write!(f, "{} by {} at {}", self.title, authors, self.source.to_str().unwrap())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -74,6 +74,7 @@ struct Book {
|
|||||||
published_at: String,
|
published_at: String,
|
||||||
publisher: String,
|
publisher: String,
|
||||||
updated: String,
|
updated: String,
|
||||||
|
source: String,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<book::Book> for Book {
|
impl From<book::Book> for Book {
|
||||||
@@ -88,6 +89,7 @@ impl From<book::Book> for Book {
|
|||||||
published_at: book.published_at,
|
published_at: book.published_at,
|
||||||
publisher: book.publisher,
|
publisher: book.publisher,
|
||||||
updated: book.updated.to_rfc3339(),
|
updated: book.updated.to_rfc3339(),
|
||||||
|
source: book.source.as_os_str().to_str().unwrap().to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -114,6 +116,7 @@ impl Into<book::Book> for Book {
|
|||||||
published_at: self.published_at,
|
published_at: self.published_at,
|
||||||
publisher: self.publisher,
|
publisher: self.publisher,
|
||||||
updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
|
updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
|
||||||
|
source: self.source.into(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
23
src/main.rs
23
src/main.rs
@@ -1,9 +1,7 @@
|
|||||||
use opds::demo;
|
use opds::demo;
|
||||||
use opds::domain::repository::{AuthorFilter, Repository};
|
|
||||||
use opds::domain::repository::BookFilter;
|
use opds::domain::repository::BookFilter;
|
||||||
|
use opds::domain::repository::{AuthorFilter};
|
||||||
use quick_xml::se::to_string as to_xml_string;
|
use quick_xml::se::to_string as to_xml_string;
|
||||||
use std::thread::sleep;
|
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let app = demo();
|
let app = demo();
|
||||||
@@ -13,7 +11,7 @@ fn main() {
|
|||||||
id: None,
|
id: None,
|
||||||
name: None, //Some("rs".to_string()),
|
name: None, //Some("rs".to_string()),
|
||||||
}),
|
}),
|
||||||
title: Some("Сборник".to_string()),
|
title: Some("пов".to_string()),
|
||||||
language: None,
|
language: None,
|
||||||
description: None,
|
description: None,
|
||||||
tags: None,
|
tags: None,
|
||||||
@@ -27,20 +25,7 @@ fn main() {
|
|||||||
|
|
||||||
if let Some(book) = res.entry.iter().next() {
|
if let Some(book) = res.entry.iter().next() {
|
||||||
let book = app.repo.lock().unwrap().get(book.id.to_string().clone());
|
let book = app.repo.lock().unwrap().get(book.id.to_string().clone());
|
||||||
println!("{:?}", book.unwrap().author);
|
println!("{:?}", book.clone().unwrap().author);
|
||||||
|
println!("{}", book.unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
// sleep(Duration::new(10, 0));
|
|
||||||
//
|
|
||||||
// let filter = BookFilter {
|
|
||||||
// author: None,
|
|
||||||
// title: Some("foo".to_string()),
|
|
||||||
// language: None,
|
|
||||||
// description: None,
|
|
||||||
// tags: None,
|
|
||||||
// published_at: None,
|
|
||||||
// publisher: None,
|
|
||||||
// updated: None,
|
|
||||||
// };
|
|
||||||
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user