From 5dc2cb112dba84291bbcf4b4175e0a746360b535 Mon Sep 17 00:00:00 2001 From: derfenix Date: Tue, 9 Sep 2025 21:25:31 +0300 Subject: [PATCH] doc: Regenerate AI docs for parsers, cleanup --- src/application/parsers/fb2.rs | 101 ++++++++++++++++++++++++++- src/application/parsers/mod.rs | 121 ++++++++++++++++++++++++++++++--- src/application/parsers/rs.rs | 38 ----------- src/application/parsers/zip.rs | 60 +++++++++++++++- src/main.rs | 20 +----- 5 files changed, 270 insertions(+), 70 deletions(-) diff --git a/src/application/parsers/fb2.rs b/src/application/parsers/fb2.rs index a92887a..ce0d4b1 100644 --- a/src/application/parsers/fb2.rs +++ b/src/application/parsers/fb2.rs @@ -3,16 +3,110 @@ use crate::domain::book::Book; use quick_xml::events::Event; use quick_xml::Reader; use std::fs::File; -use std::io::{BufReader, Read, BufRead}; +use std::io::{BufRead, BufReader}; use std::path::Path; +/// Parses the content of a file at the given path to extract a list of `Book` objects. +/// +/// # Arguments +/// * `path` - A reference to a `Path` representing the location of the file to be parsed. +/// +/// # Returns +/// * `Ok(Vec)` - A vector containing the list of `Book` objects on successful parsing. +/// * `Err(String)` - A string containing an error message if the file could not be opened or parsing failed. +/// +/// # Errors +/// This function returns an error in the following cases: +/// * If the file at the specified path cannot be opened, the corresponding IO error is converted to a string and returned. +/// * If the parsing logic inside `parse_with_reader` fails, the returned error from that function is propagated. 
+/// +/// # Example +/// ```ignore +/// use std::path::Path; +/// +/// let path = Path::new("books.txt"); +/// let result = parse(path); +/// +/// match result { +/// Ok(books) => println!("Parsed {} books", books.len()), +/// Err(err) => eprintln!("Error parsing books: {}", err), +/// } +/// ``` pub fn parse(path: &Path) -> Result, String> { let file = File::open(path).map_err(|e| e.to_string())?; let reader = BufReader::new(file); parse_with_reader(Box::new(reader), path) } - +/// Parses an XML document using a buffered reader and extracts book information. +/// +/// This function reads an XML document containing book details and maps it to a `Vec`. +/// It supports parsing various elements such as title, language, keywords, author details, +/// publisher, description, and publication year. Errors during parsing are propagated as strings. +/// +/// # Type Parameters +/// - `R`: A type that implements the [`BufRead`](std::io::BufRead) trait for buffered input. +/// +/// # Arguments +/// - `reader`: A buffered reader instance (e.g., [`std::io::BufReader`]) to read the XML content. +/// - `path`: A reference to a [`Path`](std::path::Path) representing the source file or data for the book. +/// +/// # Returns +/// - `Ok(Vec)` if parsing was successful, containing a vector of parsed `Book` results. +/// - `Err(String)` if an error occurred during parsing, containing a string description of the error. +/// +/// # XML Parsing Details +/// The following XML tags are processed: +/// - ``: Extracted as the book's title. +/// - ``: Extracted as the book's language. +/// - ``: Split by commas into individual tags for the book. +/// - ``: Extracted into a multiline description for the book. +/// - ``: Processes child elements such as ``, ``, ``, +/// and `` to construct an author's full details. +/// - ``: Extracted as the book's publication year. +/// - ``: Extracted as the book's publisher. +/// +/// # Behavior +/// - Multiple authors are supported through repeated `` tags. 
+/// - Whitespace is trimmed from all parsed text. +/// - If no content is available for certain fields, they may remain `None` +/// or their equivalent default. +/// +/// # Errors +/// This function returns an error in the following cases: +/// - The XML data contains malformed or invalid content. +/// - Unexpected EOF is encountered during XML parsing. +/// - Any other IO or XML parsing errors occur. +/// +/// # Example +/// ```ignore +/// use std::fs::File; +/// use std::io::BufReader; +/// use std::path::Path; +/// +/// let file = File::open("books.xml").expect("Failed to open file"); +/// let reader = BufReader::new(file); +/// let path = Path::new("books.xml"); +/// +/// let result = parse_with_reader(reader, &path); +/// match result { +/// Ok(books) => { +/// for book in books { +/// println!("Book title: {}", book.title); +/// } +/// } +/// Err(e) => eprintln!("Error parsing XML: {}", e), +/// } +/// ``` +/// +/// # Dependencies +/// This function relies on the `quick-xml` crate (`quick_xml::Reader`) for streaming XML parsing. +/// The `Book` and `author::Author` structs, along with their methods (e.g., `new`), +/// must be defined elsewhere in the codebase. +/// +/// # Notes +/// - The function assumes that the XML tags match the expected structure. Unrecognized tags are ignored. +/// - Support for additional XML element types is left to future extension. 
pub fn parse_with_reader(reader: R, path: &Path) -> Result, String> { let mut reader = Reader::from_reader(reader); let mut buf = Vec::new(); @@ -115,7 +209,8 @@ pub fn parse_with_reader(reader: R, path: &Path) -> Result } b"year" => { if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) { - book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned(); + book.published_at = + t.xml_content().map_err(|e| e.to_string())?.into_owned(); } } b"publisher" => { diff --git a/src/application/parsers/mod.rs b/src/application/parsers/mod.rs index 77b51aa..94230ac 100644 --- a/src/application/parsers/mod.rs +++ b/src/application/parsers/mod.rs @@ -1,7 +1,7 @@ use crate::domain::book::Book; use std::fmt; use std::fs::File; -use std::io::{BufReader, BufRead}; +use std::io::BufRead; use std::path::PathBuf; mod fb2; @@ -23,28 +23,129 @@ impl fmt::Display for Error { } } +/// Represents a source file or location that can be read from. +/// +/// The `Source` struct encapsulates information about a file or source location, including its +/// path and an optional reader for reading the source's contents. This allows for flexibility +/// in creating `Source` instances where the reading mechanism may be deferred or not immediately available. +/// +/// # Fields +/// +/// * `path` - A `PathBuf` representing the file system path to the source. This path must be valid. +/// * `reader` - An optional boxed dynamic trait object (`Option>`) that represents +/// a buffered reader capable of reading the contents of the source. If `None`, no reader +/// is currently defined or available. 
+/// +/// # Examples +/// +/// ```ignore +/// use std::fs::File; +/// use std::io::{BufReader, BufRead}; +/// use std::path::PathBuf; +/// +/// let path = PathBuf::from("example.txt"); +/// let file = File::open(&path).expect("Failed to open file"); +/// let reader = BufReader::new(file); +/// +/// let source = Source { +/// path, +/// reader: Some(Box::new(reader)), +/// }; +/// ``` +/// +/// In the above example, a `Source` instance is created with a valid path and a buffered reader. +/// +/// ```ignore +/// use std::path::PathBuf; +/// +/// let source = Source { +/// path: PathBuf::from("example.txt"), +/// reader: None, +/// }; +/// ``` +/// +/// In this example, a `Source` instance is created with a path but no reader. This might be useful +/// for cases where reading is deferred to a later point in time. pub struct Source { pub path: PathBuf, pub reader: Option>, // Сделал reader опциональным } +/// Parses a `Source` to extract a vector of `Book` objects. +/// +/// This function handles different file types based on the file's extension. +/// Supported file types and their parsing strategies include: +/// - `.rs`: Uses the `rs::parse` function to handle Rust source files. +/// - `.fb2`: Handles FictionBook 2 (FB2) files. This supports: +/// - Using a provided `reader` (if `source.reader` is given). +/// - Opening and parsing directly from the file otherwise. +/// - `.zip`: Handles ZIP archives. Always opens the file itself as ZIP parsing +/// requires the file to implement the `Seek` trait. +/// +/// If the file extension is unsupported or undefined, the function returns +/// an `Error::NotSupported`. +/// +/// # Parameters +/// +/// - `source`: A `Source` object containing the file path and an optional `reader` +/// for parsing. +/// +/// # Returns +/// +/// - `Ok(Vec)`: If the parsing is successful, returns a vector of `Book` objects. +/// - `Err(Error)`: If an error occurs during parsing or if the file type is unsupported. 
+/// +/// # Errors +/// +/// - Returns `Error::ParseError` if there is an issue reading or parsing the file. +/// - Returns `Error::NotSupported` if the file type is unsupported. +/// +/// # Examples +/// +/// ```ignore +/// let source = Source { +/// path: PathBuf::from("example.fb2"), +/// reader: None, +/// }; +/// let result = parse(source); +/// match result { +/// Ok(books) => println!("Parsed successfully: {:?}", books), +/// Err(error) => eprintln!("Failed to parse: {:?}", error), +/// } +/// ``` +/// +/// # Notes +/// +/// - For `.fb2` files, if a `reader` is provided in the `Source` object, it will +/// be used for parsing. Otherwise, the function will open the file and parse it. +/// - `.zip` files require the file to support the `Seek` trait and will always +/// be opened directly from the file system. +/// +/// # File Types +/// +/// - `.rs`: Rust source files. +/// - `.fb2`: FictionBook 2 files. +/// - `.zip`: ZIP archives. +/// +/// # See Also +/// +/// - [`rs::parse`] +/// - [`fb2::parse`] +/// - [`fb2::parse_with_reader`] +/// - [`zip::parse`] pub fn parse(source: Source) -> Result, Error> { let path = &source.path; match path.extension().and_then(|s| s.to_str()) { Some("rs") => rs::parse(path).map_err(Error::ParseError), - Some("fb2") => { - // Если reader предоставлен, используем его, иначе открываем файл - match source.reader { - Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError), - None => fb2::parse(path).map_err(Error::ParseError), - } + Some("fb2") => match source.reader { + Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError), + None => fb2::parse(path).map_err(Error::ParseError), }, Some("zip") => { - // ZIP всегда открывает файлы сам, так как нужен Seek let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?; - zip::parse_direct(file, path).map_err(Error::ParseError) - }, + zip::parse(file, path).map_err(Error::ParseError) + } Some(_) | None => 
Err(Error::NotSupported), } } diff --git a/src/application/parsers/rs.rs b/src/application/parsers/rs.rs index 49c5f7b..8735498 100644 --- a/src/application/parsers/rs.rs +++ b/src/application/parsers/rs.rs @@ -2,44 +2,6 @@ use crate::domain::author::Author; use crate::domain::book::Book; use std::path::PathBuf; -/// Parses a given file path into a vector containing a `Book` object. -/// -/// # Arguments -/// -/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed. -/// -/// # Returns -/// -/// * `Result, String>` - -/// - On success, returns a `Vec` with a single `Book` object populated based on the input path. -/// - On failure, returns an error `String` describing the issue. -/// -/// The function performs the following steps: -/// -/// 1. Creates a new instance of `Book`. -/// 2. Sets the `title` of the `Book` to the string representation of the input path. -/// 3. Creates a new instance of `Author`. -/// 4. Sets the `first_name` of the `Author` to the string representation of the file extension of `path`. -/// 5. Pushes the `Author` into the `author` vector of the `Book`. -/// 6. Returns a `Vec` containing the newly created `Book`. -/// -/// # Panics -/// -/// The function will panic if the input path does not contain a file extension -/// (i.e., when `path.extension()` returns `None`). 
-/// -/// # Example -/// -/// ```ignore -/// use std::path::PathBuf; -/// -/// let path = PathBuf::from("example.txt"); -/// let books = parse(&path).unwrap(); -/// -/// assert_eq!(books.len(), 1); -/// assert_eq!(books[0].title, "example.txt"); -/// assert_eq!(books[0].author[0].first_name, "txt"); -/// ``` pub fn parse(path: &PathBuf) -> Result, String> { let mut book = Book::new(); diff --git a/src/application/parsers/zip.rs b/src/application/parsers/zip.rs index 14ec5ad..fd19e1d 100644 --- a/src/application/parsers/zip.rs +++ b/src/application/parsers/zip.rs @@ -5,7 +5,65 @@ use std::io::{BufRead, Cursor, Read}; use std::path::Path; use zip::ZipArchive; -pub fn parse_direct( +/// Parses a ZIP archive to extract and process book data. +/// +/// This function reads a given ZIP archive, processes each file inside, and attempts to parse +/// them into a vector of `Book` objects. Each file in the archive is expected to be in a format +/// compatible with the `parse_source` function. +/// +/// # Type Parameters +/// - `R`: A type that implements both [`Read`](std::io::Read) and [`Seek`](std::io::Seek), which allows +/// reading and seeking operations on the input file. +/// +/// # Arguments +/// - `reader`: A reader implementing `Read` and `Seek`, used to access the ZIP archive. +/// - `path`: A reference to a [`Path`](std::path::Path) representing the file system path to the ZIP archive. +/// +/// # Returns +/// - `Ok(Vec)`: If parsing is successful, returns a vector of `Book` objects extracted from +/// the ZIP archive. +/// - `Err(String)`: If an error occurs, returns a descriptive error message as a `String`. +/// +/// # Errors +/// The function may return an error in the following cases: +/// - If the ZIP archive cannot be opened. +/// - If the function encounters issues reading from the ZIP archive or its files. +/// - If a file within the archive cannot be parsed into `Book` objects. 
+/// +/// # Example +/// ```ignore +/// use std::fs::File; +/// use std::path::Path; +/// use your_crate::parse; +/// +/// let file = File::open("test_books.zip").expect("Failed to open file"); +/// let path = Path::new("test_books.zip"); +/// +/// match parse(file, &path) { +/// Ok(books) => println!("Parsed {} books", books.len()), +/// Err(e) => eprintln!("Error parsing archive: {}", e), +/// } +/// ``` +/// +/// # Implementation Details +/// - The function processes each file inside the archive by reopening the archive for each iteration. This +/// ensures the `ZipArchive` does not consume other files during iteration. +/// - File contents are read into memory as raw bytes and wrapped in a [`BufReader`](std::io::BufRead) for further processing. +/// - Each file's path is dynamically constructed to include a placeholder `#` directory and its original name. +/// - The main parsing is delegated to the `parse_source` function, which returns either parsed books or an error. +/// +/// # Notes +/// - As the ZIP archive is re-opened for each file, the performance may be impacted for large archives. +/// - The `parse_source` function is expected to be defined elsewhere in the codebase to handle the specific parsing logic. +/// +/// # Dependencies +/// - This function depends on the `zip` crate to handle ZIP file operations and a custom `Book` and `Source` structure +/// for processing individual entries. +/// +/// # See Also +/// - [`ZipArchive`](zip::read::ZipArchive): Used to interact with the ZIP archive. +/// - `parse_source`: Required function that processes individual entries to extract `Book` data. 
+pub fn parse( reader: R, path: &Path, ) -> Result, String> { diff --git a/src/main.rs b/src/main.rs index c6a7f4b..53870e4 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,15 +1,13 @@ use opds::demo; -use opds::domain::repository::{AuthorFilter, Repository}; use opds::domain::repository::BookFilter; +use opds::domain::repository::{AuthorFilter}; use quick_xml::se::to_string as to_xml_string; -use std::thread::sleep; -use std::time::Duration; fn main() { let app = demo(); let filter = BookFilter { - author: Some(AuthorFilter{ + author: Some(AuthorFilter { id: None, name: None, //Some("rs".to_string()), }), @@ -30,18 +28,4 @@ fn main() { println!("{:?}", book.clone().unwrap().author); println!("{}", book.unwrap()); } - - // sleep(Duration::new(10, 0)); - // - // let filter = BookFilter { - // author: None, - // title: Some("foo".to_string()), - // language: None, - // description: None, - // tags: None, - // published_at: None, - // publisher: None, - // updated: None, - // }; - // println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap()); }