doc: Regenerate AI docs for parsers, cleanup

This commit is contained in:
2025-09-09 21:25:31 +03:00
parent 00dc0e39b9
commit 5dc2cb112d
5 changed files with 270 additions and 70 deletions

View File

@@ -3,16 +3,110 @@ use crate::domain::book::Book;
use quick_xml::events::Event;
use quick_xml::Reader;
use std::fs::File;
use std::io::{BufReader, Read, BufRead};
use std::io::{BufRead, BufReader};
use std::path::Path;
/// Opens the file at `path` and parses it into a list of [`Book`] values.
///
/// The file is wrapped in a [`BufReader`] and handed to [`parse_with_reader`],
/// which performs the actual XML parsing.
///
/// # Arguments
/// * `path` - Location of the file to parse. It is also forwarded unchanged to
///   [`parse_with_reader`].
///
/// # Returns
/// * `Ok(Vec<Book>)` - The books produced by [`parse_with_reader`].
/// * `Err(String)` - A description of what went wrong.
///
/// # Errors
/// * If the file cannot be opened, the I/O error is converted to a `String` and returned.
/// * Any error returned by [`parse_with_reader`] is propagated as is.
///
/// # Example
/// ```ignore
/// use std::path::Path;
///
/// match parse(Path::new("books.xml")) {
///     Ok(books) => println!("Parsed {} books", books.len()),
///     Err(err) => eprintln!("Error parsing books: {}", err),
/// }
/// ```
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
    let file = File::open(path).map_err(|e| e.to_string())?;
    // `parse_with_reader` is generic over `R: BufRead`, so the reader is passed
    // by value; boxing it would only add a heap allocation.
    parse_with_reader(BufReader::new(file), path)
}
/// Parses an XML document using a buffered reader and extracts book information.
///
/// This function reads an XML document containing book details and maps it to a `Vec<Book>`.
/// It supports parsing various elements such as title, language, keywords, author details,
/// publisher, description, and publication year. Errors during parsing are propagated as strings.
///
/// # Type Parameters
/// - `R`: A type that implements the [`BufRead`](std::io::BufRead) trait for buffered input.
///
/// # Arguments
/// - `reader`: A buffered reader instance (e.g., [`std::io::BufReader`]) to read the XML content.
/// - `path`: A reference to a [`Path`](std::path::Path) representing the source file or data for the book.
///
/// # Returns
/// - `Ok(Vec<Book>)` if parsing was successful, containing a vector of parsed `Book` results.
/// - `Err(String)` if an error occurred during parsing, containing a string description of the error.
///
/// # XML Parsing Details
/// The following XML tags are processed:
/// - `<book-title>`: Extracted as the book's title.
/// - `<lang>`: Extracted as the book's language.
/// - `<keywords>`: Split by commas into individual tags for the book.
/// - `<annotation>`: Extracted into a multiline description for the book.
/// - `<author>`: Processes child elements such as `<first-name>`, `<last-name>`, `<middle-name>`,
/// and `<nickname>` to construct an author's full details.
/// - `<year>`: Extracted as the book's publication year.
/// - `<publisher>`: Extracted as the book's publisher.
///
/// # Behavior
/// - Multiple authors are supported through repeated `<author>` tags.
/// - Whitespace is trimmed from all parsed text.
/// - If no content is available for certain fields (e.g., `<last-name>`), they may remain `None`
/// or their equivalent default.
///
/// # Errors
/// This function returns an error in the following cases:
/// - The XML data contains malformed or invalid content.
/// - Unexpected EOF is encountered during XML parsing.
/// - Any other IO or XML parsing errors occur.
///
/// # Example
/// ```ignore
/// use std::fs::File;
/// use std::io::BufReader;
/// use std::path::Path;
///
/// let file = File::open("books.xml").expect("Failed to open file");
/// let reader = BufReader::new(file);
/// let path = Path::new("books.xml");
///
/// let result = parse_with_reader(reader, &path);
/// match result {
/// Ok(books) => {
/// for book in books {
/// println!("Book title: {}", book.title);
/// }
/// }
/// Err(e) => eprintln!("Error parsing XML: {}", e),
/// }
/// ```
///
/// # Dependencies
/// This function relies on an external XML parsing library capable of handling streaming XML,
/// such as `quick-xml`. The `Book` and `author::Author` structs, along with their methods (e.g., `new`),
/// must be defined elsewhere in the codebase.
///
/// # Notes
/// - The function assumes that the XML tags match the expected structure. Unrecognized tags are ignored.
/// - It is assumed that the user will extend support for future XML element types as needed.
pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
let mut reader = Reader::from_reader(reader);
let mut buf = Vec::new();
@@ -115,7 +209,8 @@ pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>
}
b"year" => {
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
book.published_at =
t.xml_content().map_err(|e| e.to_string())?.into_owned();
}
}
b"publisher" => {

View File

@@ -1,7 +1,7 @@
use crate::domain::book::Book;
use std::fmt;
use std::fs::File;
use std::io::{BufReader, BufRead};
use std::io::BufRead;
use std::path::PathBuf;
mod fb2;
@@ -23,28 +23,129 @@ impl fmt::Display for Error {
}
}
/// A parse input: a file system path, optionally paired with an already-open reader.
///
/// # Fields
///
/// * `path` - A `PathBuf` with the file system path of the source.
/// * `reader` - An optional boxed [`BufRead`] trait object (`Option<Box<dyn BufRead>>`) from
///   which the contents of the source can be read. `None` means no reader has been supplied.
///
/// # Examples
///
/// A `Source` with both a path and a buffered reader:
///
/// ```ignore
/// use std::fs::File;
/// use std::io::{BufReader, BufRead};
/// use std::path::PathBuf;
///
/// let path = PathBuf::from("example.txt");
/// let file = File::open(&path).expect("Failed to open file");
/// let reader = BufReader::new(file);
///
/// let source = Source {
///     path,
///     reader: Some(Box::new(reader)),
/// };
/// ```
///
/// A `Source` with a path but no reader:
///
/// ```ignore
/// use std::path::PathBuf;
///
/// let source = Source {
///     path: PathBuf::from("example.txt"),
///     reader: None,
/// };
/// ```
pub struct Source {
pub path: PathBuf,
pub reader: Option<Box<dyn BufRead>>, // The reader was made optional
}
/// Parses a `Source` to extract a vector of `Book` objects.
///
/// This function handles different file types based on the file's extension.
/// Supported file types and their parsing strategies include:
/// - `.rs`: Uses the `rs::parse` function to handle Rust source files.
/// - `.fb2`: Handles FictionBook 2 (FB2) files. This supports:
/// - Using a provided `reader` (if `source.reader` is given).
/// - Opening and parsing directly from the file otherwise.
/// - `.zip`: Handles ZIP archives. Always opens the file itself as ZIP parsing
/// requires the file to implement the `Seek` trait.
///
/// If the file extension is unsupported or undefined, the function returns
/// an `Error::NotSupported`.
///
/// # Parameters
///
/// - `source`: A `Source` object containing the file path and an optional `reader`
/// for parsing.
///
/// # Returns
///
/// - `Ok(Vec<Book>)`: If the parsing is successful, returns a vector of `Book` objects.
/// - `Err(Error)`: If an error occurs during parsing or if the file type is unsupported.
///
/// # Errors
///
/// - Returns `Error::ParseError` if there is an issue reading or parsing the file.
/// - Returns `Error::NotSupported` if the file type is unsupported.
///
/// # Examples
///
/// ```ignore
/// let source = Source {
/// path: PathBuf::from("example.fb2"),
/// reader: None,
/// };
/// let result = parse(source);
/// match result {
/// Ok(books) => println!("Parsed successfully: {:?}", books),
/// Err(error) => eprintln!("Failed to parse: {:?}", error),
/// }
/// ```
///
/// # Notes
///
/// - For `.fb2` files, if a `reader` is provided in the `Source` object, it will
/// be used for parsing. Otherwise, the function will open the file and parse it.
/// - `.zip` files require the file to support the `Seek` trait and will always
/// be opened directly from the file system.
///
/// # File Types
///
/// - `.rs`: Rust source files.
/// - `.fb2`: FictionBook 2 files.
/// - `.zip`: ZIP archives.
///
/// # See Also
///
/// - [`rs::parse`](#)
/// - [`fb2::parse`](#)
/// - [`fb2::parse_with_reader`](#)
/// - [`zip::parse`](#)
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
let path = &source.path;
match path.extension().and_then(|s| s.to_str()) {
Some("rs") => rs::parse(path).map_err(Error::ParseError),
Some("fb2") => {
// Если reader предоставлен, используем его, иначе открываем файл
match source.reader {
Some("fb2") => match source.reader {
Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
None => fb2::parse(path).map_err(Error::ParseError),
}
},
Some("zip") => {
// ZIP всегда открывает файлы сам, так как нужен Seek
let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
zip::parse_direct(file, path).map_err(Error::ParseError)
},
zip::parse(file, path).map_err(Error::ParseError)
}
Some(_) | None => Err(Error::NotSupported),
}
}

View File

@@ -2,44 +2,6 @@ use crate::domain::author::Author;
use crate::domain::book::Book;
use std::path::PathBuf;
/// Parses a given file path into a vector containing a `Book` object.
///
/// # Arguments
///
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
///
/// # Returns
///
/// * `Result<Vec<Book>, String>` -
/// - On success, returns a `Vec<Book>` with a single `Book` object populated based on the input path.
/// - On failure, returns an error `String` describing the issue.
///
/// The function performs the following steps:
///
/// 1. Creates a new instance of `Book`.
/// 2. Sets the `title` of the `Book` to the string representation of the input path.
/// 3. Creates a new instance of `Author`.
/// 4. Sets the `first_name` of the `Author` to the string representation of the file extension of `path`.
/// 5. Pushes the `Author` into the `author` vector of the `Book`.
/// 6. Returns a `Vec<Book>` containing the newly created `Book`.
///
/// # Panics
///
/// The function will panic if the input path does not contain a file extension
/// (i.e., when `path.extension()` returns `None`).
///
/// # Example
///
/// ```ignore
/// use std::path::PathBuf;
///
/// let path = PathBuf::from("example.txt");
/// let books = parse(&path).unwrap();
///
/// assert_eq!(books.len(), 1);
/// assert_eq!(books[0].title, "example.txt");
/// assert_eq!(books[0].author[0].first_name, "txt");
/// ```
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
let mut book = Book::new();

View File

@@ -5,7 +5,65 @@ use std::io::{BufRead, Cursor, Read};
use std::path::Path;
use zip::ZipArchive;
pub fn parse_direct<R: Read + std::io::Seek + 'static>(
/// Parses a ZIP archive to extract and process book data.
///
/// This function reads a given ZIP archive, processes each file inside, and attempts to parse
/// them into a vector of `Book` objects. Each file in the archive is expected to be in a format
/// compatible with the `parse_source` function.
///
/// # Type Parameters
/// - `R`: A type that implements both [`Read`](std::io::Read) and [`Seek`](std::io::Seek), which allows
/// reading and seeking operations on the input file.
///
/// # Arguments
/// - `reader`: A reader implementing `Read` and `Seek`, used to access the ZIP archive.
/// - `path`: A reference to a [`Path`](std::path::Path) representing the file system path to the ZIP archive.
///
/// # Returns
/// - `Ok(Vec<Book>)`: If parsing is successful, returns a vector of `Book` objects extracted from
/// the ZIP archive.
/// - `Err(String)`: If an error occurs, returns a descriptive error message as a `String`.
///
/// # Errors
/// The function may return an error in the following cases:
/// - If the ZIP archive cannot be opened.
/// - If the function encounters issues reading from the ZIP archive or its files.
/// - If a file within the archive cannot be parsed into `Book` objects.
///
/// # Example
/// ```ignore
/// use std::fs::File;
/// use std::path::Path;
/// use your_crate::parse;
///
/// let file = File::open("test_books.zip").expect("Failed to open file");
/// let path = Path::new("test_books.zip");
///
/// match parse(file, path) {
/// Ok(books) => println!("Parsed {} books", books.len()),
/// Err(e) => eprintln!("Error parsing archive: {}", e),
/// }
/// ```
///
/// # Implementation Details
/// - The function processes each file inside the archive by reopening the archive for each iteration. This
/// ensures the `ZipArchive` does not consume other files during iteration.
/// - File contents are read into memory as raw bytes and wrapped in a [`BufReader`](std::io::BufRead) for further processing.
/// - Each file's path is dynamically constructed to include a placeholder `#` directory and its original name.
/// - The main parsing is delegated to the `parse_source` function, which returns either parsed books or an error.
///
/// # Notes
/// - As the ZIP archive is re-opened for each file, the performance may be impacted for large archives.
/// - The `parse_source` function is expected to be defined elsewhere in the codebase to handle the specific parsing logic.
///
/// # Dependencies
/// - This function depends on the `zip` crate to handle ZIP file operations and a custom `Book` and `Source` structure
/// for processing individual entries.
///
/// # See Also
/// - [`ZipArchive`](zip::read::ZipArchive): Used to interact with the ZIP archive.
/// - `parse_source`: Required function that processes individual entries to extract `Book` data.
pub fn parse<R: Read + std::io::Seek + 'static>(
reader: R,
path: &Path,
) -> Result<Vec<Book>, String> {

View File

@@ -1,9 +1,7 @@
use opds::demo;
use opds::domain::repository::{AuthorFilter, Repository};
use opds::domain::repository::BookFilter;
use opds::domain::repository::{AuthorFilter};
use quick_xml::se::to_string as to_xml_string;
use std::thread::sleep;
use std::time::Duration;
fn main() {
let app = demo();
@@ -30,18 +28,4 @@ fn main() {
println!("{:?}", book.clone().unwrap().author);
println!("{}", book.unwrap());
}
// sleep(Duration::new(10, 0));
//
// let filter = BookFilter {
// author: None,
// title: Some("foo".to_string()),
// language: None,
// description: None,
// tags: None,
// published_at: None,
// publisher: None,
// updated: None,
// };
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
}