doc: Regenerate AI docs for parsers, cleanup
This commit is contained in:
@@ -3,16 +3,110 @@ use crate::domain::book::Book;
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Read, BufRead};
|
||||
use std::io::{BufRead, BufReader};
|
||||
use std::path::Path;
|
||||
|
||||
/// Parses the content of a file at the given path to extract a list of `Book` objects.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `path` - A reference to a `Path` representing the location of the file to be parsed.
|
||||
///
|
||||
/// # Returns
|
||||
/// * `Ok(Vec<Book>)` - A vector containing the list of `Book` objects on successful parsing.
|
||||
/// * `Err(String)` - A string containing an error message if the file could not be opened or parsing failed.
|
||||
///
|
||||
/// # Errors
|
||||
/// This function returns an error in the following cases:
|
||||
/// * If the file at the specified path cannot be opened, the corresponding IO error is converted to a string and returned.
|
||||
/// * If the parsing logic inside `parse_with_reader` fails, the returned error from that function is propagated.
|
||||
///
|
||||
/// # Example
|
||||
/// ```ignore
|
||||
/// use std::path::Path;
|
||||
///
|
||||
/// let path = Path::new("book.fb2");
|
||||
/// let result = parse(path);
|
||||
///
|
||||
/// match result {
|
||||
/// Ok(books) => println!("Parsed {} books", books.len()),
|
||||
/// Err(err) => eprintln!("Error parsing books: {}", err),
|
||||
/// }
|
||||
/// ```
|
||||
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
||||
// Open the file; an I/O failure is returned early as its string form.
let file = File::open(path).map_err(|e| e.to_string())?;
|
||||
// Buffer the file so it can be handed to the `BufRead`-based parser.
let reader = BufReader::new(file);
|
||||
// Delegate the actual parsing, passing `path` through unchanged.
// NOTE(review): `parse_with_reader` is declared generic over `R: BufRead`, so the
// `Box` here looks unnecessary — confirm before removing.
parse_with_reader(Box::new(reader), path)
|
||||
}
|
||||
|
||||
|
||||
/// Parses an XML document using a buffered reader and extracts book information.
|
||||
///
|
||||
/// This function reads an XML document containing book details and maps it to a `Vec<Book>`.
|
||||
/// It supports parsing various elements such as title, language, keywords, author details,
|
||||
/// publisher, description, and publication year. Errors during parsing are propagated as strings.
|
||||
///
|
||||
/// # Type Parameters
|
||||
/// - `R`: A type that implements the [`BufRead`](std::io::BufRead) trait for buffered input.
|
||||
///
|
||||
/// # Arguments
|
||||
/// - `reader`: A buffered reader instance (e.g., [`std::io::BufReader`]) to read the XML content.
|
||||
/// - `path`: A reference to a [`Path`](std::path::Path) representing the source file or data for the book.
|
||||
///
|
||||
/// # Returns
|
||||
/// - `Ok(Vec<Book>)` if parsing was successful, containing a vector of parsed `Book` results.
|
||||
/// - `Err(String)` if an error occurred during parsing, containing a string description of the error.
|
||||
///
|
||||
/// # XML Parsing Details
|
||||
/// The following XML tags are processed:
|
||||
/// - `<book-title>`: Extracted as the book's title.
|
||||
/// - `<lang>`: Extracted as the book's language.
|
||||
/// - `<keywords>`: Split by commas into individual tags for the book.
|
||||
/// - `<annotation>`: Extracted into a multiline description for the book.
|
||||
/// - `<author>`: Processes child elements such as `<first-name>`, `<last-name>`, `<middle-name>`,
|
||||
/// and `<nickname>` to construct an author's full details.
|
||||
/// - `<year>`: Extracted as the book's publication year.
|
||||
/// - `<publisher>`: Extracted as the book's publisher.
|
||||
///
|
||||
/// # Behavior
|
||||
/// - Multiple authors are supported through repeated `<author>` tags.
|
||||
/// - Whitespace is trimmed from all parsed text.
|
||||
/// - If no content is available for certain fields (e.g., `<last-name>`), they may remain `None`
|
||||
/// or their equivalent default.
|
||||
///
|
||||
/// # Errors
|
||||
/// This function returns an error in the following cases:
|
||||
/// - The XML data contains malformed or invalid content.
|
||||
/// - Unexpected EOF is encountered during XML parsing.
|
||||
/// - Any other IO or XML parsing errors occur.
|
||||
///
|
||||
/// # Example
|
||||
/// ```ignore
|
||||
/// use std::fs::File;
|
||||
/// use std::io::BufReader;
|
||||
/// use std::path::Path;
|
||||
///
|
||||
/// let file = File::open("books.xml").expect("Failed to open file");
|
||||
/// let reader = BufReader::new(file);
|
||||
/// let path = Path::new("books.xml");
|
||||
///
|
||||
/// let result = parse_with_reader(reader, &path);
|
||||
/// match result {
|
||||
/// Ok(books) => {
|
||||
/// for book in books {
|
||||
/// println!("Book title: {}", book.title);
|
||||
/// }
|
||||
/// }
|
||||
/// Err(e) => eprintln!("Error parsing XML: {}", e),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Dependencies
|
||||
/// This function relies on an external XML parsing library capable of handling streaming XML,
|
||||
/// such as `quick-xml`. The `Book` and `author::Author` structs, along with their methods (e.g., `new`),
|
||||
/// must be defined elsewhere in the codebase.
|
||||
///
|
||||
/// # Notes
|
||||
/// - The function assumes that the XML tags match the expected structure. Unrecognized tags are ignored.
|
||||
/// - It is assumed that the user will extend support for future XML element types as needed.
|
||||
pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
|
||||
let mut reader = Reader::from_reader(reader);
|
||||
let mut buf = Vec::new();
|
||||
@@ -115,7 +209,8 @@ pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>
|
||||
}
|
||||
b"year" => {
|
||||
if let Ok(Event::Text(t)) = reader.read_event_into(&mut buf) {
|
||||
book.published_at = t.xml_content().map_err(|e| e.to_string())?.into_owned();
|
||||
book.published_at =
|
||||
t.xml_content().map_err(|e| e.to_string())?.into_owned();
|
||||
}
|
||||
}
|
||||
b"publisher" => {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::domain::book::Book;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufRead};
|
||||
use std::io::BufRead;
|
||||
use std::path::PathBuf;
|
||||
|
||||
mod fb2;
|
||||
@@ -23,28 +23,129 @@ impl fmt::Display for Error {
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a source file or location that can be read from.
|
||||
///
|
||||
/// The `Source` struct encapsulates information about a file or source location, including its
|
||||
/// path and an optional reader for reading the source's contents. This allows for flexibility
|
||||
/// in creating `Source` instances where the reading mechanism may be deferred or not immediately available.
|
||||
///
|
||||
/// # Fields
|
||||
///
|
||||
/// * `path` - A `PathBuf` representing the file system path to the source. This path must be valid.
|
||||
/// * `reader` - An optional boxed dynamic trait object (`Option<Box<dyn BufRead>>`) that represents
|
||||
/// a buffered reader capable of reading the contents of the source. If `None`, no reader
|
||||
/// is currently defined or available.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::fs::File;
|
||||
/// use std::io::{BufReader, BufRead};
|
||||
/// use std::path::PathBuf;
|
||||
///
|
||||
/// let path = PathBuf::from("example.txt");
|
||||
/// let file = File::open(&path).expect("Failed to open file");
|
||||
/// let reader = BufReader::new(file);
|
||||
///
|
||||
/// let source = Source {
|
||||
/// path,
|
||||
/// reader: Some(Box::new(reader)),
|
||||
/// };
|
||||
/// ```
|
||||
///
|
||||
/// In the above example, a `Source` instance is created with a valid path and a buffered reader.
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::path::PathBuf;
|
||||
///
|
||||
/// let source = Source {
|
||||
/// path: PathBuf::from("example.txt"),
|
||||
/// reader: None,
|
||||
/// };
|
||||
/// ```
|
||||
///
|
||||
/// In this example, a `Source` instance is created with a path but no reader. This might be useful
|
||||
/// for cases where reading is deferred to a later point in time.
|
||||
pub struct Source {
|
||||
pub path: PathBuf,
|
||||
pub reader: Option<Box<dyn BufRead>>, // The reader was made optional
|
||||
}
|
||||
|
||||
/// Parses a `Source` to extract a vector of `Book` objects.
|
||||
///
|
||||
/// This function handles different file types based on the file's extension.
|
||||
/// Supported file types and their parsing strategies include:
|
||||
/// - `.rs`: Uses the `rs::parse` function to handle Rust source files.
|
||||
/// - `.fb2`: Handles FictionBook 2 (FB2) files. This supports:
|
||||
/// - Using a provided `reader` (if `source.reader` is given).
|
||||
/// - Opening and parsing directly from the file otherwise.
|
||||
/// - `.zip`: Handles ZIP archives. Always opens the file itself as ZIP parsing
|
||||
/// requires the file to implement the `Seek` trait.
|
||||
///
|
||||
/// If the file extension is unsupported or undefined, the function returns
|
||||
/// an `Error::NotSupported`.
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - `source`: A `Source` object containing the file path and an optional `reader`
|
||||
/// for parsing.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// - `Ok(Vec<Book>)`: If the parsing is successful, returns a vector of `Book` objects.
|
||||
/// - `Err(Error)`: If an error occurs during parsing or if the file type is unsupported.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - Returns `Error::ParseError` if there is an issue reading or parsing the file.
|
||||
/// - Returns `Error::NotSupported` if the file type is unsupported.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// let source = Source {
|
||||
/// path: PathBuf::from("example.fb2"),
|
||||
/// reader: None,
|
||||
/// };
|
||||
/// let result = parse(source);
|
||||
/// match result {
|
||||
/// Ok(books) => println!("Parsed successfully: {:?}", books),
|
||||
/// Err(error) => eprintln!("Failed to parse: {:?}", error),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// - For `.fb2` files, if a `reader` is provided in the `Source` object, it will
|
||||
/// be used for parsing. Otherwise, the function will open the file and parse it.
|
||||
/// - `.zip` files require the file to support the `Seek` trait and will always
|
||||
/// be opened directly from the file system.
|
||||
///
|
||||
/// # File Types
|
||||
///
|
||||
/// - `.rs`: Rust source files.
|
||||
/// - `.fb2`: FictionBook 2 files.
|
||||
/// - `.zip`: ZIP archives.
|
||||
///
|
||||
/// # See Also
|
||||
///
|
||||
/// - [`rs::parse`]
|
||||
/// - [`fb2::parse`]
|
||||
/// - [`fb2::parse_with_reader`]
|
||||
/// - [`zip::parse`]
|
||||
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
|
||||
let path = &source.path;
|
||||
|
||||
// Dispatch on the file extension; a missing or non-UTF-8 extension yields `None`
// and falls through to `Error::NotSupported`.
match path.extension().and_then(|s| s.to_str()) {
|
||||
Some("rs") => rs::parse(path).map_err(Error::ParseError),
|
||||
Some("fb2") => {
|
||||
// If a reader is provided, use it; otherwise open the file
|
||||
match source.reader {
|
||||
Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
|
||||
None => fb2::parse(path).map_err(Error::ParseError),
|
||||
}
|
||||
Some("fb2") => match source.reader {
|
||||
Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
|
||||
None => fb2::parse(path).map_err(Error::ParseError),
|
||||
},
|
||||
Some("zip") => {
|
||||
// ZIP always opens the file itself because `Seek` is required
|
||||
let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
|
||||
zip::parse_direct(file, path).map_err(Error::ParseError)
|
||||
},
|
||||
zip::parse(file, path).map_err(Error::ParseError)
|
||||
}
|
||||
Some(_) | None => Err(Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,44 +2,6 @@ use crate::domain::author::Author;
|
||||
use crate::domain::book::Book;
|
||||
use std::path::PathBuf;
|
||||
|
||||
/// Parses a given file path into a vector containing a `Book` object.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Result<Vec<Book>, String>` -
|
||||
/// - On success, returns a `Vec<Book>` with a single `Book` object populated based on the input path.
|
||||
/// - On failure, returns an error `String` describing the issue.
|
||||
///
|
||||
/// The function performs the following steps:
|
||||
///
|
||||
/// 1. Creates a new instance of `Book`.
|
||||
/// 2. Sets the `title` of the `Book` to the string representation of the input path.
|
||||
/// 3. Creates a new instance of `Author`.
|
||||
/// 4. Sets the `first_name` of the `Author` to the string representation of the file extension of `path`.
|
||||
/// 5. Pushes the `Author` into the `author` vector of the `Book`.
|
||||
/// 6. Returns a `Vec<Book>` containing the newly created `Book`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// The function will panic if the input path does not contain a file extension
|
||||
/// (i.e., when `path.extension()` returns `None`).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::path::PathBuf;
|
||||
///
|
||||
/// let path = PathBuf::from("example.txt");
|
||||
/// let books = parse(&path).unwrap();
|
||||
///
|
||||
/// assert_eq!(books.len(), 1);
|
||||
/// assert_eq!(books[0].title, "example.txt");
|
||||
/// assert_eq!(books[0].author[0].first_name, "txt");
|
||||
/// ```
|
||||
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, String> {
|
||||
let mut book = Book::new();
|
||||
|
||||
|
||||
@@ -5,7 +5,65 @@ use std::io::{BufRead, Cursor, Read};
|
||||
use std::path::Path;
|
||||
use zip::ZipArchive;
|
||||
|
||||
pub fn parse_direct<R: Read + std::io::Seek + 'static>(
|
||||
/// Parses a ZIP archive to extract and process book data.
|
||||
///
|
||||
/// This function reads a given ZIP archive, processes each file inside, and attempts to parse
|
||||
/// them into a vector of `Book` objects. Each file in the archive is expected to be in a format
|
||||
/// compatible with the `parse_source` function.
|
||||
///
|
||||
/// # Type Parameters
|
||||
/// - `R`: A type that implements both [`Read`](std::io::Read) and [`Seek`](std::io::Seek), which allows
|
||||
/// reading and seeking operations on the input file.
|
||||
///
|
||||
/// # Arguments
|
||||
/// - `reader`: A reader implementing `Read` and `Seek`, used to access the ZIP archive.
|
||||
/// - `path`: A reference to a [`Path`](std::path::Path) representing the file system path to the ZIP archive.
|
||||
///
|
||||
/// # Returns
|
||||
/// - `Ok(Vec<Book>)`: If parsing is successful, returns a vector of `Book` objects extracted from
|
||||
/// the ZIP archive.
|
||||
/// - `Err(String)`: If an error occurs, returns a descriptive error message as a `String`.
|
||||
///
|
||||
/// # Errors
|
||||
/// The function may return an error in the following cases:
|
||||
/// - If the ZIP archive cannot be opened.
|
||||
/// - If the function encounters issues reading from the ZIP archive or its files.
|
||||
/// - If a file within the archive cannot be parsed into `Book` objects.
|
||||
///
|
||||
/// # Example
|
||||
/// ```ignore
|
||||
/// use std::fs::File;
|
||||
/// use std::path::Path;
|
||||
/// use your_crate::parse;
|
||||
///
|
||||
/// let file = File::open("test_books.zip").expect("Failed to open file");
|
||||
/// let path = Path::new("test_books.zip");
|
||||
///
|
||||
/// match parse(file, &path) {
|
||||
/// Ok(books) => println!("Parsed {} books", books.len()),
|
||||
/// Err(e) => eprintln!("Error parsing archive: {}", e),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Implementation Details
|
||||
/// - The function processes each file inside the archive by reopening the archive for each iteration. This
|
||||
/// ensures the `ZipArchive` does not consume other files during iteration.
|
||||
/// - File contents are read into memory as raw bytes and wrapped in a [`BufReader`](std::io::BufRead) for further processing.
|
||||
/// - Each file's path is dynamically constructed to include a placeholder `#` directory and its original name.
|
||||
/// - The main parsing is delegated to the `parse_source` function, which returns either parsed books or an error.
|
||||
///
|
||||
/// # Notes
|
||||
/// - As the ZIP archive is re-opened for each file, the performance may be impacted for large archives.
|
||||
/// - The `parse_source` function is expected to be defined elsewhere in the codebase to handle the specific parsing logic.
|
||||
///
|
||||
/// # Dependencies
|
||||
/// - This function depends on the `zip` crate to handle ZIP file operations and a custom `Book` and `Source` structure
|
||||
/// for processing individual entries.
|
||||
///
|
||||
/// # See Also
|
||||
/// - [`ZipArchive`](zip::read::ZipArchive): Used to interact with the ZIP archive.
|
||||
/// - `parse_source`: Required function that processes individual entries to extract `Book` data.
|
||||
pub fn parse<R: Read + std::io::Seek + 'static>(
|
||||
reader: R,
|
||||
path: &Path,
|
||||
) -> Result<Vec<Book>, String> {
|
||||
|
||||
20
src/main.rs
20
src/main.rs
@@ -1,15 +1,13 @@
|
||||
use opds::demo;
|
||||
use opds::domain::repository::{AuthorFilter, Repository};
|
||||
use opds::domain::repository::BookFilter;
|
||||
use opds::domain::repository::{AuthorFilter};
|
||||
use quick_xml::se::to_string as to_xml_string;
|
||||
use std::thread::sleep;
|
||||
use std::time::Duration;
|
||||
|
||||
fn main() {
|
||||
let app = demo();
|
||||
|
||||
let filter = BookFilter {
|
||||
author: Some(AuthorFilter{
|
||||
author: Some(AuthorFilter {
|
||||
id: None,
|
||||
name: None, //Some("rs".to_string()),
|
||||
}),
|
||||
@@ -30,18 +28,4 @@ fn main() {
|
||||
println!("{:?}", book.clone().unwrap().author);
|
||||
println!("{}", book.unwrap());
|
||||
}
|
||||
|
||||
// sleep(Duration::new(10, 0));
|
||||
//
|
||||
// let filter = BookFilter {
|
||||
// author: None,
|
||||
// title: Some("foo".to_string()),
|
||||
// language: None,
|
||||
// description: None,
|
||||
// tags: None,
|
||||
// published_at: None,
|
||||
// publisher: None,
|
||||
// updated: None,
|
||||
// };
|
||||
// println!("{}", to_xml_string(&app.books.books_feed(filter)).unwrap());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user