@@ -11,6 +11,7 @@ use crate::domain::book::Book;
|
||||
use std::collections::VecDeque;
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
use crate::application::parsers::Source;
|
||||
|
||||
pub struct Loader {
|
||||
root: PathBuf,
|
||||
@@ -76,7 +77,7 @@ impl Iterator for LoaderIter {
|
||||
|
||||
impl LoaderIter {
|
||||
fn parse_path(path: &PathBuf) -> Option<Vec<Book>> {
|
||||
match parsers::parse(&path) {
|
||||
match parsers::parse(Source{ path: (*path).clone(), reader: None}) {
|
||||
Ok(books) => return Some(books),
|
||||
Err(err) => {
|
||||
match err {
|
||||
|
||||
@@ -8,6 +8,7 @@ use std::collections::VecDeque;
|
||||
use std::ffi::OsStr;
|
||||
use std::io;
|
||||
use std::path::PathBuf;
|
||||
use crate::application::parsers::Source;
|
||||
|
||||
const BUFFER_SIZE: usize = 4096;
|
||||
|
||||
@@ -72,7 +73,7 @@ impl<'a> LoaderIter<'a> {
|
||||
return None;
|
||||
}
|
||||
|
||||
match parsers::parse(&path) {
|
||||
match parsers::parse(Source{path: path.clone(), reader: None}) {
|
||||
Ok(books) => {
|
||||
for book in &books {
|
||||
println!("{}", book);
|
||||
|
||||
@@ -3,82 +3,18 @@ use crate::domain::book::Book;
|
||||
use quick_xml::events::Event;
|
||||
use quick_xml::Reader;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::io::{BufReader, Read, BufRead};
|
||||
use std::path::Path;
|
||||
|
||||
/// Parses an XML file located at the given path and extracts information about books.
|
||||
///
|
||||
/// This function processes the XML structure using a streaming XML reader to extract details
|
||||
/// about books, including:
|
||||
/// - Title
|
||||
/// - Language
|
||||
/// - Keywords
|
||||
/// - Authors (including optional details like first name, last name, middle name, and nickname)
|
||||
/// - Publication year
|
||||
/// - Publisher
|
||||
/// - Description
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - A reference to the file path (`&Path`) of the XML file to parse.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns a `Result` where:
|
||||
/// - `Ok(Vec<Book>)` contains a vector of `Book` objects constructed from the parsed XML.
|
||||
/// - `Err(String)` contains an error message if the parsing fails at any stage.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns an error in the following scenarios:
|
||||
/// - Unable to open the file specified by `path`.
|
||||
/// - Malformed XML data in the file.
|
||||
/// - Issues during data extraction, such as reading incomplete or invalid values.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::path::Path;
|
||||
/// let path = Path::new("books.xml");
|
||||
/// match parse(&path) {
|
||||
/// Ok(books) => {
|
||||
/// for book in books {
|
||||
/// println!("Book Title: {}", book.title);
|
||||
/// }
|
||||
/// },
|
||||
/// Err(err) => eprintln!("Failed to parse XML file: {}", err),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # XML Structure
|
||||
///
|
||||
/// The XML should follow a specific schema with the following relevant elements:
|
||||
/// - `<book-title>`: Title of the book.
|
||||
/// - `<lang>`: Language of the book.
|
||||
/// - `<keywords>`: A comma-separated list of keywords/tags.
|
||||
/// - `<author>`: Contains subfields `<first-name>`, `<last-name>`, `<middle-name>`, or `<nickname>`.
|
||||
/// - `<year>`: Year of publication.
|
||||
/// - `<publisher>`: Publisher's name.
|
||||
/// - `<annotation>`: Description or annotation of the book.
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// - Author data is flexible; if a nickname exists, it will override other name details.
|
||||
/// - The resulting `Vec<Book>` contains just one book object, as indicated in the implementation.
|
||||
///
|
||||
/// # Dependencies
|
||||
///
|
||||
/// This function depends on the following crates:
|
||||
/// - `quick-xml`: For fast XML parsing.
|
||||
/// - `uuid`: To generate a unique identifier for each book.
|
||||
/// - `chrono`: To serialize the current timestamp as an RFC3339 string.
|
||||
///
|
||||
/// # See Also
|
||||
///
|
||||
/// `Book` structure, which represents the parsed data for an individual book.
|
||||
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
||||
let file = File::open(path).map_err(|e| e.to_string())?;
|
||||
let mut reader = Reader::from_reader(BufReader::new(file));
|
||||
let reader = BufReader::new(file);
|
||||
parse_with_reader(Box::new(reader), path)
|
||||
}
|
||||
|
||||
|
||||
pub fn parse_with_reader<R: BufRead>(reader: R, path: &Path) -> Result<Vec<Book>, String> {
|
||||
let mut reader = Reader::from_reader(reader);
|
||||
let mut buf = Vec::new();
|
||||
|
||||
let mut in_title = false;
|
||||
@@ -87,6 +23,7 @@ pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
||||
let mut in_description = false;
|
||||
|
||||
let mut book = Book::new();
|
||||
book.source = path.into();
|
||||
|
||||
loop {
|
||||
match reader.read_event_into(&mut buf) {
|
||||
|
||||
@@ -1,17 +1,13 @@
|
||||
use std::fmt;
|
||||
use crate::domain::book::Book;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, BufRead};
|
||||
use std::path::PathBuf;
|
||||
|
||||
mod rs;
|
||||
mod fb2;
|
||||
mod rs;
|
||||
mod zip;
|
||||
|
||||
|
||||
/// Error enumeration representing possible errors that can occur when parsing files.
|
||||
///
|
||||
/// This enumeration has the following variants:
|
||||
/// - `NotSupported`: Indicates that the file format or extension is not supported.
|
||||
/// - `ParseError`: Contains a `String` representing the error message when a parsing process fails.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
NotSupported,
|
||||
@@ -27,53 +23,28 @@ impl fmt::Display for Error {
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a file at the given path and attempts to convert its contents into a vector of `Book` objects.
|
||||
///
|
||||
/// This function determines the file type based on its extension and delegates the parsing duties
|
||||
/// to the appropriate module. Supported file extensions are:
|
||||
/// - `.rs`: Processed by the `rs` module.
|
||||
/// - `.fb2`: Processed by the `fb2` module.
|
||||
/// - `.zip`: Processed by the `zip` module.
|
||||
///
|
||||
/// If the file's extension is unsupported or missing, this function returns a `NotSupported` error.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - A reference to a `PathBuf` that represents the file path to be parsed.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Ok(Vec<Book>)` - A vector of `Book` objects if the file was successfully parsed.
|
||||
/// * `Err(Error)` - An error if the file could not be parsed, the parsing process encountered
|
||||
/// an issue, or the file extension is not supported.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// - `Error::ParseError` - If the file parsing fails.
|
||||
/// - `Error::NotSupported` - If the file's extension is unsupported or missing.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::path::PathBuf;
|
||||
///
|
||||
/// let path = PathBuf::from("example.rs");
|
||||
/// let books = parse(&path);
|
||||
/// match books {
|
||||
/// Ok(book_list) => println!("Parsed {} books.", book_list.len()),
|
||||
/// Err(e) => println!("Failed to parse file: {:?}", e),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Notes
|
||||
///
|
||||
/// Ensure that the appropriate parsers (`rs`, `fb2`, `zip`) are properly implemented
|
||||
/// and handle all required logic for their respective file types to avoid unexpected errors.
|
||||
pub fn parse(path: &PathBuf) -> Result<Vec<Book>, Error> {
|
||||
/// Input handed to `parse`: a path plus an optional, already-open reader.
pub struct Source {
|
||||
/// Path of the item to parse; `parse` selects the parser from its extension.
pub path: PathBuf,
|
||||
/// Optional pre-opened content. `parse` passes it to the `fb2` parser when present;
/// the `zip` branch ignores it and opens `path` itself.
pub reader: Option<Box<dyn BufRead>>, // reader is optional
|
||||
}
|
||||
|
||||
pub fn parse(source: Source) -> Result<Vec<Book>, Error> {
|
||||
let path = &source.path;
|
||||
|
||||
match path.extension().and_then(|s| s.to_str()) {
|
||||
Some("rs") => rs::parse(path).map_err(Error::ParseError),
|
||||
Some("fb2") => fb2::parse(path).map_err(Error::ParseError),
|
||||
Some("zip") => zip::parse(path).map_err(Error::ParseError),
|
||||
Some("fb2") => {
|
||||
// Если reader предоставлен, используем его, иначе открываем файл
|
||||
match source.reader {
|
||||
Some(reader) => fb2::parse_with_reader(reader, path).map_err(Error::ParseError),
|
||||
None => fb2::parse(path).map_err(Error::ParseError),
|
||||
}
|
||||
},
|
||||
Some("zip") => {
|
||||
// ZIP всегда открывает файлы сам, так как нужен Seek
|
||||
let file = File::open(path).map_err(|e| Error::ParseError(e.to_string()))?;
|
||||
zip::parse_direct(file, path).map_err(Error::ParseError)
|
||||
},
|
||||
Some(_) | None => Err(Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,72 +1,44 @@
|
||||
use crate::application::parsers;
|
||||
use crate::application::parsers::{parse as parse_source, Source};
|
||||
use crate::domain::book::Book;
|
||||
use std::fs::File;
|
||||
use std::io::BufReader;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::io::{BufRead, Cursor, Read};
|
||||
use std::path::Path;
|
||||
use zip::ZipArchive;
|
||||
|
||||
|
||||
/// Parses a ZIP archive to extract a collection of `Book` objects.
|
||||
///
|
||||
/// This function takes a path to a ZIP archive file, reads its contents, and processes
|
||||
/// each file within the archive to extract `Book` objects using a custom parser. If any
|
||||
/// errors occur during file access, archive extraction, or parsing, they are returned as
|
||||
/// a `String`. On success, it returns a vector of `Book` objects contained in the archive.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `path` - A reference to a `Path` representing the file system path to the ZIP archive.
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// * `Ok(Vec<Book>)` - A vector containing the `Book` objects successfully parsed
|
||||
/// from the files in the archive.
|
||||
/// * `Err(String)` - An error message if any step in opening the file, reading the archive,
|
||||
/// or parsing the files fails.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// This function returns an error in the following cases:
|
||||
/// * If the ZIP file cannot be opened.
|
||||
/// * If the ZIP archive cannot be read.
|
||||
/// * If an individual file within the archive cannot be accessed.
|
||||
/// * If the parsing of a file fails.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```ignore
|
||||
/// use std::path::Path;
|
||||
/// use your_crate::parse;
|
||||
///
|
||||
/// let path = Path::new("books_archive.zip");
|
||||
/// match parse(&path) {
|
||||
/// Ok(books) => {
|
||||
/// for book in books {
|
||||
/// println!("Parsed book: {:?}", book);
|
||||
/// }
|
||||
/// }
|
||||
/// Err(e) => eprintln!("Failed to parse books: {}", e),
|
||||
/// }
|
||||
/// ```
|
||||
///
|
||||
/// # Dependencies
|
||||
///
|
||||
/// This function relies on the `ZipArchive` for working with ZIP files and a `parsers`
|
||||
/// module for custom file parsing logic.
|
||||
pub fn parse(path: &Path) -> Result<Vec<Book>, String> {
|
||||
let file = File::open(path).map_err(|e| e.to_string())?;
|
||||
let reader = BufReader::new(file);
|
||||
let mut archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
|
||||
|
||||
pub fn parse_direct<R: Read + std::io::Seek + 'static>(
|
||||
reader: R,
|
||||
path: &Path,
|
||||
) -> Result<Vec<Book>, String> {
|
||||
let archive = ZipArchive::new(reader).map_err(|e| e.to_string())?;
|
||||
let mut books: Vec<Book> = Vec::new();
|
||||
|
||||
for i in 0..archive.len() {
|
||||
let file = archive.by_index(i).map_err(|e| e.to_string())?;
|
||||
let name = file.name().to_string();
|
||||
// We need the archive's path in order to open the files inside it
|
||||
let archive_path = path.to_path_buf();
|
||||
|
||||
match parsers::parse(&PathBuf::from(name.to_lowercase())) {
|
||||
for i in 0..archive.len() {
|
||||
// Re-open the archive for every entry, since ZipArchive consumes files
|
||||
let file = File::open(&archive_path).map_err(|e| e.to_string())?;
|
||||
let mut archive = ZipArchive::new(file).map_err(|e| e.to_string())?;
|
||||
let mut zip_file = archive.by_index(i).map_err(|e| e.to_string())?;
|
||||
|
||||
let name = zip_file.name().to_string();
|
||||
let file_path = path.to_path_buf().join("#").join(&name);
|
||||
|
||||
// Read the entry's contents into memory so they can be wrapped in an in-memory `BufRead` reader
|
||||
let mut contents = Vec::new();
|
||||
zip_file
|
||||
.read_to_end(&mut contents)
|
||||
.map_err(|e| e.to_string())?;
|
||||
|
||||
let file_reader: Box<dyn BufRead> = Box::new(Cursor::new(contents)); // Изменили на BufRead
|
||||
let source = Source {
|
||||
path: file_path,
|
||||
reader: Some(file_reader),
|
||||
};
|
||||
|
||||
match parse_source(source) {
|
||||
Ok(new_books) => books.extend(new_books),
|
||||
Err(e) => return Err(e.to_string()),
|
||||
Err(e) => return Err(format!("Error parsing {}: {}", name, e)),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -68,13 +68,14 @@ impl Books {
|
||||
}
|
||||
|
||||
pub fn add_books_from_path(&mut self) {
|
||||
let iter = fs::Loader::new(PathBuf::from(&self.root));
|
||||
let books = fs::Loader::new(PathBuf::from(&self.root))
|
||||
.into_iter()
|
||||
.map(|mut book| {book.source = book.source.strip_prefix(&self.root).unwrap().into(); book})
|
||||
.collect();
|
||||
|
||||
match self.repo.lock() {
|
||||
Ok(mut repo) => {
|
||||
for book in iter {
|
||||
repo.add(book);
|
||||
}
|
||||
repo.bulk_add(books);
|
||||
}
|
||||
Err(err) => eprintln!("{}", err),
|
||||
}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::domain::author;
|
||||
use std::fmt;
|
||||
use std::path::PathBuf;
|
||||
use chrono::{DateTime, Utc};
|
||||
use uuid::Uuid;
|
||||
|
||||
@@ -14,6 +15,7 @@ pub struct Book {
|
||||
pub published_at: String,
|
||||
pub publisher: String,
|
||||
pub updated: DateTime<Utc>,
|
||||
pub source: PathBuf
|
||||
}
|
||||
|
||||
impl Book {
|
||||
@@ -28,6 +30,7 @@ impl Book {
|
||||
published_at: "".to_string(),
|
||||
publisher: "".to_string(),
|
||||
updated: chrono::Utc::now(),
|
||||
source: PathBuf::new()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,7 +63,7 @@ impl fmt::Display for Book {
|
||||
.collect::<Vec<_>>()
|
||||
.join(";");
|
||||
|
||||
write!(f, "{} by {}", self.title, authors)
|
||||
write!(f, "{} by {} at {}", self.title, authors, self.source.to_str().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -74,6 +74,7 @@ struct Book {
|
||||
published_at: String,
|
||||
publisher: String,
|
||||
updated: String,
|
||||
source: String,
|
||||
}
|
||||
|
||||
impl From<book::Book> for Book {
|
||||
@@ -88,6 +89,7 @@ impl From<book::Book> for Book {
|
||||
published_at: book.published_at,
|
||||
publisher: book.publisher,
|
||||
updated: book.updated.to_rfc3339(),
|
||||
source: book.source.as_os_str().to_str().unwrap().to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -114,6 +116,7 @@ impl Into<book::Book> for Book {
|
||||
published_at: self.published_at,
|
||||
publisher: self.publisher,
|
||||
updated: chrono::DateTime::parse_from_rfc3339(&self.updated).unwrap_or_default().to_utc(),
|
||||
source: self.source.into(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,7 +13,7 @@ fn main() {
|
||||
id: None,
|
||||
name: None, //Some("rs".to_string()),
|
||||
}),
|
||||
title: Some("Сборник".to_string()),
|
||||
title: Some("пов".to_string()),
|
||||
language: None,
|
||||
description: None,
|
||||
tags: None,
|
||||
@@ -27,7 +27,8 @@ fn main() {
|
||||
|
||||
if let Some(book) = res.entry.iter().next() {
|
||||
let book = app.repo.lock().unwrap().get(book.id.to_string().clone());
|
||||
println!("{:?}", book.unwrap().author);
|
||||
println!("{:?}", book.clone().unwrap().author);
|
||||
println!("{}", book.unwrap());
|
||||
}
|
||||
|
||||
// sleep(Duration::new(10, 0));
|
||||
|
||||
Reference in New Issue
Block a user