From 7c1b0f946bc001c1a0750f3dc67c0120d084f834 Mon Sep 17 00:00:00 2001 From: Boris Serebrov Date: Sun, 22 Mar 2026 19:26:19 -0400 Subject: [PATCH 1/2] Update download --recursive to auto-export google docs. --- src/common/drive_file.rs | 8 +++++++ src/common/file_tree_drive.rs | 44 +++++++++++++++++++++++++++++++++-- src/files/download.rs | 29 +++++++++++++++++++---- 3 files changed, 74 insertions(+), 7 deletions(-) diff --git a/src/common/drive_file.rs b/src/common/drive_file.rs index 6278b63..15d3074 100644 --- a/src/common/drive_file.rs +++ b/src/common/drive_file.rs @@ -120,6 +120,14 @@ impl DocType { } } + pub fn default_office_export_type(&self) -> FileExtension { + match self { + DocType::Document => FileExtension::Docx, + DocType::Spreadsheet => FileExtension::Xlsx, + DocType::Presentation => FileExtension::Pptx, + } + } + pub fn can_export_to(&self, extension: &FileExtension) -> bool { self.supported_export_types().contains(extension) } diff --git a/src/common/file_tree_drive.rs b/src/common/file_tree_drive.rs index a6e816d..86c9aa3 100644 --- a/src/common/file_tree_drive.rs +++ b/src/common/file_tree_drive.rs @@ -127,8 +127,14 @@ impl Folder { let f = File::from_file(&file, &folder).await?; let node = Node::FileNode(f); children.push(node); - } else { - // Skip documents + } else if drive_file::DocType::from_mime_type( + file.mime_type.as_deref().unwrap_or_default(), + ) + .is_some() + { + let f = File::from_google_doc(&file, &folder).await?; + let node = Node::FileNode(f); + children.push(node); } } @@ -199,6 +205,7 @@ pub struct File { pub parent: Folder, pub drive_id: String, pub md5: Option, + pub mime_type: Option, } impl File { @@ -217,14 +224,47 @@ impl File { parent: parent.clone(), drive_id: file_id, md5, + mime_type: file.mime_type.clone(), }; Ok(file) } + pub async fn from_google_doc( + file: &google_drive3::api::File, + parent: &Folder, + ) -> Result { + let name = file.name.clone().ok_or(Error::MissingFileName)?; + let file_id = file.id.clone().ok_or(Error::MissingFileId)?; + + let doc_type = drive_file::DocType::from_mime_type( + file.mime_type.as_deref().unwrap_or_default(), + ); + let export_ext = doc_type + .map(|dt| dt.default_office_export_type().to_string()) + .unwrap_or_default(); + let export_name = format!("{}.{}", name, export_ext); + + Ok(File { + name: export_name, + size: 0, + parent: parent.clone(), + drive_id: file_id, + md5: None, + mime_type: file.mime_type.clone(), + }) + } + pub fn relative_path(&self) -> PathBuf { self.parent.relative_path().join(&self.name) } + + pub fn is_google_doc(&self) -> bool { + self.mime_type + .as_deref() + .and_then(drive_file::DocType::from_mime_type) + .is_some() + } } #[derive(Debug)] diff --git a/src/files/download.rs b/src/files/download.rs index 0f08f60..dbbf5ab 100644 --- a/src/files/download.rs +++ b/src/files/download.rs @@ -1,4 +1,5 @@ use crate::common::drive_file; +use crate::common::drive_file::DocType; use crate::common::file_tree_drive; use crate::common::file_tree_drive::FileTreeDrive; use crate::common::hub_helper; @@ -167,12 +168,28 @@ pub async fn download_directory( continue; } - let body = download_file(&hub, &file.drive_id) - .await - .map_err(Error::DownloadFile)?; + if file.is_google_doc() { + let doc_type = DocType::from_mime_type( + file.mime_type.as_deref().unwrap_or_default(), + ) + .unwrap(); + let export_ext = doc_type.default_office_export_type(); + let mime_type = export_ext.get_export_mime().unwrap(); + + let body = files::export::export_file(&hub, &file.drive_id, &mime_type) + .await + .map_err(Error::ExportFile)?; - println!("Downloading file '{}'", file_path.display()); - save_body_to_file(body, &abs_file_path, file.md5.clone()).await?; + println!("Exporting {} '{}'", doc_type, file_path.display()); + save_body_to_file(body, &abs_file_path, None).await?; + } else { + let body = download_file(&hub, &file.drive_id) + .await + .map_err(Error::DownloadFile)?; + + println!("Downloading file '{}'", file_path.display()); + save_body_to_file(body, &abs_file_path, file.md5.clone()).await?; + } } } @@ -204,6 +221,7 @@ pub enum Error { Hub(hub_helper::Error), GetFile(google_drive3::Error), DownloadFile(google_drive3::Error), + ExportFile(google_drive3::Error), MissingFileName, FileExists(PathBuf), IsDirectory(String), @@ -231,6 +249,7 @@ impl Display for Error { Error::Hub(err) => write!(f, "{}", err), Error::GetFile(err) => write!(f, "Failed getting file: {}", err), Error::DownloadFile(err) => write!(f, "Failed to download file: {}", err), + Error::ExportFile(err) => write!(f, "Failed to export file: {}", err), Error::MissingFileName => write!(f, "File does not have a name"), Error::FileExists(path) => write!( f, From 0daaaac2f1a2d84ec415a3ea1266f48f1ed92141 Mon Sep 17 00:00:00 2001 From: Boris Serebrov Date: Sun, 22 Mar 2026 19:27:10 -0400 Subject: [PATCH 2/2] Change the recursive download to traverse the tree and download at the same time. --- src/files/download.rs | 150 +++++++++++++++++++++++------------------- 1 file changed, 81 insertions(+), 69 deletions(-) diff --git a/src/files/download.rs b/src/files/download.rs index dbbf5ab..6f0d688 100644 --- a/src/files/download.rs +++ b/src/files/download.rs @@ -1,7 +1,5 @@ use crate::common::drive_file; use crate::common::drive_file::DocType; -use crate::common::file_tree_drive; -use crate::common::file_tree_drive::FileTreeDrive; use crate::common::hub_helper; use crate::common::md5_writer::Md5Writer; use crate::files; @@ -137,69 +135,101 @@ pub async fn download_directory( file: &google_drive3::api::File, config: &Config, ) -> Result<(), Error> { - let tree = FileTreeDrive::from_file(&hub, &file) - .await - .map_err(Error::CreateFileTree)?; + let root_path = config.canonical_destination_root()?; + let dir_name = file.name.clone().ok_or(Error::MissingFileName)?; + let dir_path = PathBuf::from(&dir_name); - let tree_info = tree.info(); + let mut stats = DownloadStats::default(); + download_directory_recursive(hub, file, &root_path, &dir_path, &mut stats).await?; println!( - "Found {} files in {} directories with a total size of {}", - tree_info.file_count, - tree_info.folder_count, - human_bytes(tree_info.total_file_size as f64) + "Downloaded {} files in {} directories with a total size of {}", + stats.file_count, + stats.folder_count, + human_bytes(stats.total_file_size as f64) ); - let root_path = config.canonical_destination_root()?; - - for folder in &tree.folders() { - let folder_path = folder.relative_path(); - let abs_folder_path = root_path.join(&folder_path); + Ok(()) +} - println!("Creating directory {}", folder_path.display()); - fs::create_dir_all(&abs_folder_path) - .map_err(|err| Error::CreateDirectory(abs_folder_path, err))?; +#[derive(Default)] +struct DownloadStats { + file_count: u64, + folder_count: u64, + total_file_size: u64, +} - for file in folder.files() { - let file_path = file.relative_path(); +#[async_recursion] +async fn download_directory_recursive( + hub: &Hub, + dir_file: &google_drive3::api::File, + root_path: &PathBuf, + dir_path: &PathBuf, + stats: &mut DownloadStats, +) -> Result<(), Error> { + let abs_dir_path = root_path.join(dir_path); + println!("Creating directory {}", dir_path.display()); + fs::create_dir_all(&abs_dir_path) + .map_err(|err| Error::CreateDirectory(abs_dir_path.clone(), err))?; + stats.folder_count += 1; + + let file_id = dir_file.id.clone().ok_or(Error::MissingFileName)?; + let children = files::list::list_files( + hub, + &files::list::ListFilesConfig { + query: files::list::ListQuery::FilesInFolder { folder_id: file_id }, + order_by: Default::default(), + max_files: usize::MAX, + }, + ) + .await + .map_err(Error::ListFiles)?; + + for child in &children { + if drive_file::is_directory(child) { + let child_name = child.name.clone().ok_or(Error::MissingFileName)?; + let child_path = dir_path.join(&child_name); + download_directory_recursive(hub, child, root_path, &child_path, stats).await?; + } else if drive_file::is_binary(child) { + let file_name = child.name.clone().ok_or(Error::MissingFileName)?; + let file_path = dir_path.join(&file_name); let abs_file_path = root_path.join(&file_path); - if local_file_is_identical(&abs_file_path, &file) { - continue; + if abs_file_path.exists() { + let file_md5 = compute_md5_from_path(&abs_file_path).unwrap_or_default(); + if child.md5_checksum.as_deref() == Some(&file_md5) { + continue; + } } - if file.is_google_doc() { - let doc_type = DocType::from_mime_type( - file.mime_type.as_deref().unwrap_or_default(), - ) - .unwrap(); - let export_ext = doc_type.default_office_export_type(); - let mime_type = export_ext.get_export_mime().unwrap(); + let body = download_file(hub, child.id.as_deref().unwrap_or_default()) + .await + .map_err(Error::DownloadFile)?; + + println!("Downloading file '{}'", file_path.display()); + save_body_to_file(body, &abs_file_path, child.md5_checksum.clone()).await?; + stats.file_count += 1; + stats.total_file_size += child.size.unwrap_or(0) as u64; + } else if let Some(doc_type) = DocType::from_mime_type( + child.mime_type.as_deref().unwrap_or_default(), + ) { + let file_name = child.name.clone().ok_or(Error::MissingFileName)?; + let export_ext = doc_type.default_office_export_type(); + let export_name = format!("{}.{}", file_name, export_ext); + let file_path = dir_path.join(&export_name); + let abs_file_path = root_path.join(&file_path); - let body = files::export::export_file(&hub, &file.drive_id, &mime_type) - .await - .map_err(Error::ExportFile)?; + let mime_type = export_ext.get_export_mime().unwrap(); + let body = files::export::export_file(hub, child.id.as_deref().unwrap_or_default(), &mime_type) + .await + .map_err(Error::ExportFile)?; - println!("Exporting {} '{}'", doc_type, file_path.display()); - save_body_to_file(body, &abs_file_path, None).await?; - } else { - let body = download_file(&hub, &file.drive_id) - .await - .map_err(Error::DownloadFile)?; - - println!("Downloading file '{}'", file_path.display()); - save_body_to_file(body, &abs_file_path, file.md5.clone()).await?; - } + println!("Exporting {} '{}'", doc_type, file_path.display()); + save_body_to_file(body, &abs_file_path, None).await?; + stats.file_count += 1; } } - println!( - "Downloaded {} files in {} directories with a total size of {}", - tree_info.file_count, - tree_info.folder_count, - human_bytes(tree_info.total_file_size as f64) - ); - Ok(()) } @@ -222,6 +252,7 @@ pub enum Error { GetFile(google_drive3::Error), DownloadFile(google_drive3::Error), ExportFile(google_drive3::Error), + ListFiles(files::list::Error), MissingFileName, FileExists(PathBuf), IsDirectory(String), @@ -232,7 +263,6 @@ pub enum Error { RenameFile(io::Error), ReadChunk(hyper::Error), WriteChunk(io::Error), - CreateFileTree(file_tree_drive::Error), DestinationPathDoesNotExist(PathBuf), DestinationPathNotADirectory(PathBuf), CanonicalizeDestinationPath(PathBuf, io::Error), @@ -250,6 +280,7 @@ impl Display for Error { Error::GetFile(err) => write!(f, "Failed getting file: {}", err), Error::DownloadFile(err) => write!(f, "Failed to download file: {}", err), Error::ExportFile(err) => write!(f, "Failed to export file: {}", err), + Error::ListFiles(err) => write!(f, "Failed to list files: {}", err), Error::MissingFileName => write!(f, "File does not have a name"), Error::FileExists(path) => write!( f, @@ -280,7 +311,6 @@ impl Display for Error { Error::RenameFile(err) => write!(f, "Failed to rename file: {}", err), Error::ReadChunk(err) => write!(f, "Failed read from stream: {}", err), Error::WriteChunk(err) => write!(f, "Failed write to file: {}", err), - Error::CreateFileTree(err) => write!(f, "Failed to create file tree: {}", err), Error::DestinationPathDoesNotExist(path) => { write!(f, "Destination path '{}' does not exist", path.display()) } @@ -422,24 +452,6 @@ fn err_if_md5_mismatch(expected: Option, actual: String) -> Result<(), E } } -fn local_file_is_identical(path: &PathBuf, file: &file_tree_drive::File) -> bool { - if path.exists() { - let file_md5 = compute_md5_from_path(path).unwrap_or_else(|err| { - eprintln!( - "Warning: Error while computing md5 of '{}': {}", - path.display(), - err - ); - - String::new() - }); - - file.md5.clone().map(|md5| md5 == file_md5).unwrap_or(false) - } else { - false - } -} - fn compute_md5_from_path(path: &PathBuf) -> Result { let input = File::open(path)?; let reader = BufReader::new(input);