diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..c50f366 --- /dev/null +++ b/flake.lock @@ -0,0 +1,132 @@ +{ + "nodes": { + "crane": { + "inputs": { + "flake-compat": [ + "flake-compat" + ], + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ], + "rust-overlay": [ + "rust-overlay" + ] + }, + "locked": { + "lastModified": 1688772518, + "narHash": "sha256-ol7gZxwvgLnxNSZwFTDJJ49xVY5teaSvF7lzlo3YQfM=", + "owner": "ipetkov", + "repo": "crane", + "rev": "8b08e96c9af8c6e3a2b69af5a7fa168750fcf88e", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1673956053, + "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1687709756, + "narHash": "sha256-Y5wKlQSkgEK2weWdOu4J3riRd+kV/VCgHsqLNTTWQ/0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "dbabf0ca0c0c4bce6ea5eaf65af5cb694d2082c7", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1688679045, + "narHash": "sha256-t3xGEfYIwhaLTPU8FLtN/pLPytNeDwbLI6a7XFFBlGo=", + "owner": "nixos", + "repo": "nixpkgs", + "rev": "3c7487575d9445185249a159046cc02ff364bff8", + "type": "github" + }, + "original": { + "owner": "nixos", + "ref": "nixos-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "flake-compat": "flake-compat", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1688870171, + "narHash": "sha256-8tD8fheWPa7TaJoxzcU3iHkCrQQpOpdMN+HYqgZ1N5A=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "5a932f10ac4bd59047d6e8b5780750ec76ea988a", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..141ee8f --- /dev/null +++ b/flake.nix @@ -0,0 +1,86 @@ +{ + description = "Download content from ilias.studium.kit.edu"; + + inputs = { + nixpkgs.url = "github:nixos/nixpkgs/nixos-unstable"; + + crane = { + url = "github:ipetkov/crane"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + inputs.flake-compat.follows = "flake-compat"; + inputs.rust-overlay.follows = "rust-overlay"; + }; + + # Import them even though we don't use them. Needed to allow overriding `rust-overlay` + # etc. in flakes consuming this flake. + # Temporary until https://github.com/NixOS/nix/issues/6986 is solved. + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs.nixpkgs.follows = "nixpkgs"; + inputs.flake-utils.follows = "flake-utils"; + }; + flake-utils.url = "github:numtide/flake-utils"; + flake-compat = { + url = "github:edolstra/flake-compat"; + flake = false; + }; + }; + + outputs = { self, nixpkgs, crane, ... }: let + systems = [ "x86_64-linux" ]; + inherit (nixpkgs) lib; + forEachSystem = lib.genAttrs systems; + craneLib = forEachSystem (system: crane.lib.${system}); + + toHydraJob = with lib; foldlAttrs + (jobset: system: attrs: recursiveUpdate jobset + (mapAttrs (const (drv: { ${system} = drv; })) + (filterAttrs (name: const (name != "default")) attrs))) + { }; + + builds = forEachSystem (system: (lib.fix (final: { + common = { + pname = "KIT-ILIAS-Downloader"; + src = craneLib.${system}.cleanCargoSource self; + }; + cargoArtifacts = craneLib.${system}.buildDepsOnly (final.common // { + doCheck = false; + }); + clippy = craneLib.${system}.cargoClippy (final.common // { + inherit (final) cargoArtifacts; + cargoClippyExtraArgs = lib.escapeShellArgs [ + "--all-targets" + "--" + "-D" + "warnings" + "-A" + "non-snake-case" + "-A" + "clippy::upper-case-acronyms" + ]; + }); + format = craneLib.${system}.cargoFmt (final.common // { + inherit (final) cargoArtifacts; + }); + kit-ilias-downloader = craneLib.${system}.buildPackage (final.common // { + inherit (final) cargoArtifacts; + doCheck = false; + meta.license = lib.licenses.gpl3Plus; + meta.platforms = systems; + }); + }))); + in { + packages = forEachSystem (system: { + default = self.packages.${system}.kit-ilias-downloader; + inherit (builds.${system}) kit-ilias-downloader; + }); + checks = forEachSystem (system: { + inherit (builds.${system}) format clippy; + }); + hydraJobs = { + packages = toHydraJob self.packages; + checks = toHydraJob self.checks; + }; + }; +} diff --git a/src/ilias.rs b/src/ilias.rs index 27b972f..c249757 100644 --- a/src/ilias.rs +++ b/src/ilias.rs @@ -5,13 +5,12 @@ use std::{collections::HashMap, error::Error as _, io::Write, sync::Arc}; use anyhow::{anyhow, Context, Result}; use cookie_store::CookieStore; use once_cell::sync::Lazy; -use regex::Regex; use reqwest::{Client, IntoUrl, Proxy, Url}; use reqwest_cookie_store::CookieStoreMutex; use scraper::{ElementRef, Html, Selector}; use serde_json::json; -use crate::{cli::Opt, queue, util::wrap_html, ILIAS_URL, iliasignore::IliasIgnore}; +use crate::{cli::Opt, iliasignore::IliasIgnore, queue, util::wrap_html, ILIAS_URL}; pub mod course; pub mod exercise; @@ -46,12 +45,9 @@ pub struct ILIAS { fn error_is_http2(error: &reqwest::Error) -> bool { error .source() // hyper::Error - .map(|x| x.source()) // h2::Error - .flatten() - .map(|x| x.downcast_ref::()) - .flatten() - .map(|x| x.reason()) - .flatten() + .and_then(|x| x.source()) // h2::Error + .and_then(|x| x.downcast_ref::()) + .and_then(|x| x.reason()) .map(|x| x == h2::Reason::NO_ERROR) .unwrap_or(false) } @@ -222,8 +218,8 @@ impl ILIAS { } unreachable!() } - - pub async fn is_error_response(html: &Html) { + + pub fn is_error_response(html: &Html) -> bool { html.select(&ALERT_DANGER).next().is_some() } @@ -286,7 +282,13 @@ impl ILIAS { } else { None }; - Ok((ILIAS::get_items(&html), main_text, html.select(&LINKS).flat_map(|x| x.value().attr("href").map(|x| x.to_owned())).collect())) + Ok(( + ILIAS::get_items(&html), + main_text, + html.select(&LINKS) + .flat_map(|x| x.value().attr("href").map(|x| x.to_owned())) + .collect(), + )) } pub async fn get_course_content_tree(&self, ref_id: &str, cmd_node: &str) -> Result> { @@ -344,8 +346,8 @@ impl Object { | Presentation { name, .. } | ExerciseHandler { name, .. } | PluginDispatch { name, .. } - | Generic { name, .. } => &name, - Thread { url } => &url.thr_pk.as_ref().unwrap(), + | Generic { name, .. } => name, + Thread { url } => url.thr_pk.as_ref().unwrap(), Video { url } => &url.url, Dashboard { url } => &url.url, } @@ -366,7 +368,7 @@ impl Object { | ExerciseHandler { url, .. } | PluginDispatch { url, .. } | Video { url } - | Generic { url, .. } => &url, + | Generic { url, .. } => url, } } diff --git a/src/ilias/course.rs b/src/ilias/course.rs index 76c0f9f..e04f792 100644 --- a/src/ilias/course.rs +++ b/src/ilias/course.rs @@ -28,12 +28,12 @@ pub async fn download(path: PathBuf, ilias: Arc, url: &URL, name: &str) - return Ok(()); // ignore groups we are not in } warning!(name, "falling back to incomplete course content extractor!", e); - let (items, main_text, _) = ilias.get_course_content(&url).await?; + let (items, main_text, _) = ilias.get_course_content(url).await?; (items, main_text) }, } } else { - let (items, main_text, _) = ilias.get_course_content(&url).await?; + let (items, main_text, _) = ilias.get_course_content(url).await?; (items, main_text) }; if ilias.opt.save_ilias_pages { diff --git a/src/ilias/file.rs b/src/ilias/file.rs index 1df7b2a..a4e1690 100644 --- a/src/ilias/file.rs +++ b/src/ilias/file.rs @@ -17,6 +17,6 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc, url: } let data = ilias.download(&url.url).await?; log!(0, "Writing {}", relative_path.to_string_lossy()); - write_stream_to_file(&path, data.bytes_stream()).await?; + write_stream_to_file(path, data.bytes_stream()).await?; Ok(()) } diff --git a/src/ilias/folder.rs b/src/ilias/folder.rs index 898ecc5..56ef63b 100644 --- a/src/ilias/folder.rs +++ b/src/ilias/folder.rs @@ -17,7 +17,7 @@ static EXPAND_LINK: Lazy = Lazy::new(|| Regex::new("expand=\\d").unwrap() #[async_recursion] pub async fn download(path: &Path, ilias: Arc, url: &URL) -> Result<()> { - let content = ilias.get_course_content(&url).await?; + let content = ilias.get_course_content(url).await?; // expand all sessions for href in content.2 { diff --git a/src/ilias/video.rs b/src/ilias/video.rs index 2437ddc..27640bd 100644 --- a/src/ilias/video.rs +++ b/src/ilias/video.rs @@ -34,7 +34,7 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc, url: let json = &json_capture.next().context("xoct player json not found")?[1]; log!(2, "{}", json); let json = json.split(",\n").next().context("invalid xoct player json")?; - serde_json::from_str(&json.trim())? + serde_json::from_str(json.trim())? }; log!(2, "{}", json); let streams = json @@ -49,52 +49,50 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc, url: .as_str() .context("video src not string")?; download_to_path(&ilias, path, relative_path, url).await?; + } else if !ilias.opt.combine_videos { + fs::create_dir(path).await.context("failed to create video directory")?; + download_all(path, streams, ilias, relative_path).await?; } else { - if !ilias.opt.combine_videos { - fs::create_dir(path).await.context("failed to create video directory")?; - download_all(path, streams, ilias, relative_path).await?; - } else { - let dir = tempdir()?; - // construct ffmpeg command to combine all files - let mut arguments = vec![]; - for file in download_all(dir.path(), streams, ilias, relative_path).await? { - arguments.push("-i".to_owned()); - arguments.push(file.to_str().context("invalid UTF8")?.into()); - } - arguments.push("-c".into()); - arguments.push("copy".into()); - for i in 0..(arguments.len() / 2) - 1 { - arguments.push("-map".into()); - arguments.push(format!("{}", i)); - } - arguments.push(path.to_str().context("invalid UTF8 in path")?.into()); - let status = Command::new("ffmpeg") - .args(&arguments) - .stderr(Stdio::null()) - .stdout(Stdio::null()) - .spawn() - .context("failed to start ffmpeg")? - .wait() - .await - .context("failed to wait for ffmpeg")?; - if !status.success() { - error!(format!("ffmpeg failed to merge video files into {}", path.display())); - error!(format!("check this directory: {}", dir.into_path().display())); - error!(format!("ffmpeg command: {}", arguments.join(" "))); - } - }; + let dir = tempdir()?; + // construct ffmpeg command to combine all files + let mut arguments = vec![]; + for file in download_all(dir.path(), streams, ilias, relative_path).await? { + arguments.push("-i".to_owned()); + arguments.push(file.to_str().context("invalid UTF8")?.into()); + } + arguments.push("-c".into()); + arguments.push("copy".into()); + for i in 0..(arguments.len() / 2) - 1 { + arguments.push("-map".into()); + arguments.push(format!("{}", i)); + } + arguments.push(path.to_str().context("invalid UTF8 in path")?.into()); + let status = Command::new("ffmpeg") + .args(&arguments) + .stderr(Stdio::null()) + .stdout(Stdio::null()) + .spawn() + .context("failed to start ffmpeg")? + .wait() + .await + .context("failed to wait for ffmpeg")?; + if !status.success() { + error!(format!("ffmpeg failed to merge video files into {}", path.display())); + error!(format!("check this directory: {}", dir.into_path().display())); + error!(format!("ffmpeg command: {}", arguments.join(" "))); + } } Ok(()) } async fn download_all( path: &Path, - streams: &Vec, + streams: &[serde_json::Value], ilias: Arc, relative_path: &Path, ) -> Result> { let mut paths = Vec::new(); - for (i, stream) in streams.into_iter().enumerate() { + for (i, stream) in streams.iter().enumerate() { let url = stream .pointer("/sources/mp4/0/src") .context("video src not found")? @@ -126,9 +124,9 @@ async fn download_to_path(ilias: &ILIAS, path: &Path, relative_path: &Path, url: } } } else { - let resp = ilias.download(&url).await?; + let resp = ilias.download(url).await?; log!(0, "Writing {}", relative_path.to_string_lossy()); - write_stream_to_file(&path, resp.bytes_stream()).await?; + write_stream_to_file(path, resp.bytes_stream()).await?; } Ok(()) } diff --git a/src/ilias/weblink.rs b/src/ilias/weblink.rs index 72e7763..4ada810 100644 --- a/src/ilias/weblink.rs +++ b/src/ilias/weblink.rs @@ -27,7 +27,7 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc, url: if url.starts_with(ILIAS_URL) { // is a link list if fs::metadata(&path).await.is_err() { - create_dir(&path).await?; + create_dir(path).await?; log!(0, "Writing {}", relative_path.to_string_lossy()); } diff --git a/src/iliasignore.rs b/src/iliasignore.rs index eaf5129..13712c4 100644 --- a/src/iliasignore.rs +++ b/src/iliasignore.rs @@ -1,66 +1,67 @@ -use std::{path::{Path, PathBuf, Component}, ffi::OsString}; +use std::{ + ffi::OsString, + path::{Component, Path, PathBuf}, +}; use anyhow::Result; use ignore::gitignore::Gitignore; #[derive(Clone, Debug)] pub struct IliasIgnore { - ignores: Vec + ignores: Vec, } impl IliasIgnore { - pub fn load(mut path: PathBuf) -> Result { - let mut ignores = Vec::new(); - let mut prefix = Vec::new(); - // example scenario: - // path = /KIT/ILIAS/SS 23/Next Generation Internet - // iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet/ - // iliasignore in Next Generation Internet/.iliasignore: prefix = "" - loop { - let (ignore, error) = Gitignore::new(path.join(".iliasignore")); - if let Some(err) = error { - warning!(err); - } - if ignore.len() > 0 { - ignores.push(IgnoreFile { - ignore, - prefix: prefix.iter().fold(OsString::new(), |mut acc, el| { - acc.push(el); - acc.push("/"); - acc - }) - }); - } - if let Some(last) = path.components().last() { - match last { - Component::Normal(name) => prefix.insert(0, name.to_owned()), - _ => break - } - } - path.pop(); - } - Ok(IliasIgnore { - ignores - }) - } + pub fn load(mut path: PathBuf) -> Result { + let mut ignores = Vec::new(); + let mut prefix = Vec::new(); + // example scenario: + // path = /KIT/ILIAS/SS 23/Next Generation Internet + // iliasignore in ILIAS/.iliasignore: prefix = SS 23/Next Generation Internet/ + // iliasignore in Next Generation Internet/.iliasignore: prefix = "" + loop { + let (ignore, error) = Gitignore::new(path.join(".iliasignore")); + if let Some(err) = error { + warning!(err); + } + if !ignore.is_empty() { + ignores.push(IgnoreFile { + ignore, + prefix: prefix.iter().fold(OsString::new(), |mut acc, el| { + acc.push(el); + acc.push("/"); + acc + }), + }); + } + if let Some(last) = path.components().last() { + match last { + Component::Normal(name) => prefix.insert(0, name.to_owned()), + _ => break, + } + } + path.pop(); + } + Ok(IliasIgnore { ignores }) + } - pub fn should_ignore(&self, path: &Path, is_dir: bool) -> bool { - for ignore_file in &self.ignores { - let mut full_path = ignore_file.prefix.clone(); - full_path.push(path.as_os_str()); - let matched = ignore_file.ignore.matched(&full_path, is_dir); - if matched.is_whitelist() { - return false; - } else if matched.is_ignore() { - return true; - } - } - false - } + pub fn should_ignore(&self, path: &Path, is_dir: bool) -> bool { + for ignore_file in &self.ignores { + let mut full_path = ignore_file.prefix.clone(); + full_path.push(path.as_os_str()); + let matched = ignore_file.ignore.matched(&full_path, is_dir); + if matched.is_whitelist() { + return false; + } else if matched.is_ignore() { + return true; + } + } + false + } } #[derive(Clone, Debug)] struct IgnoreFile { - ignore: Gitignore, - prefix: OsString -} \ No newline at end of file + ignore: Gitignore, + prefix: OsString, +} diff --git a/src/main.rs b/src/main.rs index ba5a236..3bd8b0d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,7 +3,6 @@ use anyhow::{anyhow, Context, Result}; use futures::future::{self, Either}; use futures::StreamExt; -use ignore::gitignore::Gitignore; use indicatif::{ProgressDrawTarget, ProgressStyle}; use structopt::StructOpt; use tokio::fs;