Initial commit

This commit is contained in:
FliegendeWurst 2020-04-21 22:41:28 +02:00
commit 1529a678e0
No known key found for this signature in database
GPG Key ID: CA38E82B54B32A88
6 changed files with 2354 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
target

1759
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

20
Cargo.toml Normal file
View File

@ -0,0 +1,20 @@
[package]
name = "KIT-ILIAS-downloader"
version = "0.1.0"
authors = ["FliegendeWurst <2012gdwu@web.de>"]
license = "GPL-2.0"
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
reqwest = { version = "0.10.4", features = ["cookies", "json", "stream"] }
error-chain = "0.12.2"
tokio = { version = "0.2", features = ["full"] }
serde_json = "1.0.51"
scraper = "0.11.0"
url = "2.1.1"
futures-util = "0.3.4"
regex = "1.3.7"
lazy_static = "1.4.0"
parking_lot = "0.10.2"

26
README.md Normal file
View File

@ -0,0 +1,26 @@
# KIT-ILIAS-downloader
Download content from ILIAS. That includes:
* files
* Opencast lectures
## Installation
Go to the [releases](releases) page and download the executable for your operating system. Alternatively, compile from source:
```sh
$ git clone https://github.com/FliegendeWurst/KIT-ILIAS-downloader
...
$ cd KIT-ILIAS-downloader
$ cargo build --release
...
$ cp target/release/KIT-ILIAS-downloader [directory in $PATH]
```
## Usage
TBD
## Credits
Inspired by https://github.com/brantsch/kit-ilias-fuse.

64
src/errors.rs Normal file
View File

@ -0,0 +1,64 @@
use error_chain::error_chain;
use super::*;
// Crate-wide error types generated by `error_chain!`: `Error`, `ErrorKind`,
// `ResultExt` and the `Result` alias. `std::io::Error` and `reqwest::Error`
// convert automatically (see `foreign_links`), so `?` works on both.
// NOTE(review): the commented-out foreign links (ALSA, Discord, Pulse) and the
// `UnsupportedChannel` variant look like leftovers from a template or another
// project — confirm before removing.
error_chain! {
// The type defined for this error. These are the conventional
// and recommended names, but they can be arbitrarily chosen.
//
// It is also possible to leave this section out entirely, or
// leave it empty, and these names will be used automatically.
types {
Error, ErrorKind, ResultExt, Result;
}
// Without the `Result` wrapper:
//
// types {
// Error, ErrorKind, ResultExt;
// }
// Automatic conversions between this error chain and other
// error chains. In this case, it will e.g. generate an
// `ErrorKind` variant called `Another` which in turn contains
// the `other_error::ErrorKind`, with conversions from
// `other_error::Error`.
//
// Optionally, some attributes can be added to a variant.
//
// This section can be empty.
links {
// Another(other_error::Error, other_error::ErrorKind) #[cfg(unix)];
}
// Automatic conversions between this error chain and other
// error types not defined by the `error_chain!`. These will be
// wrapped in a new error with, in the first case, the
// `ErrorKind::Fmt` variant. The description and cause will
// forward to the description and cause of the original error.
//
// Optionally, some attributes can be added to a variant.
//
// This section can be empty.
foreign_links {
//ALSA(alsa::Error);
//Channel(crossbeam_channel::SendError); // TODO: requires type argument
//Discord(serenity::Error);
Io(std::io::Error);
Reqwest(reqwest::Error);
//Pulse(pulse::error::PAErr);
}
// Define additional `ErrorKind` variants. Define custom responses with the
// `description` and `display` calls.
errors {
UnsupportedChannel(x: String) {
description("unsupported channel kind")
display("unsupported channel kind {:?}", x)
}
}
// If this annotation is left off, a variant `Msg(s: String)` will be added, and `From`
// impls will be provided for `String` and `&str`
//skip_msg_variant
}

484
src/main.rs Normal file
View File

@ -0,0 +1,484 @@
use futures_util::stream::TryStreamExt;
use lazy_static::lazy_static;
use parking_lot::Mutex;
use regex::Regex;
use reqwest::Client;
use scraper::{ElementRef, Html, Selector};
use serde_json::json;
use tokio::fs::File as AsyncFile;
use tokio::io::{stream_reader, BufWriter};
use tokio::task;
use url::Url;
use std::collections::VecDeque;
use std::default::Default;
use std::fs;
use std::io;
use std::panic;
use std::path::PathBuf;
use std::sync::Arc;
mod errors;
use errors::*;
/// Base URL of the KIT ILIAS instance (with trailing slash); relative hrefs
/// scraped from pages are appended to it.
const ILIAS_URL: &'static str = "https://ilias.studium.kit.edu/";
/// A logged-in ILIAS session together with the download configuration.
struct ILIAS {
/// Account name that was passed to `login`.
user: String,
/// Password that was passed to `login`.
pass: String,
/// Root directory below which all downloaded content is stored.
path_prefix: PathBuf,
/// HTTP client (built with a cookie store) used for every request.
client: Client
}
/// Objects listed on the user's personal desktop page.
#[derive(Debug)]
struct Dashboard {
/// One entry per container list item found on the page.
items: Vec<Object>
}
/// Something that can be listed on an ILIAS page. Which variant a link becomes
/// is decided by `Object::from_link`, mostly from the link's query parameters.
#[derive(Debug)]
enum Object {
/// `ilrepositorygui` link without a `cmd` parameter.
Course {
name: String,
url: URL
},
/// `ilrepositorygui` link with `cmd=view`.
Folder {
name: String,
url: URL
},
/// Downloadable file (absolute `goto.php` link); `name` includes version and extension.
File {
name: String,
url: URL
},
/// Link with `cmd=showThreads`.
Forum {
name: String,
url: URL
},
/// Wiki handler link.
Wiki {
name: String,
url: URL
},
/// `ilExerciseHandlerGUI` link.
ExerciseHandler {
name: String,
url: URL
},
/// `ilObjPluginDispatchGUI` link; `process` treats it as a list of videos.
PluginDispatch {
name: String,
url: URL
},
/// A single video page; `url` is the raw href taken from the video table.
Video {
url: String,
},
/// Anything that matched none of the other variants.
Generic {
name: String,
url: URL
},
}
use Object::*;
impl Object {
    /// Returns the display name of this object.
    ///
    /// `Video` has no name of its own, so its URL is returned instead.
    fn name(&self) -> &str {
        match self {
            Course { name, .. }
            | Folder { name, .. }
            | File { name, .. }
            | Forum { name, .. }
            | Wiki { name, .. }
            | ExerciseHandler { name, .. }
            | PluginDispatch { name, .. }
            | Generic { name, .. } => name,
            Video { url } => url,
        }
    }

    /// Returns a short human-readable label for the variant (used in log output).
    fn kind(&self) -> &str {
        match self {
            Course { .. } => "course",
            Folder { .. } => "folder",
            File { .. } => "file",
            Forum { .. } => "forum",
            Wiki { .. } => "wiki",
            ExerciseHandler { .. } => "exercise handler",
            PluginDispatch { .. } => "plugin dispatch",
            Video { .. } => "video",
            Generic { .. } => "generic",
        }
    }

    /// Classifies a container list item.
    ///
    /// `item` is the whole list entry, `link` its title anchor. The name is the
    /// anchor text with `/` replaced by `-` so it is usable as a single path
    /// component. Absolute `goto.php` links are treated as files: the first
    /// item property becomes the extension and, if the third property reads
    /// `Version: N`, `_vN` is appended to the name. Missing properties are
    /// skipped instead of causing a panic.
    ///
    /// # Panics
    /// Panics if `link` has no `href` attribute.
    fn from_link(item: ElementRef, link: ElementRef) -> Self {
        let mut name = link.text().collect::<String>().replace('/', "-");
        let href = link.value().attr("href").expect("item title link without href");
        let url = URL::from_href(href);

        if url.url.starts_with("https://ilias.studium.kit.edu/goto.php") {
            let item_prop = Selector::parse("span.il_ItemProperty").unwrap();
            let mut item_props = item.select(&item_prop);
            let ext = item_props.next();
            // `nth(1)` after the `next()` above selects the third property overall.
            if let Some(version) = item_props.nth(1) {
                let version = version.text().collect::<String>();
                let version = version.trim();
                if version.starts_with("Version: ") {
                    name.push_str("_v");
                    name.push_str(&version["Version: ".len()..]);
                }
            }
            if let Some(ext) = ext {
                name = format!("{}.{}", name, ext.text().collect::<String>().trim());
            }
            return File { name, url };
        }

        if url.cmd.as_deref() == Some("showThreads") {
            return Forum { name, url };
        }

        match &*url.baseClass {
            "ilExerciseHandlerGUI" => ExerciseHandler { name, url },
            // NOTE(review): the original only matched "ililWikiHandlerGUI", which
            // looks like a typo of "ilWikiHandlerGUI"; both are accepted — confirm
            // against a real wiki link.
            "ilWikiHandlerGUI" | "ililWikiHandlerGUI" => Wiki { name, url },
            "ilrepositorygui" => match url.cmd.as_deref() {
                Some("view") => Folder { name, url },
                Some(_) => Generic { name, url },
                None => Course { name, url },
            },
            "ilObjPluginDispatchGUI" => PluginDispatch { name, url },
            _ => Generic { name, url },
        }
    }
}
/// An ILIAS link split into the query parameters this program cares about.
/// Field names mirror the query parameter names, hence `non_snake_case`.
#[allow(non_snake_case)]
#[derive(Debug)]
struct URL {
/// The href exactly as it was passed to `URL::from_href`.
url: String,
/// Value of `baseClass`; empty if the parameter is absent.
baseClass: String,
cmdClass: Option<String>,
cmdNode: Option<String>,
cmd: Option<String>,
forwardCmd: Option<String>,
/// Value of `ref_id`; empty if the parameter is absent.
ref_id: String,
}
#[allow(non_snake_case)]
impl URL {
    /// Parses `href` and extracts the known ILIAS query parameters.
    ///
    /// The href is appended to a dummy `http://domain/` prefix so that
    /// relative links can be parsed; the original text is kept in `url`.
    /// Unknown parameters are ignored; if a parameter is repeated, the last
    /// occurrence wins.
    ///
    /// # Panics
    /// Panics if the prefixed href is not a valid URL.
    fn from_href(href: &str) -> Self {
        let parsed = Url::parse(&format!("http://domain/{}", href)).unwrap();
        let mut result = URL {
            url: href.to_owned(),
            baseClass: String::new(),
            cmdClass: None,
            cmdNode: None,
            cmd: None,
            forwardCmd: None,
            ref_id: String::new(),
        };
        for (key, value) in parsed.query_pairs() {
            match &*key {
                "baseClass" => result.baseClass = value.into_owned(),
                "cmdClass" => result.cmdClass = Some(value.into_owned()),
                "cmdNode" => result.cmdNode = Some(value.into_owned()),
                "cmd" => result.cmd = Some(value.into_owned()),
                "forwardCmd" => result.forwardCmd = Some(value.into_owned()),
                "ref_id" => result.ref_id = value.into_owned(),
                _ => {}
            }
        }
        result
    }
}
impl ILIAS {
async fn login<S1, S2>(user: S1, pass: S1) -> Result<Self> where S1: Into<String>, S2: Into<String> {
let user = user.into();
let pass = pass.into();
let client = Client::builder()
.cookie_store(true)
.user_agent("KIT-ILIAS-fuse/0.0.1-dev")
.build()?;
println!("Logging into Shibboleth..");
let session_establishment = client
.post("https://ilias.studium.kit.edu/Shibboleth.sso/Login")
.form(&json!({
"sendLogin": "1",
"idp_selection": "https://idp.scc.kit.edu/idp/shibboleth",
"target": "https://ilias.studium.kit.edu/shib_login.php?target=",
"home_organization_selection": "Mit KIT-Account anmelden"
}))
.send().await?;
println!("Logging into identity provider..");
let login_response = client
.post(session_establishment.url().clone())
.form(&json!({
"j_username": &user,
"j_password": &pass,
"_eventId_proceed": ""
}))
.send().await?.text().await?;
let dom = Html::parse_document(&login_response);
/* TODO: OTP
login_soup = BeautifulSoup(login_response.text, 'lxml')
otp_inp = login_soup.find("input", attrs={"name": "j_tokenNumber"})
if otp_inp:
print("OTP Detected.")
otp = input("OTP token: ")
otp_url = otp_inp.parent.parent.parent['action']
otp_response = self.post('https://idp.scc.kit.edu'+otp_url, data={'j_tokenNumber':otp, "_eventId_proceed": ""})
login_soup = BeautifulSoup(otp_response.text, 'lxml')
*/
let saml = Selector::parse(r#"input[name="SAMLResponse"]"#).unwrap();
let saml = dom.select(&saml).next().expect("no SAML response");
let relay_state = Selector::parse(r#"input[name="RelayState"]"#).unwrap();
let relay_state = dom.select(&relay_state).next().expect("no relay state");
println!("Logging into ILIAS..");
client
.post("https://ilias.studium.kit.edu/Shibboleth.sso/SAML2/POST")
.form(&json!({
"SAMLResponse": saml.value().attr("value").unwrap(),
"RelayState": relay_state.value().attr("value").unwrap()
}))
.send().await?;
println!("Logged in!");
let path_prefix = PathBuf::from(env!("ILIAS_DIR"));
Ok(ILIAS {
client, user, pass, path_prefix
})
}
async fn personal_desktop(&mut self) -> Result<Dashboard> {
let html = self.get_html("https://ilias.studium.kit.edu/ilias.php?baseClass=ilPersonalDesktopGUI&cmd=jumpToSelectedItems").await?;
let items = ILIAS::get_items(&html);
Ok(Dashboard {
items
})
}
fn get_items(html: &Html) -> Vec<Object> {
let container_items = Selector::parse("div.il_ContainerListItem").unwrap();
let container_item_title = Selector::parse("a.il_ContainerItemTitle").unwrap();
html.select(&container_items).map(|item| {
let link = item.select(&container_item_title).next().unwrap();
Object::from_link(item, link)
}).collect()
}
async fn get_html(&self, url: &str) -> Result<Html> {
let text = self.client.get(url).send().await?.text().await?;
Ok(Html::parse_document(&text))
}
async fn get_course_content(&self, url: &URL) -> Result<Vec<Object>> {
let html = self.get_html(&format!("{}{}", ILIAS_URL, url.url)).await?;
Ok(ILIAS::get_items(&html))
}
async fn download(&self, url: &str) -> Result<reqwest::Response> {
//let url = format!("{}{}", ILIAS_URL, url.url);
if VERBOSITY > 0 {
println!("Downloading {}", url);
}
Ok(self.client.get(url).send().await?)
}
}
// Compile-time configuration switches.
/// Whether `File` objects are downloaded.
const DOWNLOAD_FILES: bool = true;
/// Whether plugin-dispatch listings and the videos found in them are processed.
const DOWNLOAD_VIDEOS: bool = true;
/// Whether a download is skipped when the target path already exists.
const SKIP_EXISTING: bool = true;
/// Log level: messages are printed when this is > 0, skip notices when it is > 1.
const VERBOSITY: usize = 1;
/// Logs in, lists the personal desktop and spawns one sync task per item,
/// then waits until all tasks (including those spawned recursively) are done.
#[tokio::main]
async fn main() {
    // Keep the previous hook so panics are still reported as usual.
    *PANIC_HOOK.lock() = panic::take_hook();
    panic::set_hook(Box::new(|info| {
        // A panicking task never reaches its own decrement, so account for it
        // here. Saturating, because a panic on the main task (e.g. a failed
        // login) can happen while the counter is still 0; an underflow inside
        // the hook would abort before the original message is printed.
        {
            let mut running = TASKS_RUNNING.lock();
            *running = running.saturating_sub(1);
        }
        PANIC_HOOK.lock()(info);
    }));

    // TODO: config at runtime..
    // it's literally in the executable currently
    let mut ilias = match ILIAS::login::<&str, &str>(env!("ILIAS_USER"), env!("ILIAS_PASS")).await {
        Ok(ilias) => ilias,
        Err(e) => panic!("error: {:?}", e)
    };
    let desktop = ilias.personal_desktop().await.unwrap();
    let mut queue = VecDeque::new();
    for item in desktop.items {
        let mut path = ilias.path_prefix.clone();
        path.push(item.name());
        queue.push_back((path, item));
    }
    let ilias = Arc::new(ilias);
    while let Some((path, obj)) = queue.pop_front() {
        let ilias = Arc::clone(&ilias);
        // Count the task *before* spawning it: if the increment happened inside
        // the task, the wait loop below could observe 0 before any task had
        // started and exit immediately.
        *TASKS_RUNNING.lock() += 1;
        task::spawn(async move {
            process(ilias, path, obj).await;
            *TASKS_RUNNING.lock() -= 1;
        });
    }
    // Poll until every task has finished.
    while *TASKS_RUNNING.lock() > 0 {
        tokio::time::delay_for(std::time::Duration::from_millis(500)).await;
    }
}
lazy_static!{
// Number of sync tasks that have not finished yet; `main` waits until it is 0.
static ref TASKS_RUNNING: Mutex<usize> = Mutex::default();
// The panic hook that was installed before `main` replaced it; initially a no-op.
static ref PANIC_HOOK: Mutex<Box<dyn Fn(&panic::PanicInfo) + Sync + Send + 'static>> = Mutex::new(Box::new(|_| {}));
}
/// Creates `path` as a directory, printing any error other than "already exists".
fn create_dir_logged(path: &std::path::Path) {
    if let Err(e) = fs::create_dir(path) {
        if e.kind() != io::ErrorKind::AlreadyExists {
            println!("error: {:?}", e);
        }
    }
}

/// Spawns a task that runs `process` on a child object.
///
/// The task is counted in `TASKS_RUNNING` *before* it is spawned, so the
/// counter cannot drop to 0 between the parent finishing and the child
/// starting.
fn spawn_process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) {
    *TASKS_RUNNING.lock() += 1;
    task::spawn(async move {
        process(ilias, path, obj).await;
        *TASKS_RUNNING.lock() -= 1;
    });
}

/// Synchronises one object to `path`.
///
/// * Courses and folders: create the directory and spawn a task per child.
/// * Files: stream the download to `path`.
/// * Plugin dispatch objects: list the contained videos and spawn a task per video.
/// * Videos: extract the MP4 source from the player page and stream it to `path`.
/// * Everything else is ignored.
///
/// Written as a plain `fn` returning a boxed-free `impl Future + Send` instead
/// of an `async fn` because it is (indirectly) recursive; see
/// https://github.com/rust-lang/rust/issues/53690#issuecomment-418911229
///
/// # Panics
/// Panics on failed listing requests, unexpected page formats and file
/// system errors while writing.
fn process(ilias: Arc<ILIAS>, path: PathBuf, obj: Object) -> impl std::future::Future<Output = ()> + Send { async move {
    if VERBOSITY > 0 {
        println!("Syncing {} {}..", obj.kind(), path.strip_prefix(&ilias.path_prefix).unwrap().to_string_lossy());
    }
    match &obj {
        Course { url, .. } | Folder { url, .. } => {
            create_dir_logged(&path);
            let content = ilias.get_course_content(url).await.unwrap();
            for item in content {
                let mut path = path.clone();
                path.push(item.name());
                spawn_process(Arc::clone(&ilias), path, item);
            }
        },
        File { url, .. } => {
            if !DOWNLOAD_FILES {
                return;
            }
            if SKIP_EXISTING && fs::metadata(&path).is_ok() {
                if VERBOSITY > 1 {
                    println!("Skipping download, file exists already");
                }
                return;
            }
            let data = ilias.download(&url.url).await;
            match data {
                Ok(resp) => {
                    let mut reader = stream_reader(resp.bytes_stream().map_err(|x| {
                        io::Error::new(io::ErrorKind::Other, x)
                    }));
                    let file = AsyncFile::create(&path).await.unwrap();
                    let mut file = BufWriter::new(file);
                    tokio::io::copy(&mut reader, &mut file).await.unwrap();
                },
                Err(e) => println!("error: {:?}", e)
            }
        },
        PluginDispatch { url, .. } => {
            if !DOWNLOAD_VIDEOS {
                return;
            }
            create_dir_logged(&path);
            let list_url = format!("{}ilias.php?ref_id={}&cmdClass=xocteventgui&cmdNode=n7:mz:14p&baseClass=ilObjPluginDispatchGUI&lang=de&limit=20&cmd=asyncGetTableGUI&cmdMode=asynch", ILIAS_URL, url.ref_id);
            let data = ilias.download(&list_url);
            let html = data.await.unwrap().text().await.unwrap();
            let html = Html::parse_fragment(&html);
            let tr = Selector::parse("tr").unwrap();
            let td = Selector::parse("td").unwrap();
            let a = Selector::parse(r#"a[target="_blank"]"#).unwrap();
            for row in html.select(&tr) {
                // Rows without a player link are not videos.
                let link = match row.select(&a).next() {
                    Some(link) => link,
                    None => continue,
                };
                let mut cells = row.select(&td);
                if let Some(title) = cells.nth(2) {
                    let title = title.inner_html();
                    let title = title.trim();
                    if title.starts_with("<div") {
                        continue;
                    }
                    let mut path = path.clone();
                    // Replace '/' like `Object::from_link` does: `PathBuf::push`
                    // would otherwise create sub-paths, or replace the whole
                    // path if the title starts with '/'.
                    path.push(format!("{}.mp4", title.replace('/', "-")));
                    if VERBOSITY > 0 {
                        println!("Found video: {}", title);
                    }
                    let video = Video {
                        url: link.value().attr("href").unwrap().to_owned()
                    };
                    spawn_process(Arc::clone(&ilias), path, video);
                }
            }
        },
        Video { url } => {
            lazy_static!{
                static ref XOCT_REGEX: Regex = Regex::new(r#"(?m)<script>\s+xoctPaellaPlayer\.init\(([\s\S]+)\)\s+</script>"#).unwrap();
            }
            if !DOWNLOAD_VIDEOS {
                return;
            }
            if SKIP_EXISTING && fs::metadata(&path).is_ok() {
                if VERBOSITY > 1 {
                    println!("Skipping download, file exists already");
                }
                return;
            }
            let url = format!("{}{}", ILIAS_URL, url);
            let data = ilias.download(&url);
            let html = data.await.unwrap().text().await.unwrap();
            // The player is initialised with several arguments; only the first
            // one (up to the first ",\n") is parsed as JSON.
            let json: serde_json::Value = {
                let mut json_capture = XOCT_REGEX.captures_iter(&html);
                let json = &json_capture.next().unwrap()[1];
                let json = json.split(",\n").nth(0).unwrap();
                serde_json::from_str(&json.trim()).unwrap()
            };
            let url = json["streams"][0]["sources"]["mp4"][0]["src"].as_str().unwrap();
            let resp = ilias.download(&url).await.unwrap();
            let mut reader = stream_reader(resp.bytes_stream().map_err(|x| {
                io::Error::new(io::ErrorKind::Other, x)
            }));
            if VERBOSITY > 0 {
                println!("Saving video to {:?}", path);
            }
            let file = AsyncFile::create(&path).await.unwrap();
            let mut file = BufWriter::new(file);
            tokio::io::copy(&mut reader, &mut file).await.unwrap();
        },
        o => {
            if VERBOSITY > 0 {
                println!("ignoring {:#?}", o)
            }
        }
    }
}}