mirror of
https://github.com/FliegendeWurst/KIT-ILIAS-downloader.git
synced 2024-08-28 04:04:18 +00:00
Add doctype and base href to forum post HTML
This commit is contained in:
parent
d5cc512a7e
commit
b4861a5e3f
@ -11,7 +11,7 @@ use reqwest_cookie_store::CookieStoreMutex;
|
|||||||
use scraper::{ElementRef, Html, Selector};
|
use scraper::{ElementRef, Html, Selector};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
|
|
||||||
use crate::{cli::Opt, queue, ILIAS_URL};
|
use crate::{cli::Opt, queue, util::wrap_html, ILIAS_URL};
|
||||||
|
|
||||||
pub mod course;
|
pub mod course;
|
||||||
pub mod exercise;
|
pub mod exercise;
|
||||||
@ -251,8 +251,7 @@ impl ILIAS {
|
|||||||
let html = self.get_html(&url.url).await?;
|
let html = self.get_html(&url.url).await?;
|
||||||
|
|
||||||
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
|
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
|
||||||
// specify a base URL for relative links
|
Some(wrap_html(&el.inner_html()))
|
||||||
Some(format!(r#"<base href="{}">{}"#, ILIAS_URL, el.inner_html()))
|
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
@ -8,7 +8,7 @@ use scraper::Selector;
|
|||||||
use crate::{
|
use crate::{
|
||||||
handle_gracefully, process_gracefully,
|
handle_gracefully, process_gracefully,
|
||||||
queue::spawn,
|
queue::spawn,
|
||||||
util::{file_escape, write_file_data},
|
util::{file_escape, wrap_html, write_file_data},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::{Object, ILIAS, URL};
|
use super::{Object, ILIAS, URL};
|
||||||
@ -61,7 +61,7 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
|
|||||||
let link = container.select(&LINKS).next().context("post link not found")?;
|
let link = container.select(&LINKS).next().context("post link not found")?;
|
||||||
let id = link.value().attr("id").context("no id in thread link")?.to_owned();
|
let id = link.value().attr("id").context("no id in thread link")?.to_owned();
|
||||||
let name = format!("{}_{}_{}.html", id, author, title.trim());
|
let name = format!("{}_{}_{}.html", id, author, title.trim());
|
||||||
let data = container.inner_html();
|
let data = wrap_html(&container.inner_html());
|
||||||
let path = path.join(file_escape(&name));
|
let path = path.join(file_escape(&name));
|
||||||
let relative_path = relative_path.join(file_escape(&name));
|
let relative_path = relative_path.join(file_escape(&name));
|
||||||
spawn(handle_gracefully(async move {
|
spawn(handle_gracefully(async move {
|
||||||
@ -79,16 +79,15 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
|
|||||||
}
|
}
|
||||||
if let Some(container) = container.select(&POST_ATTACHMENTS).next() {
|
if let Some(container) = container.select(&POST_ATTACHMENTS).next() {
|
||||||
for attachment in container.select(&LINKS) {
|
for attachment in container.select(&LINKS) {
|
||||||
let href = attachment.value().attr("href").map(|x| x.to_owned())
|
let href = attachment
|
||||||
|
.value()
|
||||||
|
.attr("href")
|
||||||
|
.map(|x| x.to_owned())
|
||||||
.context("attachment link without href")?;
|
.context("attachment link without href")?;
|
||||||
if href.contains("cmd=deliverZipFile") {
|
if href.contains("cmd=deliverZipFile") {
|
||||||
continue; // skip downloading all attachments as zip
|
continue; // skip downloading all attachments as zip
|
||||||
}
|
}
|
||||||
attachments.push((
|
attachments.push((id.clone(), attachment.text().collect::<String>(), href));
|
||||||
id.clone(),
|
|
||||||
attachment.text().collect::<String>(),
|
|
||||||
href,
|
|
||||||
));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,12 @@ use tokio_util::io::StreamReader;
|
|||||||
use std::io;
|
use std::io;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use crate::Result;
|
use crate::{Result, ILIAS_URL};
|
||||||
|
|
||||||
|
/// Prepends a doctype and a base URL to the HTML fragment.
|
||||||
|
pub fn wrap_html(html_fragment: &str) -> String {
|
||||||
|
format!(r#"<!DOCTYPE html>\n<base href="{}">{}"#, ILIAS_URL, html_fragment)
|
||||||
|
}
|
||||||
|
|
||||||
pub async fn write_stream_to_file(
|
pub async fn write_stream_to_file(
|
||||||
path: &Path,
|
path: &Path,
|
||||||
|
Loading…
Reference in New Issue
Block a user