Add doctype and base href to forum post HTML

This commit is contained in:
FliegendeWurst 2021-06-11 15:20:47 +02:00
parent d5cc512a7e
commit b4861a5e3f
3 changed files with 15 additions and 12 deletions

View File

@ -11,7 +11,7 @@ use reqwest_cookie_store::CookieStoreMutex;
use scraper::{ElementRef, Html, Selector}; use scraper::{ElementRef, Html, Selector};
use serde_json::json; use serde_json::json;
use crate::{cli::Opt, queue, ILIAS_URL}; use crate::{cli::Opt, queue, util::wrap_html, ILIAS_URL};
pub mod course; pub mod course;
pub mod exercise; pub mod exercise;
@ -251,8 +251,7 @@ impl ILIAS {
let html = self.get_html(&url.url).await?; let html = self.get_html(&url.url).await?;
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() { let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
// specify a base URL for relative links Some(wrap_html(&el.inner_html()))
Some(format!(r#"<base href="{}">{}"#, ILIAS_URL, el.inner_html()))
} else { } else {
None None
}; };

View File

@ -8,7 +8,7 @@ use scraper::Selector;
use crate::{ use crate::{
handle_gracefully, process_gracefully, handle_gracefully, process_gracefully,
queue::spawn, queue::spawn,
util::{file_escape, write_file_data}, util::{file_escape, wrap_html, write_file_data},
}; };
use super::{Object, ILIAS, URL}; use super::{Object, ILIAS, URL};
@ -61,7 +61,7 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
let link = container.select(&LINKS).next().context("post link not found")?; let link = container.select(&LINKS).next().context("post link not found")?;
let id = link.value().attr("id").context("no id in thread link")?.to_owned(); let id = link.value().attr("id").context("no id in thread link")?.to_owned();
let name = format!("{}_{}_{}.html", id, author, title.trim()); let name = format!("{}_{}_{}.html", id, author, title.trim());
let data = container.inner_html(); let data = wrap_html(&container.inner_html());
let path = path.join(file_escape(&name)); let path = path.join(file_escape(&name));
let relative_path = relative_path.join(file_escape(&name)); let relative_path = relative_path.join(file_escape(&name));
spawn(handle_gracefully(async move { spawn(handle_gracefully(async move {
@ -79,16 +79,15 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
} }
if let Some(container) = container.select(&POST_ATTACHMENTS).next() { if let Some(container) = container.select(&POST_ATTACHMENTS).next() {
for attachment in container.select(&LINKS) { for attachment in container.select(&LINKS) {
let href = attachment.value().attr("href").map(|x| x.to_owned()) let href = attachment
.value()
.attr("href")
.map(|x| x.to_owned())
.context("attachment link without href")?; .context("attachment link without href")?;
if href.contains("cmd=deliverZipFile") { if href.contains("cmd=deliverZipFile") {
continue; // skip downloading all attachments as zip continue; // skip downloading all attachments as zip
} }
attachments.push(( attachments.push((id.clone(), attachment.text().collect::<String>(), href));
id.clone(),
attachment.text().collect::<String>(),
href,
));
} }
} }
} }

View File

@ -10,7 +10,12 @@ use tokio_util::io::StreamReader;
use std::io; use std::io;
use std::path::Path; use std::path::Path;
use crate::Result; use crate::{Result, ILIAS_URL};
/// Prepends a doctype and a base URL to the HTML fragment.
///
/// The result is `<!DOCTYPE html>`, a line break, a `<base>` element whose
/// `href` is [`ILIAS_URL`], and then `html_fragment` unchanged. The `<base>`
/// element is there so that relative links inside the fragment are resolved
/// against `ILIAS_URL` when the saved file is opened.
///
/// The fragment is inserted verbatim; no escaping or validation is applied.
pub fn wrap_html(html_fragment: &str) -> String {
    // A regular (non-raw) string literal is required here: inside `r#"..."#`
    // the sequence `\n` is not an escape and would be written out as a
    // literal backslash followed by `n`.
    format!(
        "<!DOCTYPE html>\n<base href=\"{}\">{}",
        ILIAS_URL, html_fragment
    )
}
pub async fn write_stream_to_file( pub async fn write_stream_to_file(
path: &Path, path: &Path,