Add doctype and base href to forum post HTML

This commit is contained in:
FliegendeWurst 2021-06-11 15:20:47 +02:00
parent d5cc512a7e
commit b4861a5e3f
3 changed files with 15 additions and 12 deletions

View File

@ -11,7 +11,7 @@ use reqwest_cookie_store::CookieStoreMutex;
use scraper::{ElementRef, Html, Selector};
use serde_json::json;
use crate::{cli::Opt, queue, ILIAS_URL};
use crate::{cli::Opt, queue, util::wrap_html, ILIAS_URL};
pub mod course;
pub mod exercise;
@ -251,8 +251,7 @@ impl ILIAS {
let html = self.get_html(&url.url).await?;
let main_text = if let Some(el) = html.select(&IL_CONTENT_CONTAINER).next() {
// specify a base URL for relative links
Some(format!(r#"<base href="{}">{}"#, ILIAS_URL, el.inner_html()))
Some(wrap_html(&el.inner_html()))
} else {
None
};

View File

@ -8,7 +8,7 @@ use scraper::Selector;
use crate::{
handle_gracefully, process_gracefully,
queue::spawn,
util::{file_escape, write_file_data},
util::{file_escape, wrap_html, write_file_data},
};
use super::{Object, ILIAS, URL};
@ -61,7 +61,7 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
let link = container.select(&LINKS).next().context("post link not found")?;
let id = link.value().attr("id").context("no id in thread link")?.to_owned();
let name = format!("{}_{}_{}.html", id, author, title.trim());
let data = container.inner_html();
let data = wrap_html(&container.inner_html());
let path = path.join(file_escape(&name));
let relative_path = relative_path.join(file_escape(&name));
spawn(handle_gracefully(async move {
@ -79,16 +79,15 @@ pub async fn download(path: &Path, relative_path: &Path, ilias: Arc<ILIAS>, url:
}
if let Some(container) = container.select(&POST_ATTACHMENTS).next() {
for attachment in container.select(&LINKS) {
let href = attachment.value().attr("href").map(|x| x.to_owned())
let href = attachment
.value()
.attr("href")
.map(|x| x.to_owned())
.context("attachment link without href")?;
if href.contains("cmd=deliverZipFile") {
continue; // skip downloading all attachments as zip
}
attachments.push((
id.clone(),
attachment.text().collect::<String>(),
href,
));
attachments.push((id.clone(), attachment.text().collect::<String>(), href));
}
}
}

View File

@ -10,7 +10,12 @@ use tokio_util::io::StreamReader;
use std::io;
use std::path::Path;
use crate::Result;
use crate::{Result, ILIAS_URL};
/// Prepends a doctype and a base URL to the HTML fragment.
///
/// The returned string is `<!DOCTYPE html>`, a line break, a `<base>` element
/// whose `href` is [`ILIAS_URL`] (so relative links inside the fragment have a
/// base to resolve against), and then `html_fragment` unchanged.
///
/// The fragment is inserted verbatim: it is expected to already be HTML and is
/// neither escaped nor validated here.
pub fn wrap_html(html_fragment: &str) -> String {
    // Deliberately a regular (non-raw) string literal: inside `r#"..."#` the
    // sequence `\n` is not an escape, so the output would contain a literal
    // backslash followed by `n` instead of a newline after the doctype.
    format!(
        "<!DOCTYPE html>\n<base href=\"{}\">{}",
        ILIAS_URL, html_fragment
    )
}
pub async fn write_stream_to_file(
path: &Path,