better documentation

This commit is contained in:
phiresky 2019-06-12 22:55:18 +02:00
parent 46fb17bf96
commit e5246491b0
6 changed files with 36 additions and 25 deletions

View File

@ -5,8 +5,6 @@ rga is a tool to recursively search for text in many different types of files. I
[![Linux build status](https://travis-ci.org/phiresky/ripgrep_all.svg)](https://travis-ci.org/phiresky/ripgrep_all) [![Linux build status](https://travis-ci.org/phiresky/ripgrep_all.svg)](https://travis-ci.org/phiresky/ripgrep_all)
[![Crates.io](https://img.shields.io/crates/v/ripgrep_all.svg)](https://crates.io/crates/ripgrep_all) [![Crates.io](https://img.shields.io/crates/v/ripgrep_all.svg)](https://crates.io/crates/ripgrep_all)
similar:
# todo # todo
- jpg adapter (based on object classification / detection (yolo?)) for fun - jpg adapter (based on object classification / detection (yolo?)) for fun

View File

@ -11,7 +11,7 @@ use crate::matching::*;
use crate::preproc::PreprocConfig; use crate::preproc::PreprocConfig;
use failure::*; use failure::*;
use log::*; use log::*;
use regex::{Regex, RegexSet}; use regex::{Regex};
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::prelude::*; use std::io::prelude::*;

View File

@ -1,16 +1,16 @@
use super::*; use super::*;
use crate::adapters::spawning::map_exe_error; use crate::adapters::spawning::map_exe_error;
use crate::adapters::spawning::pipe_output;
use crate::preproc::rga_preproc; use crate::preproc::rga_preproc;
use lazy_static::lazy_static; use lazy_static::lazy_static;
use spawning::SpawningFileAdapter;
use std::fs::File; use std::fs::File;
use std::io::BufReader; use std::io::BufReader;
use std::io::Cursor;
use std::io::Take;
use std::path::PathBuf; use std::path::PathBuf;
use std::process::Command; use std::process::Command;
use std::process::Stdio;
static EXTENSIONS: &[&str] = &["pdf"]; static EXTENSIONS: &[&str] = &["pdf"];
@ -46,7 +46,7 @@ impl FileAdapter for PdfPagesAdapter {
let AdaptInfo { let AdaptInfo {
filepath_hint, filepath_hint,
is_real_file, is_real_file,
mut inp, inp: _,
oup, oup,
line_prefix, line_prefix,
archive_recursion_depth, archive_recursion_depth,

View File

@ -32,35 +32,46 @@ set_default!(max_archive_recursion, 4, i32);
#[structopt(rename_all = "kebab-case", set_term_width = 80)] #[structopt(rename_all = "kebab-case", set_term_width = 80)]
pub struct RgaArgs { pub struct RgaArgs {
#[serde(default, skip_serializing_if = "is_default")] #[serde(default, skip_serializing_if = "is_default")]
#[structopt(long = "--rga-no-cache", help = "Disable caching of results")] #[structopt(long = "--rga-no-cache")]
/// Disable caching of results
///
/// By default, rga caches the extracted text to a database in ~/.cache/rga if it is small enough.
/// This way, repeated searches on the same set of files will be much faster.
/// If you pass this flag, all caching will be disabled.
pub no_cache: bool, pub no_cache: bool,
#[serde(default, skip_serializing_if = "is_default")] #[serde(default, skip_serializing_if = "is_default")]
#[structopt( #[structopt(long = "--rga-accurate")]
long = "--rga-accurate", /// Use more accurate but slower matching by mime type
help = "Use more accurate but slower matching by mime type" ///
)] /// By default, rga will match files using file extensions.
/// Some programs, such as sqlite3, don't care about the file extension at all,
/// so users sometimes use any or no extension at all. With this flag, rga
/// will try to detect the mime type of input files using the magic bytes
/// (similar to the `file` utility), and use that to choose the adapter.
pub accurate: bool, pub accurate: bool,
#[serde(default, skip_serializing_if = "is_default")] #[serde(default, skip_serializing_if = "is_default")]
#[structopt( #[structopt(
long = "--rga-adapters", long = "--rga-adapters",
require_equals = true, require_equals = true,
require_delimiter = true, require_delimiter = true
help = "Change which adapters to use and in which priority order (descending)"
)] )]
/// Change which adapters to use and in which priority order (descending)
///
/// "foo,bar" means use only adapters foo and bar.
/// "-bar,baz" means use all default adapters except for bar and baz.
/// "+bar,baz" means use all default adapters and also bar and baz.
pub adapters: Vec<String>, pub adapters: Vec<String>,
#[serde( #[serde(
default = "def_cache_max_blob_len", default = "def_cache_max_blob_len",
skip_serializing_if = "def_cache_max_blob_len_if" skip_serializing_if = "def_cache_max_blob_len_if"
)] )]
#[structopt( #[structopt(long = "--rga-cache-max-blob-len", default_value = "2000000")]
long = "--rga-cache-max-blob-len", /// Max compressed size to cache
default_value = "2000000", ///
help = "Max compressed size to cache", /// Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and will instead be recomputed every time.
long_help = "Longest byte length (after compression) to store in cache. Longer adapter outputs will not be cached and recomputed every time."
)]
pub cache_max_blob_len: u32, pub cache_max_blob_len: u32,
#[serde( #[serde(
@ -69,10 +80,12 @@ pub struct RgaArgs {
)] )]
#[structopt( #[structopt(
long = "--rga-cache-compression-level", long = "--rga-cache-compression-level",
hidden_short_help = true,
default_value = "12", default_value = "12",
require_equals = true, require_equals = true,
help = "ZSTD compression level to apply to adapter outputs before storing in cache db" help = ""
)] )]
/// ZSTD compression level to apply to adapter outputs before storing in cache db
pub cache_compression_level: u32, pub cache_compression_level: u32,
#[serde( #[serde(

View File

@ -1,4 +1,4 @@
use failure::{format_err, Fallible}; use failure::{Fallible};
use rga::adapters::*; use rga::adapters::*;
use rga::preproc::*; use rga::preproc::*;
use ripgrep_all as rga; use ripgrep_all as rga;

View File

@ -35,7 +35,7 @@ fn main() -> Fallible<()> {
for adapter in enabled_adapters { for adapter in enabled_adapters {
print(adapter) print(adapter)
} }
println!("The following adapters are disabled by default, and can be enabled using '--rga-adapters=+tesseract,xyz':\n"); println!("The following adapters are disabled by default, and can be enabled using '--rga-adapters=+pdfpages,tesseract':\n");
for adapter in disabled_adapters { for adapter in disabled_adapters {
print(adapter) print(adapter)
} }