download_manager/
main.rs

1//! # download-manager
2//!
3//! Download the Tor Browser Bundle over the tor network using multiple connections.
4//! We use [`HTTP Range requests`][1] to request the file in chunks.
5//!
6//! [1]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests
7
8use std::{collections::HashMap, num::NonZeroU8, str::FromStr};
9
10use anyhow::Context;
11use arti_client::{TorAddr, TorClient, TorClientConfig};
12use clap::Parser;
13use http_body_util::{BodyExt, Empty};
14use hyper::{
15    body::Bytes, client::conn::http1::SendRequest, header, http::uri::Scheme, Method, Request,
16    StatusCode, Uri,
17};
18use hyper_util::rt::TokioIo;
19use sha2::{Digest, Sha256};
20use tokio::{fs::OpenOptions, io::AsyncWriteExt};
21use tor_rtcompat::PreferredRuntime;
22use tracing_subscriber::{fmt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
23
24/// Tor Browser Bundle download manager
25///
26/// This binary downloads the Linux x86_64 version, which will not work on MacOS/Windows.
27#[derive(Parser)]
28struct Args {
29    /// Number of simultaneous connections
30    #[arg(long, short, default_value = "1")]
31    connections: NonZeroU8,
32    /// Version of the Tor Browser to download
33    #[clap(default_value = "14.0.7")]
34    version: String,
35}
36
37/// Connect to a URL using a isolated Tor Client
38async fn connect_to_url(
39    client: &TorClient<PreferredRuntime>,
40    uri: &Uri,
41) -> anyhow::Result<SendRequest<Empty<Bytes>>> {
42    // isolated client makes each connection run on a separate circuit
43    let isolated = client.isolated_client();
44
45    // Create TLS connector
46    let connector: tokio_native_tls::TlsConnector =
47        tokio_native_tls::native_tls::TlsConnector::new()
48            .unwrap()
49            .into();
50
51    // Only support HTTPS
52    if uri.scheme() != Some(&Scheme::HTTPS) {
53        return Err(anyhow::anyhow!("URL must use HTTPS"));
54    };
55
56    // Extract host from URI
57    let host = uri.host().ok_or(anyhow::anyhow!("Missing URL host"))?;
58
59    // Convert URL to TorAddr, defaulting to HTTPS port 443
60    let tor_addr = TorAddr::from((host, uri.port_u16().unwrap_or(443)))?;
61
62    // Connect to URL
63    tracing::debug!("Connecting to URL using Tor");
64    let stream = isolated.connect(tor_addr).await?;
65
66    // Wrap connection in TLS
67    tracing::debug!("Wrapping connection in TLS");
68    let tls_connection = connector.connect(host, stream).await?;
69
70    // Create HTTP connection
71    tracing::debug!("Performing HTTP Handshake");
72    let (sender, connection) = hyper::client::conn::http1::Builder::new()
73        .handshake(TokioIo::new(tls_connection))
74        .await?;
75
76    // Spawn task to drive HTTP state forward
77    tokio::spawn(async move {
78        if let Err(e) = connection.await {
79            tracing::debug!("Connection closed: {}", e);
80        }
81    });
82
83    Ok(sender)
84}
85
86/// Fetch the size of the Tor Browser Bundle
87async fn get_content_length(
88    http: &mut SendRequest<Empty<Bytes>>,
89    uri: &Uri,
90) -> anyhow::Result<u64> {
91    let host = uri.host().ok_or(anyhow::anyhow!("missing host"))?;
92    tracing::debug!("Request Content-Length of resource: {uri}");
93
94    // Create HTTP Request
95    let request = Request::builder()
96        .method(Method::HEAD)
97        // Required header
98        .header(header::HOST, host)
99        .uri(uri)
100        .body(Empty::new())?;
101    tracing::debug!("Sending request to server: {:?}", request);
102
103    let response = http.send_request(request).await?;
104    tracing::debug!("Received response from server: {:?}", response);
105
106    // Check that request succeeded
107    if !response.status().is_success() {
108        return Err(anyhow::anyhow!("HEAD Request failed: {:?}", response));
109    };
110
111    // Get the Content-Length header
112    match response.headers().get(header::CONTENT_LENGTH) {
113        Some(header) => {
114            let length: u64 = header.to_str()?.parse()?;
115            tracing::debug!("Content-Length of resource: {}", length);
116            Ok(length)
117        }
118        None => Err(anyhow::anyhow!("Missing Content-Length header")),
119    }
120}
121
122/// Fetch a [`HashMap`] of filename to checksum from a [`Uri`]
123async fn get_checksums(
124    http: &mut SendRequest<Empty<Bytes>>,
125    uri: Uri,
126) -> anyhow::Result<HashMap<String, String>> {
127    let host = uri.host().ok_or(anyhow::anyhow!("missing host in uri"))?;
128    tracing::debug!("Fetching checksums from {uri}");
129
130    let request = Request::builder()
131        .method(Method::GET)
132        .header(header::HOST, host)
133        .uri(uri)
134        .body(Empty::new())?;
135
136    let mut response = http.send_request(request).await?;
137
138    if response.status() != StatusCode::OK {
139        return Err(anyhow::anyhow!(
140            "Fetching checksum failed: {}",
141            response.status()
142        ));
143    };
144
145    // Extract checksums into HashMap
146    let mut checksums = HashMap::new();
147    let body = response.body_mut().collect().await?.to_bytes();
148    let content = std::str::from_utf8(&body)?;
149    for line in content.lines() {
150        if let Some((checksum, filename)) = line.split_once("  ") {
151            checksums.insert(filename.trim().to_string(), checksum.trim().to_string());
152        }
153    }
154    tracing::debug!("Fetched {} checksums", checksums.len());
155
156    Ok(checksums)
157}
158
159/// Request a range of bytes using HTTP Range requests
160async fn request_range(
161    // Clients should only be used once for fetching a chunk,
162    // so lets consume it
163    mut http: SendRequest<Empty<Bytes>>,
164    uri: Uri,
165    start: u64,
166    end: u64,
167) -> anyhow::Result<Bytes> {
168    let host = uri
169        .host()
170        .ok_or(anyhow::anyhow!("missing host"))?
171        .to_string();
172    tracing::debug!("Requesting range: {} to {}", start, end);
173
174    // Create Request
175    let request = Request::builder()
176        .method(Method::GET)
177        .uri(uri)
178        .header(header::HOST, host)
179        .header(header::RANGE, format!("bytes={}-{}", start, end))
180        .body(Empty::new())?;
181
182    let mut response = http.send_request(request).await?;
183
184    // We're expecting partial content
185    if response.status() != StatusCode::PARTIAL_CONTENT {
186        tracing::debug!("Server did not send chunk");
187        return Err(anyhow::anyhow!(
188            "No chunk from server: {:?}",
189            response.status()
190        ));
191    };
192
193    let body = response.body_mut().collect().await?;
194    Ok(body.to_bytes())
195}
196
197#[tokio::main]
198async fn main() -> anyhow::Result<()> {
199    tracing_subscriber::registry()
200        .with(fmt::layer())
201        .with(EnvFilter::from_default_env())
202        .init();
203    let args = Args::parse();
204    let connections = args.connections.get().into();
205
206    // Warn user when using more than 8 connections
207    if connections > 8 {
208        tracing::warn!("The Tor network has limited bandwidth, it is recommended to use less than 8 connections");
209    };
210
211    // Generate download and checksum URL from Tor version
212    let filename = format!("tor-browser-linux-x86_64-{}.tar.xz", args.version);
213
214    // Check if the file already exists
215    if tokio::fs::try_exists(&filename).await? {
216        tracing::info!("File already exists, skipping download");
217        return Err(anyhow::anyhow!("File {filename} already exists"));
218    }
219
220    let url = format!(
221        "https://dist.torproject.org/torbrowser/{}/{}",
222        args.version, filename
223    );
224    let uri = Uri::from_str(url.as_str())?;
225    let checksum_url = format!(
226        "https://dist.torproject.org/torbrowser/{}/sha256sums-signed-build.txt",
227        args.version
228    );
229    let checksum_uri = Uri::from_str(checksum_url.as_str())?;
230
231    // Create the tor client
232    let config = TorClientConfig::default();
233
234    tracing::info!("Bootstrapping... (this may take a while)");
235    let client = TorClient::create_bootstrapped(config).await?;
236
237    // Fetch Tor Browser Bundle size using isolated tor client
238    let mut connection = connect_to_url(&client, &uri).await?;
239    let length = get_content_length(&mut connection, &uri).await?;
240    tracing::info!("Tor Browser Bundle has size: {} bytes", length);
241
242    tracing::info!("Fetching checksum");
243    let checksums = get_checksums(&mut connection, checksum_uri).await?;
244    let checksum = checksums
245        .get(filename.as_str())
246        .ok_or(anyhow::anyhow!("Missing checksum in checksum file"))?;
247    tracing::info!("Checksum for resource: {}", &checksum);
248
249    let checksum = hex::decode(checksum).context("Failed to decode checksum")?;
250
251    // We should never have more connections than the size of the bundle
252    let connections = std::cmp::min(connections, length);
253
254    // Calculate HTTP Range chunks
255    let chunk_size = length / connections;
256    let remainder = length % connections;
257
258    let mut ranges = Vec::new();
259    let mut start = 0;
260    for i in 0..connections {
261        let extra = if i < remainder { 1 } else { 0 };
262        let end = start + chunk_size + extra - 1;
263        ranges.push((start, end));
264        start = end + 1;
265    }
266
267    tracing::info!("Creating {} connections", connections);
268    let connections = ranges.iter().map(|(start, end)| async {
269        // Create new connection for chunk
270        let connection = connect_to_url(&client, &uri).await?;
271        Ok::<_, anyhow::Error>((connection, *start, *end))
272    });
273    let connections = futures::future::try_join_all(connections).await?;
274
275    // Distribute work across multiple connections
276    let mut tasks = Vec::new();
277
278    for (client, start, end) in connections {
279        // Start future to request chunk
280        let task = tokio::spawn(request_range(client, uri.clone(), start, end));
281        tasks.push(task);
282    }
283
284    // Store downloaded content in memory
285    let mut content = Vec::new();
286
287    // Create SHA256 hasher
288    let mut hasher: Sha256 = Sha256::new();
289
290    // Write requested ranges sequentially into file
291    tracing::info!("Streaming download into file");
292    for task in tasks {
293        let data = task.await??;
294        hasher.update(&data);
295        content.extend_from_slice(&data);
296    }
297
298    if checksum != hasher.finalize().as_slice() {
299        return Err(anyhow::anyhow!("Mismatched checksum"));
300    }
301    tracing::info!("Checksum match!");
302
303    // Write content to file
304    let mut file = OpenOptions::new()
305        .create_new(true)
306        .write(true)
307        .open(&filename)
308        .await?;
309
310    file.write_all(&content).await?;
311    tracing::info!("Saved file: {}", &filename);
312
313    Ok(())
314}