// erpc_analysis/config.rs
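//! Handles application configuration for erpc-analysis.
//!
//! This module defines structures for parsing the `Config.toml` file, for
//! loading sensitive information like Neo4j credentials from environment
//! variables, and for loading and validating the algorithm parameters stored
//! in the analysis configuration file (`analysis_config.toml`).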

use anyhow::{Context, Result};
use log::info;
use serde::Deserialize;
use std::{env, fs, path::Path};

use crate::args::Args;

/// Mirrors the `[primary]` table of the `Config.toml` file.
///
/// Only the keys `erpc-analysis` needs are declared here. The struct does not
/// set `deny_unknown_fields`, so other keys in the table are skipped during
/// deserialization.
#[derive(Debug, Deserialize)]
pub struct PrimarySectionConfig {
    /// Whether Neo4j database interaction is enabled.
    ///
    /// The key has no serde default, so it must be present in the file. When
    /// it is `true`, [`AnalysisConfig::load_from_toml_and_env`] loads the
    /// Neo4j credentials from the environment.
    pub neo4j_allowed: bool,
}

/// Root of the `Config.toml` document as seen by `erpc-analysis`.
///
/// Only the `[primary]` table is declared. It is a required field, so parsing
/// fails when the table is absent.
#[derive(Debug, Deserialize)]
pub struct TomlRootConfig {
    /// Settings read from the `[primary]` table of the TOML file.
    pub primary: PrimarySectionConfig,
}

/// Holds Neo4j connection parameters.
///
/// These are loaded from environment variables if `neo4j_allowed` is true.
///
/// `Debug` is implemented by hand rather than derived so that formatting this
/// value (directly or through a containing struct) never writes the password
/// into logs or error output.
#[derive(Clone)]
pub struct Neo4jConfig {
    /// The URI for the Neo4j database (e.g., "bolt://localhost:7687").
    pub uri: String,
    /// The username for Neo4j authentication.
    pub username: String,
    /// The password for Neo4j authentication. Never shown by `Debug`.
    pub password: String,
}

impl std::fmt::Debug for Neo4jConfig {
    /// Formats like the derived implementation, except that the password is
    /// replaced by a fixed placeholder.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("Neo4jConfig")
            .field("uri", &self.uri)
            .field("username", &self.username)
            .field("password", &"<redacted>")
            .finish()
    }
}

/// Community detection algorithm parameters for Louvain method
#[derive(Debug, Clone, Deserialize)]
pub struct LouvainConfig {
    pub max_iterations: u32,
    pub tolerance: f64,
    pub include_intermediate_communities: bool,
}

impl Default for LouvainConfig {
    fn default() -> Self {
        Self {
            max_iterations: 10,
            tolerance: 0.0001,
            include_intermediate_communities: false,
        }
    }
}

/// Label Propagation algorithm configuration
#[derive(Debug, Clone, Deserialize)]
pub struct LabelPropagationConfig {
    pub max_iterations: u32,
}

impl Default for LabelPropagationConfig {
    fn default() -> Self {
        Self { max_iterations: 10 }
    }
}

/// Community detection configuration
#[derive(Debug, Clone, Deserialize)]
pub struct CommunityDetectionConfig {
    pub consensus_runs: u32,
    pub louvain: LouvainConfig,
    pub label_propagation: LabelPropagationConfig,
}

impl Default for CommunityDetectionConfig {
    fn default() -> Self {
        Self {
            consensus_runs: 5,
            louvain: LouvainConfig::default(),
            label_propagation: LabelPropagationConfig::default(),
        }
    }
}

/// Centrality analysis configuration.
///
/// Every field is an `Option`, so a key that is missing from the TOML table
/// deserializes to `None`. The `Some(..)` values of the `Default`
/// implementation apply only where `CentralityConfig::default()` itself is
/// used (for example when the analysis config file is absent).
#[derive(Debug, Clone, Deserialize)]
pub struct CentralityConfig {
    /// Optional sampling size for betweenness centrality.
    /// NOTE(review): the meaning of `None` (e.g. "no sampling") is decided by
    /// the consumer of this value — confirm there.
    pub betweenness_sampling_size: Option<usize>,
    /// Optional seed for the betweenness sampling.
    pub betweenness_sampling_seed: Option<u64>,
    /// Optional Wasserman-Faust switch.
    /// NOTE(review): presumably selects the Wasserman-Faust variant of a
    /// centrality computation — confirm at the call site.
    pub use_wasserman_faust: Option<bool>,
}

impl Default for CentralityConfig {
    fn default() -> Self {
        Self {
            betweenness_sampling_size: Some(1000),
            betweenness_sampling_seed: Some(42),
            use_wasserman_faust: Some(true),
        }
    }
}

/// Path analysis configuration.
///
/// The two `Option` fields deserialize to `None` when their key is missing.
/// All other fields carry no serde default, so they must be present whenever
/// this table is deserialized from a file.
#[derive(Debug, Clone, Deserialize)]
pub struct PathAnalysisConfig {
    /// Optional cap on the number of paths per node pair.
    pub max_paths_per_pair: Option<usize>,
    /// Optional cap on path length.
    pub max_path_length: Option<usize>,
    /// Number of representative nodes taken per partition.
    pub representative_nodes_per_partition: usize,
    /// Number of top communities considered.
    pub num_top_communities: usize,
    /// Sample size used for communities.
    pub sample_size_communities: usize,
    /// Number of top ASN groups considered.
    pub num_top_asn_groups: usize,
    /// Sample size used for ASN groups.
    pub sample_size_asn_groups: usize,
    /// Sample size used within a component.
    /// NOTE(review): exact sampling semantics of these fields are defined by
    /// the path analysis code — confirm there.
    pub internal_component_sample_size: usize,
}

impl Default for PathAnalysisConfig {
    fn default() -> Self {
        Self {
            max_paths_per_pair: Some(10),
            max_path_length: Some(10),
            representative_nodes_per_partition: 5,
            num_top_communities: 3,
            sample_size_communities: 5,
            num_top_asn_groups: 3,
            sample_size_asn_groups: 3,
            internal_component_sample_size: 15,
        }
    }
}

/// General analysis configuration
#[derive(Debug, Clone, Deserialize)]
pub struct AnalysisSettings {
    pub max_display_components: usize,
    pub calculate_distribution: bool,
    pub isolation_ratio_threshold: f64,
}

impl Default for AnalysisSettings {
    fn default() -> Self {
        Self {
            max_display_components: 10,
            calculate_distribution: true,
            isolation_ratio_threshold: 50.0,
        }
    }
}

/// Analysis-specific configuration loaded from analysis_config.toml
#[derive(Debug, Clone, Deserialize, Default)]
pub struct AnalysisParametersConfig {
    pub community_detection: CommunityDetectionConfig,
    pub centrality: CentralityConfig,
    pub path_analysis: PathAnalysisConfig,
    pub analysis: AnalysisSettings,
}

/// The fully processed and usable configuration for the `erpc-analysis`
/// application. It combines settings from the main TOML file, environment
/// variables, and the analysis parameter file.
///
/// Note that the derived `Debug` output includes whatever the `Debug`
/// implementation of [`Neo4jConfig`] prints, so take care when logging this
/// value with `{:?}`.
#[derive(Debug, Clone)]
pub struct AnalysisConfig {
    /// Optional Neo4j configuration. `Some` if Neo4j is enabled and
    /// credentials are loaded, `None` otherwise.
    pub neo4j: Option<Neo4jConfig>,
    /// Analysis-specific parameters for algorithms and settings. These are
    /// the type's defaults when the analysis config file does not exist.
    pub analysis_params: AnalysisParametersConfig,
}

impl AnalysisConfig {
    /// Loads the application configuration.
    ///
    /// This function performs the following steps:
    /// 1. Reads the TOML configuration file specified by `args.config`.
    /// 2. Parses the TOML into `TomlRootConfig`.
    /// 3. Checks `toml_root.primary.neo4j_allowed`.
    /// 4. If Neo4j is allowed, it attempts to load `NEO4J_DB_ADDR`,
    ///    `NEO4J_DB_USERNAME`, and `NEO4J_DB_PASSWORD` from environment
    ///    variables (expected to be set via `config/primary/.env`).
    /// 5. Constructs and returns the final `AnalysisConfig`.
    ///
    /// # Arguments
    /// * `args` - Parsed command-line arguments containing the path to the
    ///   TOML config file.
    ///
    /// # Errors
    /// Returns an error if the TOML file cannot be read or parsed, or if
    /// Neo4j is allowed but the required environment variables for
    /// credentials are not set.
    pub fn load_from_toml_and_env(args: &Args) -> Result<Self> {
        let config_path = Path::new(&args.config);
        info!("Loading configuration from: {:?}", config_path);

        let config_file_contents = fs::read_to_string(config_path)
            .with_context(|| {
                format!(
                    "Failed to read configuration file at: {:?}",
                    config_path
                )
            })?;

        let toml_root: TomlRootConfig = toml::from_str(&config_file_contents)
            .with_context(|| {
                format!(
                    "Failed to parse TOML from configuration file at: {:?}. \
                     Ensure it matches the expected structure.",
                    config_path
                )
            })?;

        let mut neo4j_details: Option<Neo4jConfig> = None;

        if toml_root.primary.neo4j_allowed {
            // Attempt to load .env file only if neo4j is allowed.
            // The .env file is expected to be at "config/primary/.env"
            // relative to workspace root.
            let env_path = "config/primary/.env";

            match dotenvy::from_path(env_path) {
                Ok(_) => {
                    println!(
                        "Successfully loaded environment variables from: {}",
                        env_path
                    );
                }
                Err(e) => {
                    println!(
                        "Warning: Could not load .env file from {}: {}. \
                         Neo4j credential loading will rely on globally set \
                         environment variables.",
                        env_path, e
                    );
                }
            }

            info!(
                "Attempting to load Neo4j credentials from environment \
                 variables..."
            );
            let raw_uri = env::var("NEO4J_DB_ADDR").context(
                "NEO4J_DB_ADDR not found. Ensure it is set (e.g., in \
                 'config/primary/.env' or globally).",
            )?;
            let uri = format!("bolt://{}", raw_uri);
            let username = env::var("NEO4J_DB_USERNAME").context(
                "NEO4J_DB_USERNAME not found. Ensure it is set (e.g., in \
                 'config/primary/.env' or globally).",
            )?;
            let password = env::var("NEO4J_DB_PASSWORD").context(
                "NEO4J_DB_PASSWORD not found. Ensure it is set (e.g., in \
                 'config/primary/.env' or globally).",
            )?;

            neo4j_details = Some(Neo4jConfig {
                uri,
                username,
                password,
            });
            println!("Successfully loaded Neo4j credentials.");
        } else {
            println!(
                "Neo4j is not allowed in the [primary] section of the \
                 configuration file. Skipping .env loading and credential \
                 retrieval."
            );
        }

        // Load analysis parameters from analysis_config.toml
        let analysis_params =
            Self::load_analysis_parameters(&args.analysis_config)?;

        Ok(AnalysisConfig {
            neo4j: neo4j_details,
            analysis_params,
        })
    }

    /// Loads analysis-specific parameters from analysis_config.toml
    fn load_analysis_parameters(
        config_path: &str,
    ) -> Result<AnalysisParametersConfig> {
        let analysis_config_path = Path::new(config_path);

        if !analysis_config_path.exists() {
            info!(
                "Analysis config file not found at {:?}, using defaults",
                analysis_config_path
            );
            return Ok(AnalysisParametersConfig::default());
        }

        info!(
            "Loading analysis parameters from: {:?}",
            analysis_config_path
        );

        let config_contents = fs::read_to_string(analysis_config_path)
            .with_context(|| {
                format!(
                    "Failed to read analysis config file at: {:?}",
                    analysis_config_path
                )
            })?;

        let analysis_params: AnalysisParametersConfig =
            toml::from_str(&config_contents).with_context(|| {
                format!(
                    "Failed to parse analysis config TOML from: {:?}. \
                         Ensure it matches the expected structure.",
                    analysis_config_path
                )
            })?;

        info!("Successfully loaded analysis parameters");
        Self::validate_analysis_parameters(&analysis_params)?;

        Ok(analysis_params)
    }

    /// Validates analysis parameters for correctness.
    ///
    /// Checks, in order:
    /// * Louvain `max_iterations` is in `1..=100`.
    /// * Louvain `tolerance` is greater than `0.0` and at most `1.0`. The
    ///   condition is written positively so that `NaN`, which compares false
    ///   against everything, is rejected rather than slipping through.
    /// * Label Propagation `max_iterations` is in `1..=100`.
    /// * `consensus_runs` is in `1..=20`.
    ///
    /// # Errors
    /// Returns an error describing the first check that fails.
    fn validate_analysis_parameters(
        params: &AnalysisParametersConfig,
    ) -> Result<()> {
        let cd = &params.community_detection;

        // Validate Louvain parameters
        let louvain = &cd.louvain;
        anyhow::ensure!(
            (1..=100).contains(&louvain.max_iterations),
            "Louvain max_iterations must be between 1 and 100, got: {}",
            louvain.max_iterations
        );
        anyhow::ensure!(
            louvain.tolerance > 0.0 && louvain.tolerance <= 1.0,
            "Louvain tolerance must be greater than 0.0 and at most 1.0, \
             got: {}",
            louvain.tolerance
        );

        // Validate Label Propagation parameters
        let lpa = &cd.label_propagation;
        anyhow::ensure!(
            (1..=100).contains(&lpa.max_iterations),
            "Label Propagation max_iterations must be between 1 and 100, \
             got: {}",
            lpa.max_iterations
        );

        // Validate general parameters
        anyhow::ensure!(
            (1..=20).contains(&cd.consensus_runs),
            "consensus_runs must be between 1 and 20, got: {}",
            cd.consensus_runs
        );

        Ok(())
    }
}