Skip to content

Helper scripts

Manpage generator

format_as_markdown_verbatim(text)

Formats a text as a Markdown verbatim block.

Parameters:

Name Type Description Default
text

The input text.

required

Returns:

Type Description
str

Formatted text.

Source code in packages/manpage.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def format_as_markdown_verbatim(text):
    """
    Turns a text into a Markdown verbatim (indented code) block.

    Every line is indented by four spaces. Lines that end up holding
    nothing but spaces are emptied, so blank lines carry no trailing
    whitespace.

    :type text : str
    :param text: The input text.

    :rtype: str
    :return: Formatted text.
    """

    indentation = ' ' * 4
    formatted   = []

    # Only '\n' is treated as a line separator; any other character
    # (including '\r') stays part of its line
    for line in text.split('\n'):
        # Empty or space-only lines become empty lines
        formatted.append(indentation + line if line.strip(' ') else '')

    return '\n'.join(formatted)

generate()

Produces the manpage in Markdown format.

Applies the argument parser's usage and help texts to a template.

Source code in packages/manpage.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def generate():
    """
    Produces the manpage in Markdown format.

    Applies the argument parser's usage and help texts, the current date
    and a sample configuration file to a template, writing the result to
    the manpage source file.

    The template and the configuration are read, and the final contents
    are fully rendered, before the output file is opened. This way a
    failure while reading or formatting does not leave a truncated manpage
    behind.

    """

    # Set inputs and outputs
    template    = os.path.join(basepath, 'docs', 'man', 'onionprobe.1.txt.tmpl')
    output      = os.path.join(basepath, 'docs', 'man', 'onionprobe.1.txt')
    config_path = os.path.join(basepath, 'configs', 'tor.yaml')

    # Assume a 80 column terminal to compile the usage and help texts
    os.environ["COLUMNS"] = "80"

    # Initialize the command line parser
    parser     = cmdline_parser()

    # Compile template variables
    usage      = remove_usage_prefix(parser.format_usage())
    invocation = remove_usage_prefix(format_as_markdown_verbatim(parser.format_help()))
    date       = datetime.datetime.now().strftime('%b %d, %Y')

    with open(template, 'r') as template_file:
        contents = template_file.read()

    with open(config_path, 'r') as config_file:
        config = format_as_markdown_verbatim(config_file.read())

    # Render everything first: the output file is only touched once the
    # whole manpage is known to be available
    manpage = contents.format(date=date, usage=usage, invocation=invocation, config=config)

    with open(output, 'w') as output_file:
        output_file.write(manpage)

remove_usage_prefix(text)

Simply removes the "usage: " string prefix from a text.

Parameters:

Name Type Description Default
text

The input text.

required

Returns:

Type Description
str

The text without the "usage: " string.

Source code in packages/manpage.py
27
28
29
30
31
32
33
34
35
36
37
38
def remove_usage_prefix(text):
    """
    Simply removes the "usage: " string prefix from a text.

    Only the first occurrence is removed, so the same wording showing up
    later in the text (such as in an option description) is preserved.
    The prefix does not need to be at the very beginning of the text,
    which allows it to be removed from an already indented text.

    :type text : str
    :param text: The input text.

    :rtype: str
    :return: The text without the first "usage: " string.
    """

    return text.replace('usage: ', '', 1)

Real-world Onion Sites

RealWorldOnionSites

Bases: OnionprobeConfigCompiler

Handles the 'Real-World Onion Sites' database

Inherits from the OnionprobeConfigCompiler class, implementing custom procedures.

Source code in packages/real-world-onion-sites.py
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class RealWorldOnionSites(OnionprobeConfigCompiler):
    """
    Handles the 'Real-World Onion Sites' database

    Inherits from the OnionprobeConfigCompiler class, implementing
    custom procedures.
    """

    def build_endpoints_config(self, database):
        """
        Overrides OnionprobeConfigCompiler.build_endpoints_config()
        method with custom logic.

        The database is a CSV document with 'site_name' and 'onion_url'
        columns. It is read from a local file if the configured location
        exists in the filesystem, and fetched over HTTP otherwise. When
        the fetch fails, the error is printed and the program exits with
        status 1.

        :type database : str
        :param database: A database name from the databases dictionary.

        :rtype: dict
        :return: Onion Service endpoints in the format accepted by Onionprobe.

        """

        source    = self.databases[database]
        endpoints = {}

        # Get the Onion Service database from a local or remote CSV file
        if os.path.exists(source):
            print('Using list of %s database endpoints from %s...' % (
                database, source))

            with open(source, 'r') as result:
                # All lines are read while the file is still open
                data = csv.DictReader(result.readlines())

        else:
            try:
                print('Fetching remote list of %s database endpoints from %s...' % (database, source))

                result = requests.get(source)

                # Do not attempt to parse an HTTP error page as CSV
                result.raise_for_status()

                data   = csv.DictReader(StringIO(result.text))

            except Exception as e:
                # Log the exception
                print(repr(e))

                # Some error happened: do not proceed generating the config
                # (the exit() builtin is meant for interactive sessions only)
                raise SystemExit(1)

        # Parse the database and convert it to the Onionprobe endpoints format
        for item in data:
            print('Processing %s...' % (item['site_name']))

            url      = urllib.parse.urlparse(item['onion_url'])
            address  = url.netloc

            # Defaults: plain HTTP when there is no scheme, and the
            # root path when there is no path
            protocol = url.scheme if url.scheme != '' else 'http'
            port     = 80 if protocol == 'http' else 443
            paths    = [{
                'path'            : url.path if url.path != '' else '/',
                'allowed_statuses': [ 200 ],
                }]

            # Append to the endpoints dictionary; the first entry for a
            # given site name wins
            if item['site_name'] not in endpoints:
                endpoints[item['site_name']] = {
                        'address' : address,
                        'protocol': protocol,
                        'port'    : port,
                        'paths'   : paths,
                        }

        return endpoints

build_endpoints_config(database)

Overrides OnionprobeConfigCompiler.build_endpoints_config() method with custom logic.

Parameters:

Name Type Description Default
database

A database name from the databases dictionary.

required

Returns:

Type Description
dict

Onion Service endpoints in the format accepted by Onionprobe.

Source code in packages/real-world-onion-sites.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def build_endpoints_config(self, database):
    """
    Overrides OnionprobeConfigCompiler.build_endpoints_config()
    method with custom logic.

    The database is a CSV document with 'site_name' and 'onion_url'
    columns. It is read from a local file if the configured location
    exists in the filesystem, and fetched over HTTP otherwise. When
    the fetch fails, the error is printed and the program exits with
    status 1.

    :type database : str
    :param database: A database name from the databases dictionary.

    :rtype: dict
    :return: Onion Service endpoints in the format accepted by Onionprobe.

    """

    source    = self.databases[database]
    endpoints = {}

    # Get the Onion Service database from a local or remote CSV file
    if os.path.exists(source):
        print('Using list of %s database endpoints from %s...' % (
            database, source))

        with open(source, 'r') as result:
            # All lines are read while the file is still open
            data = csv.DictReader(result.readlines())

    else:
        try:
            print('Fetching remote list of %s database endpoints from %s...' % (database, source))

            result = requests.get(source)

            # Do not attempt to parse an HTTP error page as CSV
            result.raise_for_status()

            data   = csv.DictReader(StringIO(result.text))

        except Exception as e:
            # Log the exception
            print(repr(e))

            # Some error happened: do not proceed generating the config
            # (the exit() builtin is meant for interactive sessions only)
            raise SystemExit(1)

    # Parse the database and convert it to the Onionprobe endpoints format
    for item in data:
        print('Processing %s...' % (item['site_name']))

        url      = urllib.parse.urlparse(item['onion_url'])
        address  = url.netloc

        # Defaults: plain HTTP when there is no scheme, and the
        # root path when there is no path
        protocol = url.scheme if url.scheme != '' else 'http'
        port     = 80 if protocol == 'http' else 443
        paths    = [{
            'path'            : url.path if url.path != '' else '/',
            'allowed_statuses': [ 200 ],
            }]

        # Append to the endpoints dictionary; the first entry for a
        # given site name wins
        if item['site_name'] not in endpoints:
            endpoints[item['site_name']] = {
                    'address' : address,
                    'protocol': protocol,
                    'port'    : port,
                    'paths'   : paths,
                    }

    return endpoints

SecureDrop

SecureDropSites

Bases: OnionprobeConfigCompiler

Handles the SecureDrop API database

Inherits from the OnionprobeConfigCompiler class, implementing custom procedures.

Source code in packages/securedrop.py
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
class SecureDropSites(OnionprobeConfigCompiler):
    """
    Handles the Secure Drop API database

    Inherits from the OnionprobeConfigCompiler class, implementing
    custom procedures.
    """

    def build_endpoints_config(self, database):
        """
        Overrides OnionprobeConfigCompiler.build_endpoints_config()
        method with custom logic.

        The database is a JSON list of items having 'title' and
        'onion_address' fields. It is read from a local file if the
        configured location exists in the filesystem, and fetched over
        HTTP otherwise. When the fetch fails, the error is printed and the
        program exits with status 1.

        :type database : str
        :param database: A database name from the databases dictionary.

        :rtype: dict
        :return: Onion Service endpoints in the format accepted by Onionprobe.

        """

        source    = self.databases[database]
        endpoints = {}

        # Get the Onion Service database from API data
        if os.path.exists(source):
            print('Using list of %s database endpoints from %s...' % (
                database, source))

            with open(source, 'r') as result:
                # Parse the whole file, so a JSON document spanning many
                # lines is accepted as well as a single-line one
                data = json.load(result)

        else:
            try:
                print('Fetching remote list of %s database endpoints from %s...' % (database, source))

                result = requests.get(source)

                # Do not attempt to parse an HTTP error page as JSON
                result.raise_for_status()

                data   = json.loads(result.text)

            except Exception as e:
                # Log the exception
                print(repr(e))

                # Some error happened: do not proceed generating the config
                # (the exit() builtin is meant for interactive sessions only)
                raise SystemExit(1)

        # Parse the database and convert it to the Onionprobe endpoints format
        for item in data:
            print('Processing %s...' % (item['title']))

            # NOTE: complete parsing with urllib.parse.urlparse() does not
            # work right now, since the 'onion_address' field is not
            # RFC 1808 compliant.
            # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse

            # Simpler parsing, assuming HTTP on port 80 and default path
            address  = item['onion_address']
            protocol = 'http'
            port     = 80
            paths    = [{
                'path'            : '/',
                'allowed_statuses': [ 200 ],
                }]

            # Append to the endpoints dictionary; the first entry for a
            # given title wins.
            # We could index either by the project title or by its Onion
            # Name ('onion_name' field); the title is used.
            if item['title'] not in endpoints:
                endpoints[item['title']] = {
                        'address' : address,
                        'protocol': protocol,
                        'port'    : port,
                        'paths'   : paths,
                        }

        return endpoints

build_endpoints_config(database)

Overrides OnionprobeConfigCompiler.build_endpoints_config() method with custom logic.

Parameters:

Name Type Description Default
database

A database name from the databases dictionary.

required

Returns:

Type Description
dict

Onion Service endpoints in the format accepted by Onionprobe.

Source code in packages/securedrop.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def build_endpoints_config(self, database):
    """
    Overrides OnionprobeConfigCompiler.build_endpoints_config()
    method with custom logic.

    The database is a JSON list of items having 'title' and
    'onion_address' fields. It is read from a local file if the
    configured location exists in the filesystem, and fetched over
    HTTP otherwise. When the fetch fails, the error is printed and the
    program exits with status 1.

    :type database : str
    :param database: A database name from the databases dictionary.

    :rtype: dict
    :return: Onion Service endpoints in the format accepted by Onionprobe.

    """

    source    = self.databases[database]
    endpoints = {}

    # Get the Onion Service database from API data
    if os.path.exists(source):
        print('Using list of %s database endpoints from %s...' % (
            database, source))

        with open(source, 'r') as result:
            # Parse the whole file, so a JSON document spanning many
            # lines is accepted as well as a single-line one
            data = json.load(result)

    else:
        try:
            print('Fetching remote list of %s database endpoints from %s...' % (database, source))

            result = requests.get(source)

            # Do not attempt to parse an HTTP error page as JSON
            result.raise_for_status()

            data   = json.loads(result.text)

        except Exception as e:
            # Log the exception
            print(repr(e))

            # Some error happened: do not proceed generating the config
            # (the exit() builtin is meant for interactive sessions only)
            raise SystemExit(1)

    # Parse the database and convert it to the Onionprobe endpoints format
    for item in data:
        print('Processing %s...' % (item['title']))

        # NOTE: complete parsing with urllib.parse.urlparse() does not
        # work right now, since the 'onion_address' field is not
        # RFC 1808 compliant.
        # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse

        # Simpler parsing, assuming HTTP on port 80 and default path
        address  = item['onion_address']
        protocol = 'http'
        port     = 80
        paths    = [{
            'path'            : '/',
            'allowed_statuses': [ 200 ],
            }]

        # Append to the endpoints dictionary; the first entry for a
        # given title wins.
        # We could index either by the project title or by its Onion
        # Name ('onion_name' field); the title is used.
        if item['title'] not in endpoints:
            endpoints[item['title']] = {
                    'address' : address,
                    'protocol': protocol,
                    'port'    : port,
                    'paths'   : paths,
                    }

    return endpoints

TPO

TPOSites

Bases: OnionprobeConfigCompiler

Handles official Tor Project Onion Services list.

Inherits from the OnionprobeConfigCompiler class, implementing custom procedures.

Source code in packages/tpo.py
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
class TPOSites(OnionprobeConfigCompiler):
    """
    Handles official Tor Project Onion Services list.

    Inherits from the OnionprobeConfigCompiler class, implementing
    custom procedures.
    """

    def build_endpoints_config(self, database):
        """
        Overrides OnionprobeConfigCompiler.build_endpoints_config()
        method with custom logic.

        The database has one JSON object per line, and all objects are
        merged into a single dictionary whose keys become the endpoint
        names and whose values become the endpoint addresses. It is read
        from a local file if the configured location exists in the
        filesystem, and fetched over HTTP otherwise. When the fetch fails,
        the error is printed and the program exits with status 1.

        :type database : str
        :param database: A database name from the databases dictionary.

        :rtype: dict
        :return: Onion Service endpoints in the format accepted by Onionprobe.

        """

        source = self.databases[database]
        data   = {}

        # Get the Onion Service database from a local file or a remote API
        if os.path.exists(source):
            print('Using list of %s database endpoints from %s...' % (
                database, source))

            with open(source, 'r') as result:
                lines = result.readlines()

        else:
            try:
                print('Fetching remote list of %s database endpoints from %s...' % (database, source))
                result = requests.get(source)

                # Do not attempt to parse an HTTP error page as JSON
                result.raise_for_status()

            except Exception as e:
                # Log the exception
                print(repr(e))

                # Some error happened: do not proceed generating the config
                # (the exit() builtin is meant for interactive sessions only)
                raise SystemExit(1)

            lines = result.text.split('\n')

        # Blank lines (such as the one after a trailing newline) are
        # skipped for both local and remote sources
        for line in lines:
            if line.strip() != '':
                data.update(json.loads(line))

        endpoints = {}

        # Parse the database and convert it to the Onionprobe endpoints format
        for item, address in data.items():
            print('Processing %s...' % (address))

            # NOTE: complete parsing with urllib.parse.urlparse() does not
            # work right now, since the address field is not
            # RFC 1808 compliant.
            # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse

            # Simpler parsing, assuming HTTP on port 80 and default path
            protocol = 'http'
            port     = 80
            paths    = [{
                'path'            : '/',
                'allowed_statuses': [ 200 ],
                }]

            # Some endpoints have their own list of allowed statuses
            if item in allowed_status_overrides:
                paths[0]['allowed_statuses'] = allowed_status_overrides[item]

            # Append to the endpoints dictionary (keys from the merged
            # database are already unique)
            endpoints[item] = {
                    'address' : address,
                    'protocol': protocol,
                    'port'    : port,
                    'paths'   : paths,
                    }

        return endpoints

    def build_onionprobe_config(self):
        """
        Overrides OnionprobeConfigCompiler.build_onionprobe_config()
        method with custom logic.

        Enforces fixed 'shuffle', 'randomize', 'interval' and 'sleep'
        settings before handing over to the parent class implementation.

        """

        # Set the interval and disable shuffling and randomization
        print('Enforcing shuffle, randomize, interval and sleep configurations, no matter what the template or the user says.')
        self.config['shuffle']   = False
        self.config['randomize'] = False
        self.config['interval']  = 60
        self.config['sleep']     = 60

        # Build the configuration
        super().build_onionprobe_config()

build_endpoints_config(database)

Overrides OnionprobeConfigCompiler.build_endpoints_config() method with custom logic.

Parameters:

Name Type Description Default
database

A database name from the databases dictionary.

required

Returns:

Type Description
dict

Onion Service endpoints in the format accepted by Onionprobe.

Source code in packages/tpo.py
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def build_endpoints_config(self, database):
    """
    Overrides OnionprobeConfigCompiler.build_endpoints_config()
    method with custom logic.

    The database has one JSON object per line, and all objects are
    merged into a single dictionary whose keys become the endpoint
    names and whose values become the endpoint addresses. It is read
    from a local file if the configured location exists in the
    filesystem, and fetched over HTTP otherwise. When the fetch fails,
    the error is printed and the program exits with status 1.

    :type database : str
    :param database: A database name from the databases dictionary.

    :rtype: dict
    :return: Onion Service endpoints in the format accepted by Onionprobe.

    """

    source = self.databases[database]
    data   = {}

    # Get the Onion Service database from a local file or a remote API
    if os.path.exists(source):
        print('Using list of %s database endpoints from %s...' % (
            database, source))

        with open(source, 'r') as result:
            lines = result.readlines()

    else:
        try:
            print('Fetching remote list of %s database endpoints from %s...' % (database, source))
            result = requests.get(source)

            # Do not attempt to parse an HTTP error page as JSON
            result.raise_for_status()

        except Exception as e:
            # Log the exception
            print(repr(e))

            # Some error happened: do not proceed generating the config
            # (the exit() builtin is meant for interactive sessions only)
            raise SystemExit(1)

        lines = result.text.split('\n')

    # Blank lines (such as the one after a trailing newline) are
    # skipped for both local and remote sources
    for line in lines:
        if line.strip() != '':
            data.update(json.loads(line))

    endpoints = {}

    # Parse the database and convert it to the Onionprobe endpoints format
    for item, address in data.items():
        print('Processing %s...' % (address))

        # NOTE: complete parsing with urllib.parse.urlparse() does not
        # work right now, since the address field is not
        # RFC 1808 compliant.
        # See https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse

        # Simpler parsing, assuming HTTP on port 80 and default path
        protocol = 'http'
        port     = 80
        paths    = [{
            'path'            : '/',
            'allowed_statuses': [ 200 ],
            }]

        # Some endpoints have their own list of allowed statuses
        if item in allowed_status_overrides:
            paths[0]['allowed_statuses'] = allowed_status_overrides[item]

        # Append to the endpoints dictionary (keys from the merged
        # database are already unique)
        endpoints[item] = {
                'address' : address,
                'protocol': protocol,
                'port'    : port,
                'paths'   : paths,
                }

    return endpoints

build_onionprobe_config()

Overrides OnionprobeConfigCompiler.build_onionprobe_config() method with custom logic.

Source code in packages/tpo.py
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def build_onionprobe_config(self):
    """
    Custom replacement for OnionprobeConfigCompiler.build_onionprobe_config().

    Forces fixed scheduling settings into the configuration and then
    hands over to the parent class implementation.

    """

    print('Enforcing shuffle, randomize, interval and sleep configurations, no matter what the template or the user says.')

    # Shuffling and randomization are switched off, while interval and
    # sleep are both pinned to 60
    enforced = (
        ('shuffle',   False),
        ('randomize', False),
        ('interval',  60),
        ('sleep',     60),
    )

    for setting, value in enforced:
        self.config[setting] = value

    # Let the parent class build the configuration
    super().build_onionprobe_config()