While coding the session monitor a couple of weeks ago, I developed a quick script that queries ntop for session information. Jaime has now started using it for graphing, so I thought it might be useful to somebody.
import re
import socket
import sys

try:  # Python 2
    from sgmllib import SGMLParser as _BaseParser
except ImportError:  # Python 3: sgmllib was removed from the standard library
    from html.parser import HTMLParser

    class _BaseParser(HTMLParser):
        """Minimal stand-in for sgmllib.SGMLParser on Python 3.

        Dispatches tags to ``start_<tag>(attributes)`` / ``end_<tag>()``
        methods when the subclass defines them, which is the only part of
        the SGMLParser protocol that MyParser relies on.
        """

        def __init__(self, verbose=0):
            # convert_charrefs=False keeps "2.6&nbsp;KB" split into two
            # handle_data() calls ("2.6", "KB"); the size parsing in
            # MyParser depends on that chunking.
            HTMLParser.__init__(self, convert_charrefs=False)
            self.verbose = verbose

        def handle_starttag(self, tag, attrs):
            handler = getattr(self, "start_" + tag, None)
            if handler is not None:
                handler(attrs)

        def handle_endtag(self, tag):
            handler = getattr(self, "end_" + tag, None)
            if handler is not None:
                handler()

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen


# Dotted-quad pattern; the dots are escaped so only literal dots match.
_IPV4 = r"\d+\.\d+\.\d+\.\d+"
_IPV4_RE = re.compile("(%s)" % _IPV4)
_PORT_RE = re.compile(r":(\w+)")
_DAYS_RE = re.compile(r"(\d+)\s+days?(?:\s+(\S+))?")

# Checked in this order, first match wins.
_UNIT_FACTORS = (
    ("KB", 1024),
    ("MB", 1024 * 1024),
    ("GB", 1024 * 1024 * 1024),
)


def _clock_to_seconds(clock):
    """Convert "[[h:]m:]s" to an int number of seconds.

    Only the last three colon-separated fields are used. Raises ValueError
    if a field is not an integer.
    """
    fields = clock.split(":")
    fields.reverse()
    return sum(int(field) * weight
               for field, weight in zip(fields, (1, 60, 3600)))


def _resolve_port(token):
    """Return the port for *token*: numeric strings are returned unchanged,
    service names are looked up and returned as an int, and unknown names
    are returned unchanged."""
    if token.isdigit():
        return token
    try:
        return socket.getservbyname(token)
    except (socket.error, ValueError):
        return token


class MyParser(_BaseParser):
    """Extract the session table from an ntop host page.

    Feed the page with parse(); get_sessions() then returns one formatted
    line per session row followed by summary lines (NumSessions,
    SessionsAsSource, SessionsAsDest and per-port unique-peer counts).
    """

    def parse(self, s):
        """Parse the given string 's'."""
        self.feed(s)
        self.close()

    def __init__(self, verbose=0):
        """Initialise an object, passing 'verbose' to the superclass."""
        _BaseParser.__init__(self, verbose)
        self.starting_description = 0
        self.inside_table_element = 0
        self.inside_table_row = 0
        self.in_session = 0
        self.td_count = 0    # 1-based index of the current <td> in its row
        self.seen = 0        # 1 once the numeric part of a size cell was read
        self.sessions = []
        self.session_num = 0
        self.queried_ip = ""
        self.dst_port_group = {}    # port -> set of peer IPs
        self.src_port_group = {}    # port -> set of peer IPs
        self.source_ip = ""
        self.source_port = ""
        self.dest_ip = ""
        self.dest_port = ""
        self.data_sent = 0
        self.data_rcvd = 0
        self.active_since = ""
        self.last_seen = ""
        self.duration = 0
        self.inactive = 0

    def tosecs(self, data):
        """Convert a duration cell to seconds, returned as a string.

        Accepted forms: "27 sec", "52:27", "1:52:27", "1 day 1:52:27",
        "2 days 4:56:23". Raises ValueError for anything else.
        """
        data = data.strip()
        if data.endswith("sec"):
            return data.split(" ", 1)[0]
        days = 0
        clock = data
        if "day" in data:
            found = _DAYS_RE.search(data)
            if found is None:
                raise ValueError("unrecognised duration: %r" % (data,))
            days = int(found.group(1))
            clock = found.group(2) or "0"
        return str(days * 86400 + _clock_to_seconds(clock))

    def start_td(self, attributes):
        self.td_count += 1
        self.inside_table_element = 1

    def end_td(self):
        self.inside_table_element = 0
        self.seen = 0

    def start_tr(self, attributes):
        self.inside_table_row = 1

    def end_tr(self):
        """Close a row; inside the session table, record it as a session."""
        self.inside_table_row = 0
        self.td_count = 0
        if self.in_session and self.source_ip:
            self.session_num += 1
            self.sessions.append(
                "%s:%s --> %s:%s (%f %f) duration: %s" % (
                    self.source_ip, self.source_port,
                    self.dest_ip, self.dest_port,
                    self.data_sent, self.data_rcvd, self.duration))

    def start_a(self, attributes):
        """Take the source/destination IP from the link in column 1 or 2."""
        self.inside_table_row = 1
        if not self.in_session:
            return
        for name, value in attributes:
            if name != "href":
                continue
            found = re.findall(_IPV4, value)
            if not found:
                continue
            if self.td_count == 1:
                self.source_ip = found[0]
            elif self.td_count == 2:
                self.dest_ip = found[0]

    def handle_data(self, data):
        """Track page markers and feed session-table cell text to the row state."""
        if not self.queried_ip:
            # The first IP-looking text on the page is taken as the host
            # the page is about.
            found = _IPV4_RE.findall(data)
            if found:
                self.queried_ip = found[0]
        if "Active TCP/UDP Sessions" in data:
            self.in_session = 1
        if "The color of the host" in data:
            # This text follows the session table: emit the summary.
            self._summarise()
        # Whitespace-only chunks carry no cell content; skipping them keeps
        # the number/unit pairing of size cells intact.
        if self.inside_table_element and self.in_session and data.strip():
            self._handle_cell(data)

    def _summarise(self):
        """Append the summary lines and leave the session table."""
        self.sessions.append("NumSessions:%d" % int(self.session_num))
        self.in_session = 0

        host = re.escape(self.queried_ip)
        is_source = re.compile("^%s:.*" % host)
        is_dest = re.compile(r"^%s:\d+\s+-->\s+%s:.*" % (_IPV4, host))
        source_peer = re.compile(
            r"^%s:\d+\s+-->\s+(%s):(\d+)" % (host, _IPV4))
        dest_peer = re.compile(
            r"^(%s):\d+\s+-->\s+%s:(\d+)" % (_IPV4, host))

        source_sessions = 0
        dest_sessions = 0
        for sess in self.sessions:
            if is_source.match(sess):
                source_sessions += 1
            if is_dest.match(sess):
                dest_sessions += 1
            found = source_peer.match(sess)
            if found:
                peer, port = found.groups()
                self.src_port_group.setdefault(port, set()).add(peer)
            found = dest_peer.match(sess)
            if found:
                peer, port = found.groups()
                self.dst_port_group.setdefault(port, set()).add(peer)

        self.sessions.append("SessionsAsSource:%s" % source_sessions)
        self.sessions.append("SessionsAsDest:%s" % dest_sessions)
        for port in self.src_port_group:
            self.sessions.append("UniqPort%sAsSourceSessions:%d"
                                 % (port, len(self.src_port_group[port])))
        for port in self.dst_port_group:
            self.sessions.append("UniqPort%sAsDestSessions:%d"
                                 % (port, len(self.dst_port_group[port])))

    def _handle_cell(self, data):
        """Store the text of the current session-table cell by column."""
        column = self.td_count
        if column <= 2:
            found = _PORT_RE.findall(data)
            if found:
                port = _resolve_port(found[0])
                if column == 1:
                    self.source_port = port
                elif column == 2:
                    self.dest_port = port
        elif column == 3:
            self._update_size("data_sent", data)
        elif column == 4:
            self._update_size("data_rcvd", data)
        elif column == 5:
            self.active_since = data
        elif column == 6:
            self.last_seen = data
        elif column == 7:
            self.duration = self.tosecs(data)
        elif column == 8:
            self.inactive = self.tosecs(data)

    def _update_size(self, attr, data):
        """Read a size cell delivered as a number chunk, then a unit chunk.

        The first chunk sets the value (ValueError if it is not a number);
        the next chunk scales it by KB/MB/GB if one of those units appears.
        """
        if self.seen == 0:
            setattr(self, attr, float(data))
            self.seen = 1
            return
        for unit, factor in _UNIT_FACTORS:
            if unit in data:
                setattr(self, attr, getattr(self, attr) * factor)
                break
        self.seen = 0

    def get_sessions(self):
        """Return the list of session and summary lines collected so far."""
        return self.sessions


def main(argv=None):
    """Fetch http://<ntop_ip>:3000/<host_ip>.html and print its sessions.

    Returns the process exit status: 0 on success, 1 on a usage or
    connection error (the message is printed to stdout).
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 3:
        print("Prints out session information about a given host")
        print("Usage: " + argv[0] + " ntop_ip host_ip")
        return 1
    ntop = argv[1]
    ip = argv[2]

    try:
        response = urlopen("http://" + ntop + ":3000/" + ip + ".html")
        try:
            page = response.read()
        finally:
            response.close()
    except IOError:
        print("Unable to connect to " + ntop)
        return 1

    if not isinstance(page, str):
        # Python 3 returns bytes; latin-1 maps every byte to a character,
        # so decoding cannot fail whatever the page encoding is.
        page = page.decode("latin-1")

    myparser = MyParser()
    myparser.parse(page)
    print("Sessions for " + ip)
    print("--------------------------")
    for line in myparser.get_sessions():
        print(line)
    return 0


if __name__ == "__main__":
    sys.exit(main())
The output looks somewhat like this:
Gestalt:ntop dk$ python session_query.py 192.168.1.2 192.168.1.2 Sessions for 192.168.1.2 -------------------------- 192.168.1.43:2409 --> 192.168.1.2:3000 (567.000000 2662.400000) duration: 0 192.168.1.2:39492 --> 195.77.186.178:33800 (74240.000000 75264.000000) duration: 406 192.168.1.2:45206 --> 195.55.170.126:33800 (140595.200000 143974.400000) duration: 644 192.168.1.2:54151 --> 207.158.15.208:443 (1153433.600000 1153433.600000) duration: 637 192.168.1.35:59334 --> 192.168.1.2:3000 (258.000000 112.000000) duration: 0 NumSessions:5 SessionsAsSource:3 SessionsAsDest:2 UniqPort443AsSourceSessions:1 UniqPort33800AsSourceSessions:2 UniqPort3000AsDestSessions:2 Gestalt:ntop dk$
Aaah, and with a little trick also the main session page can be parsed:
Gestalt:ntop dk$ python session_query.py 192.168.1.2 NetNetstat Sessions for NetNetstat -------------------------- 192.168.1.43:2409 --> 192.168.1.2:3000 (567.000000 2662.400000) duration: 0 192.168.1.2:39492 --> 195.77.186.178:33800 (75366.400000 76288.000000) duration: 459 192.168.1.2:45206 --> 195.55.170.126:33800 (141721.600000 144896.000000) duration: 706 192.168.1.2:54151 --> 207.158.15.208:443 (1153433.600000 1153433.600000) duration: 695 192.168.1.35:59337 --> 192.168.1.2:3000 (257.000000 112.000000) duration: 0 NumSessions:5 SessionsAsSource:0 SessionsAsDest:1 UniqPort33800AsDestSessions:1 Gestalt:ntop dk$
As a finishing note, we have set up another blog with ossim-related info: Santi’s blog. As an opener, he features a demo video with a sample presentation given to a customer.