#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#
import urllib
# Scraper for the question listing of one category on www.eksperten.dk.
#
# For every listing page it follows the question links, prints each
# question's description and the name/answer pairs found on the question
# page, then moves on to the next listing page.
#
# The parsing is plain substring search; the markers below are the ones the
# original script relied on and have not been re-verified against the site.

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib import urlopen  # Python 2

kategori = "Sikkerhed"


def fetch(address):
    """Download *address* and return the response body as a native ``str``.

    On Python 2 the raw byte string is returned unchanged.  On Python 3 the
    bytes are decoded with the charset announced in the response headers,
    falling back to UTF-8 (undecodable bytes are replaced).
    """
    response = urlopen(address)
    try:
        body = response.read()
        if not isinstance(body, str):
            # Only reached on Python 3, where read() returns bytes.
            charset = response.headers.get_content_charset() or "utf-8"
            try:
                body = body.decode(charset, "replace")
            except LookupError:
                # Server announced an encoding Python does not know.
                body = body.decode("utf-8", "replace")
    finally:
        response.close()
    return body


def iter_question_links(html):
    """Yield absolute URLs for the links found in each listbox of *html*.

    A listbox region runs from one ``class="listbox"`` marker to the next.
    The text after the final marker is not scanned, because there is no
    following marker to bound it.
    """
    marker = 'class="listbox"'
    position = 0
    while True:
        try:
            position = html.index(marker, position) + 1
            endposition = html.index(marker, position) + 1
        except ValueError:
            # No further pair of markers: nothing left to scan.
            return
        hrefpos = position
        while True:
            hrefpos = html.find("href=", hrefpos, endposition) + 1
            if hrefpos == 0:
                break
            startpos = html.find('"', hrefpos) + 1
            endpos = html.find('"', startpos)
            if startpos == 0 or endpos == -1:
                # No quoted value follows; slicing would yield garbage.
                break
            yield "http://www.eksperten.dk%s/" % html[startpos:endpos]


def iter_question_lines(question):
    """Yield the output lines for one question page.

    The first line holds the start offset, end offset and text of the
    ``<meta name="description">`` content.  It is followed by alternating
    ``Navn ...`` (answered by) and ``Svar ...`` (answer) lines.  Nothing is
    yielded when the page has no description tag.
    """
    qStr = '<meta name="description" content="'
    found = question.find(qStr)
    if found == -1:
        # Not a question page (or the layout changed): skip it rather than
        # abort the whole run.
        return
    qstart = found + len(qStr)
    qend = question.find('/>', qstart)
    # Question
    yield "%s %s %s" % (qstart, qend, question[qstart:qend])

    nameStr = 'link_black_spm"'
    s_answerStr = 'spm-respons">'
    p_start = qend
    while True:
        found = question.find(nameStr, p_start)
        if found == -1:
            break
        # The extra +1 skips the single character following the marker.
        p_start = found + len(nameStr) + 1
        p_end = question.find("<", p_start)
        # answered by
        yield "Navn %s" % question[p_start:p_end]
        try:
            s_answer = question.index(s_answerStr, p_start) + len(s_answerStr)
        except ValueError:
            break
        p_start = question.find("</td>", s_answer)
        if p_start == -1:
            break
        # answer
        yield "Svar %s" % question[s_answer:p_start]


def next_page_url(html, kategori):
    """Return the URL of the next listing page, or ``None`` if none is found.

    The link is the first ``href="/spm/<kategori>...`` that occurs before
    the ``</a> </td></tr></table>`` marker.  When that marker is missing,
    ``find`` returns -1 and the search covers all but the last character.
    """
    endpos = html.find("</a> </td></tr></table>")
    startpos = html.find('href="/spm/%s' % kategori, 0, endpos)
    if startpos == -1:
        return None
    startx = html.find('"', startpos) + 1
    endx = html.find('"', startx)
    if endx == -1:
        return None
    return 'http://www.eksperten.dk' + html[startx:endx]


def main():
    """Walk the listing pages of ``kategori`` and print every question."""
    url = 'http://www.eksperten.dk/spm/%(kategori)s/' % {'kategori': kategori}
    visited = set()
    # Stop when there is no next page or the pager points back to a page
    # that was already processed; otherwise the loop would never end.
    while url is not None and url not in visited:
        visited.add(url)
        html = fetch(url)
        for href in iter_question_links(html):
            for line in iter_question_lines(fetch(href)):
                print(line)
        url = next_page_url(html, kategori)


if __name__ == "__main__":
    main()