#!/usr/bin/env python
"""Convert every file in the scrape folder to a wiki-text file.

Each entry of ``sourcedir`` that is not named in ``excl`` is read, passed
through ``html2wiki`` together with ``wikiname``, and the returned text is
written to ``targetdir`` as ``<original name>.txt``.  A dictionary mapping
each returned page name to ``(output path, "NEW")`` is pickled to
``reslist.pickle`` in the current working directory.

The script exits with status 1 if ``targetdir`` does not exist.
"""

# The star import stays first so that the explicit standard-library imports
# below take precedence over any same-named objects it brings in.
# NOTE(review): html2wiki is presumably provided by striphf -- confirm.
from striphf import *
import os
import sys
import pickle

# Directory entries that must never be converted.
excl = ["MISSING", "README", "README.txt", "wiki", ".svn", ".cvs"]
sourcedir = os.path.expanduser("~/wikiscrape")
targetdir = os.path.expanduser("~/wikiscrape/wiki")
wikiname = "ASCEND"

if not os.path.exists(targetdir):
    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print("Target directory %s does not exist" % targetdir)
    sys.exit(1)

# pagename -> (path of the generated text file, "NEW")
res = {}

# loop through files in the nominated folder
for f in os.listdir(sourcedir):
    # This message is written before the exclusion test, so excluded entries
    # are logged as well.
    sys.stderr.write("Processing %s...\n" % f)
    if f in excl:
        continue

    srcpath = os.path.join(sourcedir, f)
    if not os.path.isfile(srcpath):
        # A sub-directory that is not listed in excl cannot be opened for
        # reading; skip it instead of aborting the whole run.
        sys.stderr.write("Skipping %s (not a regular file)\n" % f)
        continue

    # Context managers close each handle promptly instead of relying on
    # garbage collection.
    with open(srcpath) as infile:
        c = infile.read()

    s, pagename = html2wiki(c, wikiname)
    t = os.path.join(targetdir, f + ".txt")
    sys.stderr.write("Writing to %s\n" % t)
    res[pagename] = (t, "NEW")
    with open(t, "w") as outfile:
        outfile.write(s)

# Pickle data must go to a binary-mode file: text mode corrupts it on
# Windows and is rejected outright on Python 3.
with open("reslist.pickle", "wb") as picklefile:
    pickle.dump(res, picklefile)