|
import sqlite3
|
|
import re
|
|
import os
|
|
import shutil
|
|
|
|
con = sqlite3.connect('LPHD.db')
|
|
cur = con.cursor()
|
|
|
|
sql_fisgreek = '''SELECT DDB_isGREEK.ID_DDB, DDB_fpath.fpath
|
|
FROM DDB_isGREEK
|
|
INNER JOIN DDB_fpath
|
|
ON DDB_isGREEK.ID_DDB=DDB_fpath.ID_DDB
|
|
WHERE IsGreek="yes";'''
|
|
|
|
res_fisgreek = cur.execute(sql_fisgreek)
|
|
fisgreek = res_fisgreek.fetchall()
|
|
|
|
if not os.path.isdir('training_texts2'):
|
|
os.makedirs('training_texts2')
|
|
for idpap, path in fisgreek:
|
|
corpusf = re.sub('^idp\.data', 'training_texts', path)
|
|
corpusf = re.sub('xml$', 'txt', corpusf)
|
|
nfname = 'training_texts2/'+ re.search('[^/]+\.txt', corpusf).group(0)
|
|
if os.path.exists(corpusf):
|
|
shutil.copyfile(corpusf, nfname)
|
|
|
|
|