File size: 747 Bytes
05c431f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
import sqlite3
import re
import os
import shutil

# Query the SQLite database file 'LPHD.db' for the documents to export.
con = sqlite3.connect('LPHD.db')

# Select (ID_DDB, fpath) pairs by joining DDB_isGREEK with DDB_fpath on
# ID_DDB, keeping only rows whose IsGreek value is the string 'yes'.
# The literal is single-quoted on purpose: in SQL a double-quoted token is an
# identifier, and SQLite only falls back to treating it as a string as a
# legacy quirk that can be disabled at build time.
sql_fisgreek = """SELECT DDB_isGREEK.ID_DDB, DDB_fpath.fpath
    FROM DDB_isGREEK
    INNER JOIN DDB_fpath
    ON DDB_isGREEK.ID_DDB = DDB_fpath.ID_DDB
    WHERE IsGreek = 'yes';"""

try:
    cur = con.cursor()
    # fisgreek is a list of (ID_DDB, fpath) tuples, fully materialized so the
    # connection can be released before any file copying starts.
    fisgreek = cur.execute(sql_fisgreek).fetchall()
finally:
    # Always release the database handle, even if the query fails.
    con.close()

# Copy the .txt counterpart of every selected document into the flat
# directory 'training_texts2', creating that directory if needed.
os.makedirs('training_texts2', exist_ok=True)
for _, path in fisgreek:
    # Map the stored path onto the text corpus: a leading 'idp.data' becomes
    # 'training_texts' and a trailing 'xml' becomes 'txt'. Raw strings keep
    # the regex escape '\.' from being read as a (invalid) string escape.
    corpusf = re.sub(r'^idp\.data', 'training_texts', path)
    corpusf = re.sub(r'xml$', 'txt', corpusf)
    if not os.path.exists(corpusf):
        # No text file for this document; nothing to copy.
        continue
    # Only the file name is kept, so the source directory structure is
    # flattened. NOTE: two source files with the same name in different
    # directories end up at the same target, and the later copy wins.
    nfname = os.path.join('training_texts2', os.path.basename(corpusf))
    shutil.copyfile(corpusf, nfname)