- 01
- 02
- 03
- 04
- 05
- 06
- 07
- 08
- 09
- 10
- 11
- 12
- 13
- 14
- 15
- 16
- 17
- 18
- 19
- 20
- 21
- 22
- 23
- 24
- 25
- 26
- 27
- 28
- 29
- 30
- 31
- 32
- 33
- 34
- 35
- 36
- 37
- 38
- 39
- 40
- 41
- 42
- 43
- 44
- 45
- 46
- 47
- 48
- 49
- 50
- 51
- 52
- 53
- 54
- 55
- 56
- 57
- 58
- 59
- 60
- 61
- 62
- 63
- 64
- 65
- 66
- 67
- 68
- 69
- 70
- 71
- 72
- 73
- 74
- 75
- 76
- 77
- 78
- 79
- 80
- 81
- 82
- 83
- 84
- 85
- 86
- 87
- 88
- 89
- 90
- 91
def generate_words(sample, phonemes, num=10):
global words, yd
gen = True
parens = 0
r = random.random()
word = ""
i = 0
while i < int(num):
for j in range(0, len(sample)):
if sample[j] == '(':
if gen:
gen = (random.randint(0,1) == 0)
if not gen:
parens += 1
elif sample[j] == ')':
if not gen:
parens -= 1
if parens == 0:
gen = True
elif sample[j] in phonemes.keys():
for n, phtype in enumerate(phonemes.keys()):
if gen and phtype == sample[j]:
#k = random.choice(phonemes[phtype])
n = yd.randomGen(phonemeRanks[phtype])
k = phonemes[phtype][n]
word += k
elif gen:
word += sample[j]
if (not word in words) and (not isExceptional(word)):
words.append(word)
i += 1
word = ""
# ...
"""parsing sound changes rule with the notation X>Y/Z, where X is a set of source phonemes
Y - a set of resulting phonemes, Z - a positional condition (optional)"""
def parsePhNotation(inline):
rule = inline.split('>')
if (not len(rule) == 2):
return []
source = rule[0].split(',')
final = rule[1].split('/')
result = final[0].split(',')
posCond = []
rule = []
if (len(final) > 2):
return False
elif (len(final) == 2):
posCond = final[1].split('_')
if (not len(posCond) == 2):
return []
posCond[0] = posCond[0].split('#')
posCond[1] = posCond[1].split('#')
if (len(posCond[0]) == 2) and len(posCond[0][0]) > 0:
return []
elif len(posCond[0]) == 2:
rule.append(" "+posCond[0][1])
else:
rule.append(posCond[0][0])
if (len(posCond[1]) == 2) and len(posCond[1][1]) > 0:
return []
rule.append(posCond[1][0])
if len(posCond[1]) == 2:
rule[1] += " "
rule[0] = rule[0].split(",")
rule[1] = rule[1].split(",")
final = []
if len(source) > len(result):
for i in range(len(result)-1, len(source)-1):
result.append("")
elif len(source) < len(result):
for i in range(len(source)-1, len(result)-1):
source.append("")
final.append(source)
final.append(result)
if (len(rule)>0):
final.append(rule)
return final
Рекурсивный спуск, автомат с магазинной памятью, top-down parsing, bottom-up parsing? Не, не слышал. В свое время время делал вот такие самопальные алгоритмы для рандомной генерации слов по фонетическим шаблонам (типа "CV(C)", "VC" и т.д.) и фонетического преобразования слов и при попытке починить чета наступал на новые баги, т.к. не до конца понимал как вообще парсятся компьютерные языки.