# Match time expressions in a document.


import re

# Regular-expression patterns for time expressions. They are joined into one
# alternation below, so order matters: at any position in the text the first
# alternative that matches wins and consumes its span.
patterns = [
    r'\b\d{1,4} (?:AD|BC)\b',                      # e.g., 411 AD, 3500 BC
    r'\b(?:AD|BC) \d{1,4}\b',                      # e.g., AD 411, BC 3500
    r'\bin \d{1,4} (?:AD|BC)?\b',                  # e.g., in 2000 AD, in 3500 BC
    r'\bin \w+? \d{1,4}',                          # e.g., in March 1977
    # Deliberately loose: any word may follow the number, so this also
    # matches spans such as "15 minutes".
    r'\b\d{1,2}(?:st|nd|rd|th)? \w+',              # e.g., 5th June
    # The space before am/pm is optional so that "at 10am" matches, and the
    # meridiem must be exactly "am" or "pm" followed by a word boundary.
    r'\bat \d{1,2}(?:st|nd|rd|th)? ?(?:am|pm)\b',  # e.g., at 10am, at 10 pm
    r'\b(?:before|after) \d{1,4} (?:AD|BC)?\b',    # e.g., before 2000 AD, after 3500 BC
    r'\bo\w+ \d{1,4}\b',                           # e.g., of 2000 (any word starting with "o")
    r'\buntil \d{1,4}',                            # e.g., until 1580
    r'\bb\w+ \d{1,4}',                             # e.g., by 1521 (any word starting with "b")
    r'\bo\w+ \w+ \d{1,4}\b',                       # e.g., On November 15
    r'\bin \d{1,4}',                               # e.g., in 1821
]

# Merge all patterns into a single regular expression.
combined_pattern = '|'.join(patterns)

# Compile once; matching is case-insensitive so "In"/"in", "AD"/"ad" all match.
_time_regex = re.compile(combined_pattern, re.IGNORECASE)


def find_time_expressions(source):
    """Return the sorted, de-duplicated time expressions found in *source*.

    Every pattern uses only non-capturing groups, so ``findall`` yields the
    full matched substrings. Surrounding whitespace is stripped and empty
    results are dropped before de-duplication.
    """
    stripped = (found.strip() for found in _time_regex.findall(source))
    return sorted({item for item in stripped if item})


# Sample text used to exercise the patterns.
text = """After AD 411, England experienced an economic collapse and became a poor backwater. By 3500 BC,In September 1978,The military conquest of the Aztecs was completed by 1521.
Until the political reforms of 2000, After Mexico became independent from Spain in 1821, He then reinstated Deng in March 1977. A city of wide European-style boulevards based on the great agricultural wealth of the Pampas,
 was not resettled until 1580. Deng began to criticize Hua’s policies. In September 1978, The meeting is at 10am on 5th June. We will have a break for 15 minutes after 3pm. The deadline is before 31st December.
 On November 15, 1532,strangled him in July 1533.arriving in Peru in 1569,When Newport sailed for England in April 1608 he took a cargo of pyrite, """

# Find every match, ignoring case.
matches = _time_regex.findall(text)

# Keep the unique, non-empty matches in sorted order.
filtered_matches = find_time_expressions(text)

# Print the result.
print(filtered_matches)