
# RUN THIS CELL to perform standard imports:
import spacy
nlp = spacy.load('en_core_web_sm')
# Enter your code here:
with open('../TextFiles/owlcreek.txt') as f:
    doc = nlp(f.read())
# Run this cell to verify it worked:
doc[:36]
# How many tokens does the file contain?
len(doc)
# How many sentences are contained in the file?
sents = [sent for sent in doc.sents]
len(sents)
# Print the second sentence in the document:
print(sents[1].text)
# NORMAL SOLUTION:
for token in sents[1]:
    print(token.text, token.pos_, token.dep_, token.lemma_)
# CHALLENGE SOLUTION:
for token in sents[1]:
    print(f'{token.text:{15}} {token.pos_:{5}} {token.dep_:{10}} {token.lemma_:{15}}')
# Import the Matcher class:
from spacy.matcher import Matcher
matcher = Matcher(nlp.vocab)
# Create a pattern that matches 'swimming vigorously', allowing optional whitespace tokens (e.g. a newline) in between, and add it to the matcher:
pattern = [{'LOWER': 'swimming'}, {'IS_SPACE': True, 'OP':'*'}, {'LOWER': 'vigorously'}]
# In spaCy 3.x the patterns are passed as a list; older versions used matcher.add('Swimming', None, pattern)
matcher.add('Swimming', [pattern])
# Create a list of matches called "found_matches" and print the list:
found_matches = matcher(doc)
print(found_matches)
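# Each match is a (match_id, start, end) tuple of token offsets into the doc.
# A quick sketch (assuming the doc and matcher defined above): resolve the hashed
# match_id back to its string name and slice the matched span out of the doc.
for match_id, start, end in found_matches:
    string_id = nlp.vocab.strings[match_id]   # 'Swimming'
    span = doc[start:end]                     # the matched tokens
    print(match_id, string_id, start, end, span.text)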
# Print the surrounding text of each match for context:
print(doc[1265:1290])
print(doc[3600:3615])
# Print the sentence that contains each found match:
for sent in sents:
    if found_matches[0][1] < sent.end:
        print(sent)
        break

for sent in sents:
    if found_matches[1][1] < sent.end:
        print(sent)
        break
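# An alternative sketch that avoids scanning the sentence list: every token
# carries a reference to its containing sentence via Token.sent, so the
# sentence can be pulled directly from each match's start offset.
for match_id, start, end in found_matches:
    print(doc[start].sent)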