materi-praktikum/.virtual_documents/NLP/Praktikum Python Code/01-NLP-Python-Basics/07-NLP-Basics-Assessment-Solution.ipynb



# RUN THIS CELL to perform standard imports:
import spacy
# Load the small English pipeline once; `nlp` is used below both to parse
# the text file and to supply the vocabulary for the Matcher.
nlp = spacy.load('en_core_web_sm')


# Enter your code here:

# Read the whole story into memory first, then hand the raw text to the
# spaCy pipeline to build the Doc used by the rest of the notebook.
with open('../TextFiles/owlcreek.txt') as f:
    owlcreek_text = f.read()

doc = nlp(owlcreek_text)


# Run this cell to verify it worked:

# Slice out the first 36 tokens of the parsed document. As a bare
# expression, the result is only shown when this is the last statement of
# a notebook cell; it prints nothing when run as a plain script.
doc[:36]


# Number of tokens in the document (again, only displayed in a notebook).
len(doc)


# Materialize the sentence iterator so sentences can be counted and indexed.
sents = list(doc.sents)
len(sents)


# Show the text of the second sentence (index 1).
second_sentence = sents[1]
print(second_sentence.text)


# NORMAL SOLUTION:
# One line per token of the second sentence: text, part-of-speech tag,
# dependency label and lemma, separated by single spaces.
for token in sents[1]:
    fields = (token.text, token.pos_, token.dep_, token.lemma_)
    print(*fields)


# CHALLENGE SOLUTION:
# Same four attributes, padded to fixed widths so the columns line up.
for token in sents[1]:
    row = f'{token.text:{15}} {token.pos_:{5}} {token.dep_:{10}} {token.lemma_:{15}}'
    print(row)


# Import the Matcher library:

from spacy.matcher import Matcher
matcher = Matcher(nlp.vocab)


# Create a pattern and add it to matcher:

# Match "swimming" followed by "vigorously", case-insensitively, allowing any
# number of whitespace tokens (such as a line break) between the two words.
pattern = [{'LOWER': 'swimming'}, {'IS_SPACE': True, 'OP':'*'}, {'LOWER': 'vigorously'}]

# Patterns are passed as a list of patterns. The older positional form
# `matcher.add(key, None, pattern)` is rejected by spaCy v3; the list form
# below is accepted by spaCy v3 and by v2.2.2 and later.
matcher.add('Swimming', [pattern])


# Create a list of matches called "found_matches" and print the list:

# Calling the matcher on a Doc returns a list of matches; each entry is
# indexed below as a tuple whose element [1] is the match's start token.
found_matches = matcher(doc)
print(found_matches)


# Show the tokens around each match. NOTE(review): the slice bounds are
# hard-coded, presumably taken from the match positions printed above --
# confirm them against that output if the text or tokenizer changes.
print(doc[1265:1290])


print(doc[3600:3615])


# Print the sentence that contains each match.
# Looping over found_matches (rather than indexing [0] and [1] in two
# copy-pasted loops) avoids an IndexError when fewer than two matches are
# found and handles any number of matches.
for _match_id, start, _end in found_matches:
    # Sentences are in document order, so the first sentence that ends
    # after the match's start token is the one containing the match.
    containing_sentence = next(
        (sent for sent in sents if start < sent.end),
        None,
    )
    if containing_sentence is not None:
        print(containing_sentence)