Reputation: 21
I have written a content-based recommender system in Python 3 using data from a MySQL database. Now I have to use Django for production so that I don't need to re-run the script manually each time new articles are added to the database. I will connect the database through Django's database connections. How do I convert this Python code for Django production? I am really confused about how to write this code in Django.
import pandas as pd
import re
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
stop = set(stopwords.words('english'))
from string import punctuation
import functools
from matplotlib import pyplot as plt
from nltk.stem.snowball import SnowballStemmer
stemmer = SnowballStemmer("english", ignore_stopwords=True)
from tqdm import tqdm_notebook
tqdm_notebook.pandas()  # registers .progress_map()/.progress_apply() on pandas objects
import numpy as np
import math
from sklearn.metrics.pairwise import linear_kernel
#import text
from collections import Counter
df = pd.read_csv('target.csv')
df = df.loc[:, ['id', 'combined_text']].astype(str)
# drop repeated words within each document
df["combined_text"] = df["combined_text"].apply(lambda x: ' '.join(pd.unique(x.split())))
df.combined_text = df.combined_text.apply(lambda x: x.lower())
df.combined_text = df.combined_text.str.replace(r'[^\w\s]', ' ', regex=True)    # strip punctuation
df['combined_text'] = df['combined_text'].str.replace(r'\d+', ' ', regex=True)  # strip digits
df.combined_text = df.combined_text.str.replace('nbsp', ' ')   # leftover &nbsp; entities
#df.combined_text = df.combined_text.str.replace('nan', ' ')
df.combined_text = df.combined_text.str.replace('value', ' ')
df = df.dropna(subset=['combined_text'])
df.combined_text = df.combined_text.str.replace(r'\s+', ' ', regex=True)  # collapse whitespace
#df.combined_text.map(len).hist(figsize=(15, 5), bins=100)
df = df[df.combined_text.map(len) > 600]  # keep only reasonably long articles
df.reset_index(inplace=True, drop=True)
#df1 = df[(df.combined_text.map(len) > 7500)]
stop_words = []
with open('stopwords.txt', 'r') as f:
    for l in f.readlines():
        stop_words.append(l.replace('\n', ''))
additional_stop_words = ['t','aah','aap','don','doesn','isn','ve','ll','add','ndash','will','nan','q','article','lsquo','rsquo','ldquo','rdquo','personalised','please','read','download','app','here','more','experience','based','explore','bull','fact','myth','middot','lifestage','entire','collection','articles','reading','website','android','phone','a','zero']
stop_words += additional_stop_words
stop_words = list(filter(None, stop_words))  # drop empty strings
#print(len(stop_words))
def _removeNonAscii(s):
    return "".join(i for i in s if ord(i) < 128)
def clean_text(text):
    text = text.lower()
    text = re.sub(r"what's", "what is ", text)
    text = text.replace('(ap)', '')
    text = re.sub(r"\'s", " is ", text)
    text = re.sub(r"\'ve", " have ", text)
    text = re.sub(r"can't", "cannot ", text)
    text = re.sub(r"n't", " not ", text)
    text = re.sub(r"i'm", "i am ", text)
    text = re.sub(r"\'re", " are ", text)
    text = re.sub(r"\'d", " would ", text)
    text = re.sub(r"\'ll", " will ", text)
    text = re.sub(r'\W+', ' ', text)  # non-word characters -> space
    text = re.sub(r'\s+', ' ', text)  # collapse whitespace
    text = re.sub(r"\\", "", text)
    text = re.sub(r"\'", "", text)
    text = re.sub(r"\"", "", text)
    text = re.sub('[^a-zA-Z ?!]+', '', text)
    text = _removeNonAscii(text)
    text = text.strip()
    return text
def tokenizer(text):
    text = clean_text(text)
    tokens = [word_tokenize(sent) for sent in sent_tokenize(text)]
    tokens = list(functools.reduce(lambda x, y: x + y, tokens))  # flatten sentence token lists
    tokens = list(filter(lambda token: token not in (stop_words + list(punctuation)), tokens))
    return tokens
#df['combined_text'] = df['combined_text'].map(lambda d: str.encode(d.decode('utf-8')))
df['tokens'] = df['combined_text'].progress_map(tokenizer)
df['text_stemmed'] = df['tokens'].apply(lambda x: [stemmer.stem(y) for y in x])
df['text_stemmed_sentence'] = df['text_stemmed'].apply(lambda x: " ".join(x))
df['stemmed_tokens'] = df['text_stemmed_sentence'].progress_map(tokenizer)
df = df[['id', 'text_stemmed_sentence', 'stemmed_tokens']]
# =============================================================================
# for descripition, tokens in zip(df['combined_text'].head(5), df['tokens'].head(5)):
# print('description:', descripition)
# print('tokens:', tokens)
# print()
#
# =============================================================================
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer(min_df=5, analyzer='word', ngram_range=(1, 2), stop_words='english')
vz = vectorizer.fit_transform(list(df['stemmed_tokens'].map(lambda tokens: ' '.join(tokens))))
# pairwise similarity between all articles (linear_kernel == cosine on L2-normalised tf-idf)
cosine_similarities = linear_kernel(vz, vz)
articlesRecommend = pd.DataFrame(cosine_similarities, columns=df.id, index=df.id)
# for each article, the ids of its 10 most similar articles (the article itself ranks first)
y = np.array([articlesRecommend[c].nlargest(10).index.values for c in articlesRecommend])
articles_df = pd.DataFrame(data=y, index=articlesRecommend.columns)
Upvotes: 1
Views: 163
Reputation: 4837
The complete answer to this question would be lengthy, but I can wrap it up simply as:
django-admin startproject <project_name>
Next, run django-admin startapp <app_name>
This creates an app inside the Django project, since a Django project can have many apps.
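After those two commands you will have a layout roughly like this (recommender_site and articles are placeholder names for the project and the app):

recommender_site/
    manage.py
    recommender_site/
        settings.py
        urls.py
        wsgi.py
    articles/
        views.py
        models.py

Your recommender code then goes inside a view function in the app's views.py, something like this: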
import pandas # and import other libs
def some_func(request):
    ## your code
Next you will have to map this function to a URL in urls.py; that is something you can find here: mapping the urls to functions in views.py
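A minimal sketch of what that mapping could look like, assuming the view above lives in an app named articles and is exposed at /recommendations/ (both names are placeholders):

from django.urls import path
from articles import views

urlpatterns = [
    # route requests for /recommendations/ to the view function above
    path('recommendations/', views.some_func),
]

In the project-level urls.py you would then pull this in with path('', include('articles.urls')) (include also comes from django.urls).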
Of course, you will have to run the server using python manage.py runserver; you'll then be able to reach your project at 127.0.0.1:8000.
Honestly, if you understand the basic architecture of Django, this is a very easy task to do. This documentation can be of help to you.
Coming to the crux of your question: since you explained that you'll be suggesting the most related articles on the basis of an already existing article, the data from your Laravel project should flow in as JSON. You can read that data in your views.py functions; once you have read the data and run your already-working code, you should be able to send the most related articles' information (ids or similar) back through some URL. For this purpose you can either use Django's REST framework or simply return a JsonResponse from your function.
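As a rough sketch of the JsonResponse route (not a drop-in implementation): build_recommendations below is a hypothetical helper that runs your existing pandas/scikit-learn pipeline and returns the articles_df table from your question, and the id query parameter is an assumed request format:

from django.http import JsonResponse

def recommend(request):
    # hypothetical helper wrapping your pandas/scikit-learn pipeline;
    # it returns the articles_df table built in your script
    articles_df = build_recommendations()
    article_id = request.GET.get('id')  # e.g. /recommendations/?id=42
    if article_id not in articles_df.index:
        return JsonResponse({'error': 'unknown article id'}, status=404)
    # the first entry is the article itself (similarity 1.0), so skip it
    similar_ids = [str(x) for x in articles_df.loc[article_id]][1:]
    return JsonResponse({'id': article_id, 'recommended': similar_ids})

Recomputing the tf-idf matrix on every request would be slow, so in practice you'd cache articles_df and rebuild it on a schedule (for example, from a custom management command or a background task) whenever new articles land in the database.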
Upvotes: 2