diff --git a/News_category_project/.ipynb_checkpoints/News_cat-checkpoint.ipynb b/News_category_project/.ipynb_checkpoints/News_cat-checkpoint.ipynb new file mode 100644 index 0000000..fc54971 --- /dev/null +++ b/News_category_project/.ipynb_checkpoints/News_cat-checkpoint.ipynb @@ -0,0 +1,2856 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# News Category Finder using NLP\n", + "\n", + "\n", + "Please check the test, train and sample submission files\n", + "#### Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import nltk" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train = pd.read_excel('Data_Train.xlsx', sheet_name='Sheet1')\n", + "test = pd.read_excel('Data_Test.xlsx', sheet_name='Sheet1')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
STORYSECTION
0But the most painful was the huge reversal in ...3
1How formidable is the opposition alliance amon...0
2Most Asian currencies were trading lower today...3
3If you want to answer any question, click on ‘...1
4In global markets, gold prices edged up today ...3
\n", + "
" + ], + "text/plain": [ + " STORY SECTION\n", + "0 But the most painful was the huge reversal in ... 3\n", + "1 How formidable is the opposition alliance amon... 0\n", + "2 Most Asian currencies were trading lower today... 3\n", + "3 If you want to answer any question, click on ‘... 1\n", + "4 In global markets, gold prices edged up today ... 3" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
STORY
02019 will see gadgets like gaming smartphones ...
1It has also unleashed a wave of changes in the...
2It can be confusing to pick the right smartpho...
3The mobile application is integrated with a da...
4We have rounded up some of the gadgets that sh...
\n", + "
" + ], + "text/plain": [ + " STORY\n", + "0 2019 will see gadgets like gaming smartphones ...\n", + "1 It has also unleashed a wave of changes in the...\n", + "2 It can be confusing to pick the right smartpho...\n", + "3 The mobile application is integrated with a da...\n", + "4 We have rounded up some of the gadgets that sh..." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SECTION
count7628.000000
mean1.357892
std0.999341
min0.000000
25%1.000000
50%1.000000
75%2.000000
max3.000000
\n", + "
" + ], + "text/plain": [ + " SECTION\n", + "count 7628.000000\n", + "mean 1.357892\n", + "std 0.999341\n", + "min 0.000000\n", + "25% 1.000000\n", + "50% 1.000000\n", + "75% 2.000000\n", + "max 3.000000" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
STORY
countuniquetopfreq
SECTION
016861673This story has been published from a wire agen...4
127722731This story has been published from a wire agen...13
219241914The consensus reads, “Exciting, entertaining, ...3
312461233This story has been published from a wire agen...11
\n", + "
" + ], + "text/plain": [ + " STORY \n", + " count unique top freq\n", + "SECTION \n", + "0 1686 1673 This story has been published from a wire agen... 4\n", + "1 2772 2731 This story has been published from a wire agen... 13\n", + "2 1924 1914 The consensus reads, “Exciting, entertaining, ... 3\n", + "3 1246 1233 This story has been published from a wire agen... 11" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.groupby('SECTION').describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Tokenize" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.tokenize import word_tokenize" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.corpus import stopwords" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "running\n", + "run\n" + ] + } + ], + "source": [ + "from nltk.stem.wordnet import WordNetLemmatizer \n", + "lem = WordNetLemmatizer()\n", + "\n", + "from nltk.stem.porter import PorterStemmer \n", + "stem = PorterStemmer()\n", + "\n", + "word = \"running\" \n", + "print(lem.lemmatize(word))\n", + "\n", + "print(stem.stem(word))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.stem.wordnet import WordNetLemmatizer \n", + "from nltk.stem.porter import PorterStemmer \n", + "lem = WordNetLemmatizer()\n", + "stem = PorterStemmer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing Tokens" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import string\n", + "\n", + "def text_process(mess):\n", + " \"\"\"\n", + " Takes in a string of text, then performs the following:\n", + " 1. Remove all punctuation\n", + " 2. Remove all stopwords\n", + " 3. 
Returns a list of the cleaned text\n", + " \"\"\"\n", + " # Check characters to see if they are in punctuation\n", + " nopunc = [char for char in mess if char not in string.punctuation]\n", + "\n", + " # Join the characters again to form the string.\n", + " nopunc = ''.join(nopunc)\n", + " \n", + " token = word_tokenize(nopunc)\n", + " \n", + " # Now just remove any stopwords\n", + " no_noise = [word for word in token if word.lower() not in (stopwords.words('english') and ['\"',\"'\",'`','”','“'])]\n", + " # Stemming\n", + " \n", + " \n", + " return no_noise\n", + "#[stem.stem(word.lower()) for word in no_noise]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [painful, huge, reversal, fee, income, unheard...\n", + "1 [formidable, opposition, alliance, among, Cong...\n", + "2 [Asian, currencies, trading, lower, today, Sou...\n", + "3 [want, answer, question, click, ‘, Answer, ’, ...\n", + "4 [global, markets, gold, prices, edged, today, ...\n", + "5 [BEIJING, Chinese, tech, giant, Huawei, announ...\n", + "6 [Mumbai, India, Incs, external, commercial, bo...\n", + "7 [Wednesday, Federal, Reserve, Chairman, Jerome...\n", + "8 [give, audience, already, done, Yeh, Hai, Aash...\n", + "9 [com, Arbaaz, Khan, spoke, getting, back, Daba...\n", + "10 [“, One, would, think, development, testing, p...\n", + "11 [far, year, rupee, gained, 07, foreign, invest...\n", + "12 [Xiaomi, however, sees, presence, Jio, rural, ...\n", + "13 [ad, reads, bells, whistles, Bezel, notch, app...\n", + "14 [Tuesday, Powell, said, healthy, US, economy, ...\n", + "15 [feature, help, make, display, responsive, int...\n", + "16 [TikTok, popular, among, children, facing, cri...\n", + "17 [company, hive, ratings, business, whollyowned...\n", + "18 [chooses, hide, CP, colleagues, move, mother, ...\n", + "19 [’, right, opera, house, simply, goes, show, A...\n", + "20 [Facebook, said, eligible, creators, would, ab...\n", + "21 [Starring, Varun, Dhawan, Alia, Bhatt, Sonaksh...\n", + "22 [GKN, Securities, barred, misuse, socalled, da...\n", + "23 [Fintech, startup, Zeta, cofounded, Bhavin, Tu...\n", + "24 [story, published, wire, agency, feed, without...\n", + "25 [Globally, established, companies, Stratasys, ...\n", + "26 [statements, Yeddyurappa, says, air, strikes, ...\n", + "27 [NDA, seeks, reelection, agriculture, form, im...\n", + "28 [Yeddyurappa, said, IAF, air, strikes, would, ...\n", + "29 [”, two, releases, year, far, Milan, Talkies, ...\n", + " ... 
\n", + "7598 [“, TDP, party, ideology, gotten, sidelined, e...\n", + "7599 [far, whenever, ’, reviewed, Kindles, Paperwhi...\n", + "7600 [day, markets, saw, high, volatility, followin...\n", + "7601 [Today, Gmail, allows, 15GB, free, storage, Us...\n", + "7602 [Aparajita, Sarangi, took, voluntary, retireme...\n", + "7603 [Investors, awaiting, economic, growth, data, ...\n", + "7604 [advice, online, survivalists, moves, unpopula...\n", + "7605 [time, developers, disguising, app, pretend, c...\n", + "7606 [Lok, Sabha, elections, 2019, Fifth, phase, vo...\n", + "7607 [watchdog, passed, five, separate, orders, tog...\n", + "7608 [Twitter, post, last, week, OnePlus, confirmed...\n", + "7609 [iOSonly, email, client, named, Spark, launche...\n", + "7610 [said, question, really, spend, ₹10000, portab...\n", + "7611 [’, want, kind, movie, felt, done, similar, ki...\n", + "7612 [1999, film, Mother, received, Best, Foreign, ...\n", + "7613 [Mohan, Babu, considered, political, heavyweig...\n", + "7614 [SP, 500, opened, higher, 126, points, 004, 28...\n", + "7615 [However, reports, suggest, would, another, sm...\n", + "7616 [Mumbai, Indian, stocks, rose, key, indices, e...\n", + "7617 [Sure, others, slightly, faster, slightly, sha...\n", + "7618 [147, million, pixels, one, billion, colours, ...\n", + "7619 [BJD, supporter, Puri, Congress, leaders, well...\n", + "7620 [Bollywood, celebrities, took, social, media, ...\n", + "7621 [However, confirmation, developers, games, wou...\n", + "7622 [terms, optics, back, Redmi, Note, 7, boasts, ...\n", + "7623 [Karnataka, Congress, bastion, also, gave, BJP...\n", + "7624 [film, also, features, Janhvi, Kapoor, revolve...\n", + "7625 [database, created, bringing, together, crimin...\n", + "7626 [state, uneasy, relationship, mainland, since,...\n", + "7627 [Virus, stars, Kunchacko, Boban, Tovino, Thoma...\n", + "Name: STORY, Length: 7628, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train['STORY'].apply(text_process)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bag of words" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "bow_transformer = CountVectorizer(analyzer=text_process).fit(train['STORY'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44346\n" + ] + } + ], + "source": [ + "print(len(bow_transformer.vocabulary_))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 98)\t1\n", + " (0, 458)\t1\n", + " (0, 4354)\t1\n", + " (0, 6127)\t1\n", + " (0, 7903)\t1\n", + " (0, 17114)\t1\n", + " (0, 19913)\t1\n", + " (0, 20711)\t1\n", + " (0, 21236)\t1\n", + " (0, 22284)\t1\n", + " (0, 23796)\t1\n", + " (0, 24786)\t1\n", + " (0, 25194)\t1\n", + " (0, 25607)\t1\n", + " (0, 26393)\t1\n", + " (0, 26400)\t1\n", + " (0, 26889)\t1\n", + " (0, 27416)\t1\n", + " (0, 28063)\t1\n", + " (0, 28740)\t2\n", + " (0, 28793)\t2\n", + " (0, 29335)\t1\n", + " (0, 32289)\t3\n", + " (0, 33750)\t1\n", + " (0, 34213)\t1\n", + " (0, 34771)\t1\n", + " (0, 35747)\t1\n", + " (0, 37546)\t1\n", + " (0, 37647)\t1\n", + " (0, 
39826)\t1\n", + " (0, 41132)\t2\n", + " (0, 42868)\t1\n", + "(1, 44346)\n" + ] + } + ], + "source": [ + "bow4 = bow_transformer.transform([train['STORY'][4]])\n", + "print(bow4) # vectors pointing from origin\n", + "print(bow4.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "markets\n", + "stock\n" + ] + } + ], + "source": [ + "print(bow_transformer.get_feature_names()[32289])\n", + "print(bow_transformer.get_feature_names()[39826])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "messages_bow = bow_transformer.transform(train['STORY'])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of Sparse Matrix: (7628, 44346)\n", + "Amount of Non-Zero occurences: 417825\n" + ] + } + ], + "source": [ + "print('Shape of Sparse Matrix: ', messages_bow.shape)\n", + "print('Amount of Non-Zero occurences: ', messages_bow.nnz)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sparsity: 0.12351772521704532\n" + ] + } + ], + "source": [ + "sparsity = (100.0 * messages_bow.nnz / (messages_bow.shape[0] * messages_bow.shape[1]))\n", + "\n", + "print(f'sparsity: {sparsity}')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 42868)\t0.1653840117317503\n", + " (0, 41132)\t0.23264193296874802\n", + " (0, 39826)\t0.12464706628982292\n", + " (0, 37647)\t0.13059926000708844\n", + " (0, 37546)\t0.13533155309101896\n", + " (0, 35747)\t0.11016931263373622\n", + " (0, 34771)\t0.11197548409940615\n", + " (0, 34213)\t0.17507399610069765\n", + " (0, 33750)\t0.2329734713609702\n", + " (0, 32289)\t0.3411710195006078\n", + " (0, 29335)\t0.14164721721626908\n", + " (0, 28793)\t0.2878801405286177\n", + " (0, 28740)\t0.21695807780745277\n", + " (0, 28063)\t0.1342731302228393\n", + " (0, 27416)\t0.1771020108713445\n", + " (0, 26889)\t0.13365789947306533\n", + " (0, 26400)\t0.20124735910228528\n", + " (0, 26393)\t0.1288535100738698\n", + " (0, 25607)\t0.18575848290189373\n", + " (0, 25194)\t0.20712745650428582\n", + " (0, 24786)\t0.09579350098100424\n", + " (0, 23796)\t0.130962730415073\n", + " (0, 22284)\t0.14220199953463913\n", + " (0, 21236)\t0.09531611135230537\n", + " (0, 20711)\t0.18575848290189373\n", + " (0, 19913)\t0.14959653531880082\n", + " (0, 17114)\t0.17929921522347367\n", + " (0, 7903)\t0.15233170401746512\n", + " (0, 6127)\t0.11539264864412509\n", + " (0, 4354)\t0.14394007026430886\n", + " (0, 458)\t0.2436579581621663\n", + " (0, 98)\t0.16899942411275612\n" + ] + } + ], + "source": [ + "from sklearn.feature_extraction.text import TfidfTransformer\n", + "\n", + "tfidf_transformer = TfidfTransformer().fit(messages_bow)\n", + "\n", + "# TEST\n", + "tfidf4 = tfidf_transformer.transform(bow4)\n", + "print(tfidf4)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.74248747675401\n", + "8.33027414165613\n" + ] + } + ], + "source": [ + "print(tfidf_transformer.idf_[bow_transformer.vocabulary_['u']])\n", + "print(tfidf_transformer.idf_[bow_transformer.vocabulary_['university']])" + ] + }, + { + 
"cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(7628, 44346)\n" + ] + } + ], + "source": [ + "messages_tfidf = tfidf_transformer.transform(messages_bow)\n", + "print(messages_tfidf.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<7628x44346 sparse matrix of type ''\n", + "\twith 417825 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages_tfidf" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "#classifier\n", + "from sklearn.naive_bayes import MultinomialNB \n", + "category_detect_model = MultinomialNB().fit(messages_tfidf, train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "predicted: 3\n", + "expected: 1\n" + ] + } + ], + "source": [ + "print('predicted:', category_detect_model.predict(tfidf4)[0])\n", + "print('expected:', train['SECTION'][3])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3 0 3 ... 1 0 2]\n" + ] + } + ], + "source": [ + "all_predictions = category_detect_model.predict(messages_tfidf)\n", + "print(all_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.97 0.97 0.97 1686\n", + " 1 0.95 1.00 0.97 2772\n", + " 2 1.00 0.96 0.98 1924\n", + " 3 1.00 0.95 0.97 1246\n", + "\n", + " accuracy 0.97 7628\n", + " macro avg 0.98 0.97 0.97 7628\n", + "weighted avg 0.97 0.97 0.97 7628\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "print (classification_report(train['SECTION'], all_predictions))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "\n", + "pipeline = Pipeline([\n", + " ('bow', CountVectorizer(analyzer=text_process)), # strings to token integer counts\n", + " ('tfidf', TfidfTransformer()), # integer counts to weighted TF-IDF scores\n", + " ('classifier', MultinomialNB()), # train on TF-IDF vectors w/ Naive Bayes classifier\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(memory=None,\n", + " steps=[('bow',\n", + " CountVectorizer(analyzer=,\n", + " binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8',\n", + " input='content', lowercase=True, max_df=1.0,\n", + " max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None,\n", + " stop_words=None, strip_accents=None,\n", + " token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", + " tokenizer=None, vocabulary=None)),\n", + " ('tfidf',\n", + " TfidfTransformer(norm='l2', smooth_idf=True,\n", + " sublinear_tf=False, use_idf=True)),\n", + " ('classifier',\n", + " MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],\n", + " verbose=False)" + ] + }, + "execution_count": 33, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.fit(train['STORY'],train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 1 ... 1 0 1]\n" + ] + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "print(predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SECTION
01
12
21
31
41
51
61
72
81
92
100
113
122
131
142
151
161
172
183
192
202
211
222
230
240
252
262
273
283
290
......
27180
27192
27203
27211
27220
27231
27242
27250
27261
27272
27281
27291
27303
27311
27323
27331
27340
27353
27360
27371
27381
27391
27403
27410
27420
27431
27441
27451
27460
27471
\n", + "

2748 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 1\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 1\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 0\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 1\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "output.to_excel('output.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SECTIONSTORY
012019 will see gadgets like gaming smartphones ...
12It has also unleashed a wave of changes in the...
21It can be confusing to pick the right smartpho...
31The mobile application is integrated with a da...
41We have rounded up some of the gadgets that sh...
51\"Imagine if every message you sent was kept wi...
61Positioned along the four sides of the Asus RO...
72In fact, when I applied to USC film school the...
81As spotted by Android Police, Netflix is testi...
92Her moves were immaculately choreographed as s...
100The NCP leadership was under tremendous pressu...
113On the traded volume front, 48.67 lakh shares ...
122They wrote, “Welcoming Makkal Selvan Vijay Set...
131The back of the phone features a 12MP+13MP AI ...
142Talking about how the pressure got to her at o...
151The Xiaomi Play is expected to have a CPU runn...
161In January 2019, the telecom industry added 21...
172Apart from the finalists, the grand finale als...
183\"We expect (a) slew of REIT IPOs to hit the ma...
192According to the same report, Avengers Endgame...
202“Wounded by Lannister riders, they will seek r...
211With so much hatred around, through the show, ...
222The Force Awakens, incidentally, wind up as th...
230The two have been taking potshots at each othe...
240Thakur said “they (authorities) wanted to forc...
252“It’s seriously delightful that our new Dracul...
262(Photo: Diljit Dosanjh/Instagram)Diljit Dosanj...
273Some respite came from the 6% growth in utilit...
283The outlook for India's rupee has deteriorated...
290However, political analysts said that it will ...
.........
27180The party renominated its sitting MP Ranjet Ra...
27192Was there a moment you were bullied?I was very...
27203In Delhi, gold of 99.9% and 99.5% purities fel...
27211Snapchat's controversial and criticised redesi...
27220Of the eight states that faced single-phase po...
27231Facebook pulled 513 Pages, Groups and accounts...
27242One of the most popular female actors in the 1...
27250Two Telugu TV actresses died in a road acciden...
27261Google CEO Sundar Pichai introduced Duplex ear...
27272”Priyanka was replaced by Katrina Kaif, who wo...
27281The Chinese smartphone manufacturer today intr...
27291Samsung, the world’s largest smartphone maker,...
27303Reliance Securities has revised TCS target pri...
27311Music streaming may not be novel, but it’s sti...
27323Ericsson India Pvt. Ltd had moved the Supreme ...
27331On the other hand, the hotel and airline loyal...
27340The Narendra Modi government simply finished t...
27353The yield on 10-year Treasuries climbed two ba...
27360Section 126 of the Representation of People Ac...
27371With these techniques, machines are also learn...
27381The traffic challan payment is also restricted...
27391“We are not like Western countries, where peop...
27403Further, SBICAP Securities says delivery of th...
27410Two, Raj Thackeray’s Maharashtra Navnirman Sen...
27420Senior leaders of the BJP are using the sugges...
27431According to researchers, fraud in the mobile ...
27441The iPhone XS and XS Max share the Apple A12 c...
27451On the photography front, the Note 5 Pro featu...
27460UDAY mandated that discoms bring the gap betwe...
27471Ripple also helps bank customers send money to...
\n", + "

2748 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " SECTION STORY\n", + "0 1 2019 will see gadgets like gaming smartphones ...\n", + "1 2 It has also unleashed a wave of changes in the...\n", + "2 1 It can be confusing to pick the right smartpho...\n", + "3 1 The mobile application is integrated with a da...\n", + "4 1 We have rounded up some of the gadgets that sh...\n", + "5 1 \"Imagine if every message you sent was kept wi...\n", + "6 1 Positioned along the four sides of the Asus RO...\n", + "7 2 In fact, when I applied to USC film school the...\n", + "8 1 As spotted by Android Police, Netflix is testi...\n", + "9 2 Her moves were immaculately choreographed as s...\n", + "10 0 The NCP leadership was under tremendous pressu...\n", + "11 3 On the traded volume front, 48.67 lakh shares ...\n", + "12 2 They wrote, “Welcoming Makkal Selvan Vijay Set...\n", + "13 1 The back of the phone features a 12MP+13MP AI ...\n", + "14 2 Talking about how the pressure got to her at o...\n", + "15 1 The Xiaomi Play is expected to have a CPU runn...\n", + "16 1 In January 2019, the telecom industry added 21...\n", + "17 2 Apart from the finalists, the grand finale als...\n", + "18 3 \"We expect (a) slew of REIT IPOs to hit the ma...\n", + "19 2 According to the same report, Avengers Endgame...\n", + "20 2 “Wounded by Lannister riders, they will seek r...\n", + "21 1 With so much hatred around, through the show, ...\n", + "22 2 The Force Awakens, incidentally, wind up as th...\n", + "23 0 The two have been taking potshots at each othe...\n", + "24 0 Thakur said “they (authorities) wanted to forc...\n", + "25 2 “It’s seriously delightful that our new Dracul...\n", + "26 2 (Photo: Diljit Dosanjh/Instagram)Diljit Dosanj...\n", + "27 3 Some respite came from the 6% growth in utilit...\n", + "28 3 The outlook for India's rupee has deteriorated...\n", + "29 0 However, political analysts said that it will ...\n", + "... ... ...\n", + "2718 0 The party renominated its sitting MP Ranjet Ra...\n", + "2719 2 Was there a moment you were bullied?I was very...\n", + "2720 3 In Delhi, gold of 99.9% and 99.5% purities fel...\n", + "2721 1 Snapchat's controversial and criticised redesi...\n", + "2722 0 Of the eight states that faced single-phase po...\n", + "2723 1 Facebook pulled 513 Pages, Groups and accounts...\n", + "2724 2 One of the most popular female actors in the 1...\n", + "2725 0 Two Telugu TV actresses died in a road acciden...\n", + "2726 1 Google CEO Sundar Pichai introduced Duplex ear...\n", + "2727 2 ”Priyanka was replaced by Katrina Kaif, who wo...\n", + "2728 1 The Chinese smartphone manufacturer today intr...\n", + "2729 1 Samsung, the world’s largest smartphone maker,...\n", + "2730 3 Reliance Securities has revised TCS target pri...\n", + "2731 1 Music streaming may not be novel, but it’s sti...\n", + "2732 3 Ericsson India Pvt. 
Ltd had moved the Supreme ...\n", + "2733 1 On the other hand, the hotel and airline loyal...\n", + "2734 0 The Narendra Modi government simply finished t...\n", + "2735 3 The yield on 10-year Treasuries climbed two ba...\n", + "2736 0 Section 126 of the Representation of People Ac...\n", + "2737 1 With these techniques, machines are also learn...\n", + "2738 1 The traffic challan payment is also restricted...\n", + "2739 1 “We are not like Western countries, where peop...\n", + "2740 3 Further, SBICAP Securities says delivery of th...\n", + "2741 0 Two, Raj Thackeray’s Maharashtra Navnirman Sen...\n", + "2742 0 Senior leaders of the BJP are using the sugges...\n", + "2743 1 According to researchers, fraud in the mobile ...\n", + "2744 1 The iPhone XS and XS Max share the Apple A12 c...\n", + "2745 1 On the photography front, the Note 5 Pro featu...\n", + "2746 0 UDAY mandated that discoms bring the gap betwe...\n", + "2747 1 Ripple also helps bank customers send money to...\n", + "\n", + "[2748 rows x 2 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output['STORY'] = test['STORY']\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.feature_extraction.text import TfidfTransformer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']}\n", + "from sklearn.model_selection import GridSearchCV\n", + "grid = GridSearchCV(SVC(),param_grid,verbose=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "\n", + "pipeline = Pipeline([\n", + " ('bow', CountVectorizer(analyzer=text_process)), # strings to token integer counts\n", + " ('tfidf', TfidfTransformer()), # integer counts to weighted TF-IDF scores\n", + " ('classifier', grid), # train on TF-IDF vectors w/ Naive Bayes classifier\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Subham\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n", + " warnings.warn(CV_WARNING, FutureWarning)\n", + "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 25 candidates, totalling 75 fits\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.561, total= 30.1s\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 30.0s remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] .......... 
C=0.1, gamma=1, kernel=rbf, score=0.561, total= 30.8s\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 1.0min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.560, total= 29.4s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.391, total= 29.9s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.385, total= 29.3s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.387, total= 29.3s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 29.2s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 29.3s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 28.4s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 27.5s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 27.8s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 27.6s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 27.5s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.6s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.961, total= 26.9s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.956, total= 26.6s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.962, total= 26.6s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.940, total= 18.9s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.937, total= 18.7s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.941, total= 19.0s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.416, total= 27.8s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... 
C=1, gamma=0.01, kernel=rbf, score=0.417, total= 27.7s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.417, total= 27.9s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.4s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 31.9s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.963, total= 31.7s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.958, total= 31.3s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.964, total= 28.3s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.972, total= 14.4s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.965, total= 14.5s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.970, total= 14.5s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.947, total= 18.0s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.942, total= 17.7s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.945, total= 18.1s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.419, total= 27.9s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.421, total= 27.9s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.420, total= 29.2s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.3s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.5s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... 
C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.963, total= 27.5s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.958, total= 27.2s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.964, total= 27.5s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.972, total= 14.2s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.964, total= 14.5s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.971, total= 14.6s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.972, total= 13.6s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.965, total= 13.7s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.970, total= 14.5s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.948, total= 18.3s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.943, total= 17.7s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.946, total= 18.1s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.419, total= 29.7s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.421, total= 28.2s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.420, total= 29.2s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.963, total= 28.3s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n", + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.958, total= 27.6s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n", + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.964, total= 28.6s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.972, total= 15.1s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.964, total= 15.9s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.971, total= 15.3s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... 
C=1000, gamma=0.01, kernel=rbf, score=0.973, total= 14.4s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.963, total= 14.1s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.970, total= 14.4s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.972, total= 14.1s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.965, total= 14.2s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.971, total= 14.4s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.948, total= 18.2s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.943, total= 18.5s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.946, total= 18.7s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 75 out of 75 | elapsed: 29.5min finished\n" + ] + }, + { + "data": { + "text/plain": [ + "Pipeline(memory=None,\n", + " steps=[('bow',\n", + " CountVectorizer(analyzer=,\n", + " binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8',\n", + " input='content', lowercase=True, max_df=1.0,\n", + " max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None,\n", + " stop_words=None, strip_accents=None,\n", + " token_pattern='(?u)\\\\b\\\\w\\\\w...\n", + " decision_function_shape='ovr',\n", + " degree=3, gamma='auto_deprecated',\n", + " kernel='rbf', max_iter=-1,\n", + " probability=False,\n", + " random_state=None, shrinking=True,\n", + " tol=0.001, verbose=False),\n", + " iid='warn', n_jobs=None,\n", + " param_grid={'C': [0.1, 1, 10, 100, 1000],\n", + " 'gamma': [1, 0.1, 0.01, 0.001,\n", + " 0.0001],\n", + " 'kernel': ['rbf']},\n", + " pre_dispatch='2*n_jobs', refit=True,\n", + " return_train_score=False, scoring=None,\n", + " verbose=3))],\n", + " verbose=False)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.fit(train['STORY'],train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SECTION
01
12
21
31
41
51
61
72
81
92
100
113
122
131
142
151
163
172
183
192
202
212
222
230
240
252
262
273
283
290
......
27180
27192
27203
27211
27220
27231
27242
27252
27261
27272
27281
27291
27303
27312
27323
27331
27340
27353
27360
27371
27381
27391
27403
27410
27420
27431
27441
27451
27460
27471
\n", + "

2748 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 3\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 2\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 2\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 2\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "output_svm_quote = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output_svm_quote" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SECTION
01
12
21
31
41
51
61
72
81
92
100
113
122
131
142
151
163
172
183
192
202
212
222
230
240
252
262
273
283
290
......
27180
27192
27203
27211
27220
27231
27242
27252
27261
27272
27281
27291
27303
27311
27323
27331
27340
27353
27360
27371
27381
27391
27403
27410
27420
27431
27441
27451
27460
27471
\n", + "

2748 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 3\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 2\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 2\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 1\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "output_svm = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output_svm" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "output_svm_quote.to_excel('output_svm_quote.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "output_svm_skim.to_excel('output_svm_skim.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['sadva', 'vavb', 'is', 'what']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "token = ['sadva','vavb','\"',\"'\",'`','is','what']\n", + "[word for word in token if word.lower() not in (stopwords.words('english') and ['\"',\"'\",'`','”','“'])]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/News_category_project/Data_Test.xlsx b/News_category_project/Data_Test.xlsx new file mode 100644 index 0000000..fd65f62 Binary files /dev/null and b/News_category_project/Data_Test.xlsx differ diff --git a/News_category_project/Data_Train.xlsx b/News_category_project/Data_Train.xlsx new file mode 100644 index 0000000..5f65921 Binary files /dev/null and b/News_category_project/Data_Train.xlsx differ diff --git a/News_category_project/News_cat.ipynb b/News_category_project/News_cat.ipynb new file mode 100644 index 0000000..fc54971 --- /dev/null +++ b/News_category_project/News_cat.ipynb @@ -0,0 +1,2856 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# News Category Finder using NLP\n", + "\n", + "\n", + "Please check the test, train and sample submission files\n", + "#### Importing Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + 
"metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import nltk" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train = pd.read_excel('Data_Train.xlsx', sheet_name='Sheet1')\n", + "test = pd.read_excel('Data_Test.xlsx', sheet_name='Sheet1')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " STORY SECTION\n", + "0 But the most painful was the huge reversal in ... 3\n", + "1 How formidable is the opposition alliance amon... 0\n", + "2 Most Asian currencies were trading lower today... 3\n", + "3 If you want to answer any question, click on ‘... 1\n", + "4 In global markets, gold prices edged up today ... 3" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " STORY\n", + "0 2019 will see gadgets like gaming smartphones ...\n", + "1 It has also unleashed a wave of changes in the...\n", + "2 It can be confusing to pick the right smartpho...\n", + "3 The mobile application is integrated with a da...\n", + "4 We have rounded up some of the gadgets that sh..." + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " SECTION\n", + "count 7628.000000\n", + "mean 1.357892\n", + "std 0.999341\n", + "min 0.000000\n", + "25% 1.000000\n", + "50% 1.000000\n", + "75% 2.000000\n", + "max 3.000000" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " STORY \n", + " count unique top freq\n", + "SECTION \n", + "0 1686 1673 This story has been published from a wire agen... 4\n", + "1 2772 2731 This story has been published from a wire agen... 13\n", + "2 1924 1914 The consensus reads, “Exciting, entertaining, ... 3\n", + "3 1246 1233 This story has been published from a wire agen... 11" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train.groupby('SECTION').describe()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Tokenize" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.tokenize import word_tokenize" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.corpus import stopwords" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "running\n", + "run\n" + ] + } + ], + "source": [ + "from nltk.stem.wordnet import WordNetLemmatizer \n", + "lem = WordNetLemmatizer()\n", + "\n", + "from nltk.stem.porter import PorterStemmer \n", + "stem = PorterStemmer()\n", + "\n", + "word = \"running\" \n", + "print(lem.lemmatize(word))\n", + "\n", + "print(stem.stem(word))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "from nltk.stem.wordnet import WordNetLemmatizer \n", + "from nltk.stem.porter import PorterStemmer \n", + "lem = WordNetLemmatizer()\n", + "stem = PorterStemmer()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Processing Tokens" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "import string\n", + "\n", + "def text_process(mess):\n", + " \"\"\"\n", + " Takes in a string of text, then performs the following:\n", + " 1. Remove all punctuation\n", + " 2. Remove all stopwords\n", + " 3. 
Returns a list of the cleaned text\n", + " \"\"\"\n", + " # Check characters to see if they are in punctuation\n", + " nopunc = [char for char in mess if char not in string.punctuation]\n", + "\n", + " # Join the characters again to form the string.\n", + " nopunc = ''.join(nopunc)\n", + " \n", + " token = word_tokenize(nopunc)\n", + " \n", + " # Now just remove any stopwords\n", + " no_noise = [word for word in token if word.lower() not in (stopwords.words('english') and ['\"',\"'\",'`','”','“'])]\n", + " # Stemming\n", + " \n", + " \n", + " return no_noise\n", + "#[stem.stem(word.lower()) for word in no_noise]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 [painful, huge, reversal, fee, income, unheard...\n", + "1 [formidable, opposition, alliance, among, Cong...\n", + "2 [Asian, currencies, trading, lower, today, Sou...\n", + "3 [want, answer, question, click, ‘, Answer, ’, ...\n", + "4 [global, markets, gold, prices, edged, today, ...\n", + "5 [BEIJING, Chinese, tech, giant, Huawei, announ...\n", + "6 [Mumbai, India, Incs, external, commercial, bo...\n", + "7 [Wednesday, Federal, Reserve, Chairman, Jerome...\n", + "8 [give, audience, already, done, Yeh, Hai, Aash...\n", + "9 [com, Arbaaz, Khan, spoke, getting, back, Daba...\n", + "10 [“, One, would, think, development, testing, p...\n", + "11 [far, year, rupee, gained, 07, foreign, invest...\n", + "12 [Xiaomi, however, sees, presence, Jio, rural, ...\n", + "13 [ad, reads, bells, whistles, Bezel, notch, app...\n", + "14 [Tuesday, Powell, said, healthy, US, economy, ...\n", + "15 [feature, help, make, display, responsive, int...\n", + "16 [TikTok, popular, among, children, facing, cri...\n", + "17 [company, hive, ratings, business, whollyowned...\n", + "18 [chooses, hide, CP, colleagues, move, mother, ...\n", + "19 [’, right, opera, house, simply, goes, show, A...\n", + "20 [Facebook, said, eligible, creators, would, ab...\n", + "21 [Starring, Varun, Dhawan, Alia, Bhatt, Sonaksh...\n", + "22 [GKN, Securities, barred, misuse, socalled, da...\n", + "23 [Fintech, startup, Zeta, cofounded, Bhavin, Tu...\n", + "24 [story, published, wire, agency, feed, without...\n", + "25 [Globally, established, companies, Stratasys, ...\n", + "26 [statements, Yeddyurappa, says, air, strikes, ...\n", + "27 [NDA, seeks, reelection, agriculture, form, im...\n", + "28 [Yeddyurappa, said, IAF, air, strikes, would, ...\n", + "29 [”, two, releases, year, far, Milan, Talkies, ...\n", + " ... 
\n", + "7598 [“, TDP, party, ideology, gotten, sidelined, e...\n", + "7599 [far, whenever, ’, reviewed, Kindles, Paperwhi...\n", + "7600 [day, markets, saw, high, volatility, followin...\n", + "7601 [Today, Gmail, allows, 15GB, free, storage, Us...\n", + "7602 [Aparajita, Sarangi, took, voluntary, retireme...\n", + "7603 [Investors, awaiting, economic, growth, data, ...\n", + "7604 [advice, online, survivalists, moves, unpopula...\n", + "7605 [time, developers, disguising, app, pretend, c...\n", + "7606 [Lok, Sabha, elections, 2019, Fifth, phase, vo...\n", + "7607 [watchdog, passed, five, separate, orders, tog...\n", + "7608 [Twitter, post, last, week, OnePlus, confirmed...\n", + "7609 [iOSonly, email, client, named, Spark, launche...\n", + "7610 [said, question, really, spend, ₹10000, portab...\n", + "7611 [’, want, kind, movie, felt, done, similar, ki...\n", + "7612 [1999, film, Mother, received, Best, Foreign, ...\n", + "7613 [Mohan, Babu, considered, political, heavyweig...\n", + "7614 [SP, 500, opened, higher, 126, points, 004, 28...\n", + "7615 [However, reports, suggest, would, another, sm...\n", + "7616 [Mumbai, Indian, stocks, rose, key, indices, e...\n", + "7617 [Sure, others, slightly, faster, slightly, sha...\n", + "7618 [147, million, pixels, one, billion, colours, ...\n", + "7619 [BJD, supporter, Puri, Congress, leaders, well...\n", + "7620 [Bollywood, celebrities, took, social, media, ...\n", + "7621 [However, confirmation, developers, games, wou...\n", + "7622 [terms, optics, back, Redmi, Note, 7, boasts, ...\n", + "7623 [Karnataka, Congress, bastion, also, gave, BJP...\n", + "7624 [film, also, features, Janhvi, Kapoor, revolve...\n", + "7625 [database, created, bringing, together, crimin...\n", + "7626 [state, uneasy, relationship, mainland, since,...\n", + "7627 [Virus, stars, Kunchacko, Boban, Tovino, Thoma...\n", + "Name: STORY, Length: 7628, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train['STORY'].apply(text_process)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Bag of words" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.feature_extraction.text import CountVectorizer" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "bow_transformer = CountVectorizer(analyzer=text_process).fit(train['STORY'])" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "44346\n" + ] + } + ], + "source": [ + "print(len(bow_transformer.vocabulary_))" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 98)\t1\n", + " (0, 458)\t1\n", + " (0, 4354)\t1\n", + " (0, 6127)\t1\n", + " (0, 7903)\t1\n", + " (0, 17114)\t1\n", + " (0, 19913)\t1\n", + " (0, 20711)\t1\n", + " (0, 21236)\t1\n", + " (0, 22284)\t1\n", + " (0, 23796)\t1\n", + " (0, 24786)\t1\n", + " (0, 25194)\t1\n", + " (0, 25607)\t1\n", + " (0, 26393)\t1\n", + " (0, 26400)\t1\n", + " (0, 26889)\t1\n", + " (0, 27416)\t1\n", + " (0, 28063)\t1\n", + " (0, 28740)\t2\n", + " (0, 28793)\t2\n", + " (0, 29335)\t1\n", + " (0, 32289)\t3\n", + " (0, 33750)\t1\n", + " (0, 34213)\t1\n", + " (0, 34771)\t1\n", + " (0, 35747)\t1\n", + " (0, 37546)\t1\n", + " (0, 37647)\t1\n", + " (0, 
39826)\t1\n", + " (0, 41132)\t2\n", + " (0, 42868)\t1\n", + "(1, 44346)\n" + ] + } + ], + "source": [ + "bow4 = bow_transformer.transform([train['STORY'][4]])\n", + "print(bow4) # vectors pointing from origin\n", + "print(bow4.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "markets\n", + "stock\n" + ] + } + ], + "source": [ + "print(bow_transformer.get_feature_names()[32289])\n", + "print(bow_transformer.get_feature_names()[39826])" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "messages_bow = bow_transformer.transform(train['STORY'])" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of Sparse Matrix: (7628, 44346)\n", + "Amount of Non-Zero occurences: 417825\n" + ] + } + ], + "source": [ + "print('Shape of Sparse Matrix: ', messages_bow.shape)\n", + "print('Amount of Non-Zero occurences: ', messages_bow.nnz)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sparsity: 0.12351772521704532\n" + ] + } + ], + "source": [ + "sparsity = (100.0 * messages_bow.nnz / (messages_bow.shape[0] * messages_bow.shape[1]))\n", + "\n", + "print(f'sparsity: {sparsity}')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " (0, 42868)\t0.1653840117317503\n", + " (0, 41132)\t0.23264193296874802\n", + " (0, 39826)\t0.12464706628982292\n", + " (0, 37647)\t0.13059926000708844\n", + " (0, 37546)\t0.13533155309101896\n", + " (0, 35747)\t0.11016931263373622\n", + " (0, 34771)\t0.11197548409940615\n", + " (0, 34213)\t0.17507399610069765\n", + " (0, 33750)\t0.2329734713609702\n", + " (0, 32289)\t0.3411710195006078\n", + " (0, 29335)\t0.14164721721626908\n", + " (0, 28793)\t0.2878801405286177\n", + " (0, 28740)\t0.21695807780745277\n", + " (0, 28063)\t0.1342731302228393\n", + " (0, 27416)\t0.1771020108713445\n", + " (0, 26889)\t0.13365789947306533\n", + " (0, 26400)\t0.20124735910228528\n", + " (0, 26393)\t0.1288535100738698\n", + " (0, 25607)\t0.18575848290189373\n", + " (0, 25194)\t0.20712745650428582\n", + " (0, 24786)\t0.09579350098100424\n", + " (0, 23796)\t0.130962730415073\n", + " (0, 22284)\t0.14220199953463913\n", + " (0, 21236)\t0.09531611135230537\n", + " (0, 20711)\t0.18575848290189373\n", + " (0, 19913)\t0.14959653531880082\n", + " (0, 17114)\t0.17929921522347367\n", + " (0, 7903)\t0.15233170401746512\n", + " (0, 6127)\t0.11539264864412509\n", + " (0, 4354)\t0.14394007026430886\n", + " (0, 458)\t0.2436579581621663\n", + " (0, 98)\t0.16899942411275612\n" + ] + } + ], + "source": [ + "from sklearn.feature_extraction.text import TfidfTransformer\n", + "\n", + "tfidf_transformer = TfidfTransformer().fit(messages_bow)\n", + "\n", + "# TEST\n", + "tfidf4 = tfidf_transformer.transform(bow4)\n", + "print(tfidf4)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "7.74248747675401\n", + "8.33027414165613\n" + ] + } + ], + "source": [ + "print(tfidf_transformer.idf_[bow_transformer.vocabulary_['u']])\n", + "print(tfidf_transformer.idf_[bow_transformer.vocabulary_['university']])" + ] + }, + { + 
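The cells above turn the 7,628 training stories into a 7,628 x 44,346 bag-of-words matrix (about 0.12% of entries non-zero) and then re-weight the counts with TF-IDF, so terms that are frequent within a story but not across the whole corpus carry more weight. A minimal, self-contained sketch of the same CountVectorizer -> TfidfTransformer hand-off, using a made-up three-document corpus rather than the notebook's data:

from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

# Toy corpus, invented purely to illustrate the shapes involved.
docs = ["gold prices edged up today",
        "stocks edged lower today",
        "gold and stocks moved higher today"]

bow = CountVectorizer()                 # default analyzer here, not text_process
counts = bow.fit_transform(docs)        # sparse document-term count matrix
print(counts.shape, counts.nnz)         # (3, vocabulary size) and the non-zero cell count

tfidf = TfidfTransformer().fit(counts)  # learns IDF weights from the raw counts
print(tfidf.transform(counts).toarray().round(2))  # terms rare across docs get larger weights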
"cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(7628, 44346)\n" + ] + } + ], + "source": [ + "messages_tfidf = tfidf_transformer.transform(messages_bow)\n", + "print(messages_tfidf.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<7628x44346 sparse matrix of type ''\n", + "\twith 417825 stored elements in Compressed Sparse Row format>" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "messages_tfidf" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "#classifier\n", + "from sklearn.naive_bayes import MultinomialNB \n", + "category_detect_model = MultinomialNB().fit(messages_tfidf, train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "predicted: 3\n", + "expected: 1\n" + ] + } + ], + "source": [ + "print('predicted:', category_detect_model.predict(tfidf4)[0])\n", + "print('expected:', train['SECTION'][3])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[3 0 3 ... 1 0 2]\n" + ] + } + ], + "source": [ + "all_predictions = category_detect_model.predict(messages_tfidf)\n", + "print(all_predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.97 0.97 0.97 1686\n", + " 1 0.95 1.00 0.97 2772\n", + " 2 1.00 0.96 0.98 1924\n", + " 3 1.00 0.95 0.97 1246\n", + "\n", + " accuracy 0.97 7628\n", + " macro avg 0.98 0.97 0.97 7628\n", + "weighted avg 0.97 0.97 0.97 7628\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report\n", + "print (classification_report(train['SECTION'], all_predictions))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Using Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "\n", + "pipeline = Pipeline([\n", + " ('bow', CountVectorizer(analyzer=text_process)), # strings to token integer counts\n", + " ('tfidf', TfidfTransformer()), # integer counts to weighted TF-IDF scores\n", + " ('classifier', MultinomialNB()), # train on TF-IDF vectors w/ Naive Bayes classifier\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Pipeline(memory=None,\n", + " steps=[('bow',\n", + " CountVectorizer(analyzer=,\n", + " binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8',\n", + " input='content', lowercase=True, max_df=1.0,\n", + " max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None,\n", + " stop_words=None, strip_accents=None,\n", + " token_pattern='(?u)\\\\b\\\\w\\\\w+\\\\b',\n", + " tokenizer=None, vocabulary=None)),\n", + " ('tfidf',\n", + " TfidfTransformer(norm='l2', smooth_idf=True,\n", + " sublinear_tf=False, use_idf=True)),\n", + " ('classifier',\n", + " MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True))],\n", + " verbose=False)" + ] + }, + "execution_count": 33, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.fit(train['STORY'],train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 2 1 ... 1 0 1]\n" + ] + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "print(predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
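The 0.97 classification report a few cells above is computed on the same rows the MultinomialNB model was trained on, so it overstates how well the pipeline generalises. A short held-out check with the same bow -> tfidf -> classifier pipeline could look like the sketch below (the split variables and the 80/20 ratio are illustrative, not part of the notebook):

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

# Carve a stratified validation split out of the labelled training data.
X_tr, X_val, y_tr, y_val = train_test_split(
    train['STORY'], train['SECTION'],
    test_size=0.2, random_state=42, stratify=train['SECTION'])

pipeline.fit(X_tr, y_tr)                               # same Pipeline object as above
print(classification_report(y_val, pipeline.predict(X_val)))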
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 1\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 1\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 0\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 1\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [], + "source": [ + "output.to_excel('output.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " SECTION STORY\n", + "0 1 2019 will see gadgets like gaming smartphones ...\n", + "1 2 It has also unleashed a wave of changes in the...\n", + "2 1 It can be confusing to pick the right smartpho...\n", + "3 1 The mobile application is integrated with a da...\n", + "4 1 We have rounded up some of the gadgets that sh...\n", + "5 1 \"Imagine if every message you sent was kept wi...\n", + "6 1 Positioned along the four sides of the Asus RO...\n", + "7 2 In fact, when I applied to USC film school the...\n", + "8 1 As spotted by Android Police, Netflix is testi...\n", + "9 2 Her moves were immaculately choreographed as s...\n", + "10 0 The NCP leadership was under tremendous pressu...\n", + "11 3 On the traded volume front, 48.67 lakh shares ...\n", + "12 2 They wrote, “Welcoming Makkal Selvan Vijay Set...\n", + "13 1 The back of the phone features a 12MP+13MP AI ...\n", + "14 2 Talking about how the pressure got to her at o...\n", + "15 1 The Xiaomi Play is expected to have a CPU runn...\n", + "16 1 In January 2019, the telecom industry added 21...\n", + "17 2 Apart from the finalists, the grand finale als...\n", + "18 3 \"We expect (a) slew of REIT IPOs to hit the ma...\n", + "19 2 According to the same report, Avengers Endgame...\n", + "20 2 “Wounded by Lannister riders, they will seek r...\n", + "21 1 With so much hatred around, through the show, ...\n", + "22 2 The Force Awakens, incidentally, wind up as th...\n", + "23 0 The two have been taking potshots at each othe...\n", + "24 0 Thakur said “they (authorities) wanted to forc...\n", + "25 2 “It’s seriously delightful that our new Dracul...\n", + "26 2 (Photo: Diljit Dosanjh/Instagram)Diljit Dosanj...\n", + "27 3 Some respite came from the 6% growth in utilit...\n", + "28 3 The outlook for India's rupee has deteriorated...\n", + "29 0 However, political analysts said that it will ...\n", + "... ... ...\n", + "2718 0 The party renominated its sitting MP Ranjet Ra...\n", + "2719 2 Was there a moment you were bullied?I was very...\n", + "2720 3 In Delhi, gold of 99.9% and 99.5% purities fel...\n", + "2721 1 Snapchat's controversial and criticised redesi...\n", + "2722 0 Of the eight states that faced single-phase po...\n", + "2723 1 Facebook pulled 513 Pages, Groups and accounts...\n", + "2724 2 One of the most popular female actors in the 1...\n", + "2725 0 Two Telugu TV actresses died in a road acciden...\n", + "2726 1 Google CEO Sundar Pichai introduced Duplex ear...\n", + "2727 2 ”Priyanka was replaced by Katrina Kaif, who wo...\n", + "2728 1 The Chinese smartphone manufacturer today intr...\n", + "2729 1 Samsung, the world’s largest smartphone maker,...\n", + "2730 3 Reliance Securities has revised TCS target pri...\n", + "2731 1 Music streaming may not be novel, but it’s sti...\n", + "2732 3 Ericsson India Pvt. 
Ltd had moved the Supreme ...\n", + "2733 1 On the other hand, the hotel and airline loyal...\n", + "2734 0 The Narendra Modi government simply finished t...\n", + "2735 3 The yield on 10-year Treasuries climbed two ba...\n", + "2736 0 Section 126 of the Representation of People Ac...\n", + "2737 1 With these techniques, machines are also learn...\n", + "2738 1 The traffic challan payment is also restricted...\n", + "2739 1 “We are not like Western countries, where peop...\n", + "2740 3 Further, SBICAP Securities says delivery of th...\n", + "2741 0 Two, Raj Thackeray’s Maharashtra Navnirman Sen...\n", + "2742 0 Senior leaders of the BJP are using the sugges...\n", + "2743 1 According to researchers, fraud in the mobile ...\n", + "2744 1 The iPhone XS and XS Max share the Apple A12 c...\n", + "2745 1 On the photography front, the Note 5 Pro featu...\n", + "2746 0 UDAY mandated that discoms bring the gap betwe...\n", + "2747 1 Ripple also helps bank customers send money to...\n", + "\n", + "[2748 rows x 2 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output['STORY'] = test['STORY']\n", + "output" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.svm import SVC\n", + "from sklearn.feature_extraction.text import TfidfTransformer" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "param_grid = {'C': [0.1,1, 10, 100, 1000], 'gamma': [1,0.1,0.01,0.001,0.0001], 'kernel': ['rbf']}\n", + "from sklearn.model_selection import GridSearchCV\n", + "grid = GridSearchCV(SVC(),param_grid,verbose=3)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "\n", + "pipeline = Pipeline([\n", + " ('bow', CountVectorizer(analyzer=text_process)), # strings to token integer counts\n", + " ('tfidf', TfidfTransformer()), # integer counts to weighted TF-IDF scores\n", + " ('classifier', grid), # train on TF-IDF vectors w/ Naive Bayes classifier\n", + "])" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\Subham\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:1978: FutureWarning: The default value of cv will change from 3 to 5 in version 0.22. Specify it explicitly to silence this warning.\n", + " warnings.warn(CV_WARNING, FutureWarning)\n", + "[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 25 candidates, totalling 75 fits\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.561, total= 30.1s\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 1 out of 1 | elapsed: 30.0s remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] .......... 
C=0.1, gamma=1, kernel=rbf, score=0.561, total= 30.8s\n", + "[CV] C=0.1, gamma=1, kernel=rbf ......................................\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 2 out of 2 | elapsed: 1.0min remaining: 0.0s\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] .......... C=0.1, gamma=1, kernel=rbf, score=0.560, total= 29.4s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.391, total= 29.9s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.385, total= 29.3s\n", + "[CV] C=0.1, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=0.1, gamma=0.1, kernel=rbf, score=0.387, total= 29.3s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 29.2s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=0.1, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=0.1, gamma=0.01, kernel=rbf, score=0.363, total= 29.3s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 28.4s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 27.5s\n", + "[CV] C=0.1, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=0.1, gamma=0.001, kernel=rbf, score=0.363, total= 27.8s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 27.6s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 27.5s\n", + "[CV] C=0.1, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=0.1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.6s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.961, total= 26.9s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.956, total= 26.6s\n", + "[CV] C=1, gamma=1, kernel=rbf ........................................\n", + "[CV] ............ C=1, gamma=1, kernel=rbf, score=0.962, total= 26.6s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.940, total= 18.9s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.937, total= 18.7s\n", + "[CV] C=1, gamma=0.1, kernel=rbf ......................................\n", + "[CV] .......... C=1, gamma=0.1, kernel=rbf, score=0.941, total= 19.0s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.416, total= 27.8s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... 
C=1, gamma=0.01, kernel=rbf, score=0.417, total= 27.7s\n", + "[CV] C=1, gamma=0.01, kernel=rbf .....................................\n", + "[CV] ......... C=1, gamma=0.01, kernel=rbf, score=0.417, total= 27.9s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=1, gamma=0.001, kernel=rbf ....................................\n", + "[CV] ........ C=1, gamma=0.001, kernel=rbf, score=0.363, total= 28.4s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 28.8s\n", + "[CV] C=1, gamma=0.0001, kernel=rbf ...................................\n", + "[CV] ....... C=1, gamma=0.0001, kernel=rbf, score=0.363, total= 31.9s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.963, total= 31.7s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.958, total= 31.3s\n", + "[CV] C=10, gamma=1, kernel=rbf .......................................\n", + "[CV] ........... C=10, gamma=1, kernel=rbf, score=0.964, total= 28.3s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.972, total= 14.4s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.965, total= 14.5s\n", + "[CV] C=10, gamma=0.1, kernel=rbf .....................................\n", + "[CV] ......... C=10, gamma=0.1, kernel=rbf, score=0.970, total= 14.5s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.947, total= 18.0s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.942, total= 17.7s\n", + "[CV] C=10, gamma=0.01, kernel=rbf ....................................\n", + "[CV] ........ C=10, gamma=0.01, kernel=rbf, score=0.945, total= 18.1s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.419, total= 27.9s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.421, total= 27.9s\n", + "[CV] C=10, gamma=0.001, kernel=rbf ...................................\n", + "[CV] ....... C=10, gamma=0.001, kernel=rbf, score=0.420, total= 29.2s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.3s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.5s\n", + "[CV] C=10, gamma=0.0001, kernel=rbf ..................................\n", + "[CV] ...... 
C=10, gamma=0.0001, kernel=rbf, score=0.363, total= 28.2s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.963, total= 27.5s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.958, total= 27.2s\n", + "[CV] C=100, gamma=1, kernel=rbf ......................................\n", + "[CV] .......... C=100, gamma=1, kernel=rbf, score=0.964, total= 27.5s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.972, total= 14.2s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.964, total= 14.5s\n", + "[CV] C=100, gamma=0.1, kernel=rbf ....................................\n", + "[CV] ........ C=100, gamma=0.1, kernel=rbf, score=0.971, total= 14.6s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.972, total= 13.6s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.965, total= 13.7s\n", + "[CV] C=100, gamma=0.01, kernel=rbf ...................................\n", + "[CV] ....... C=100, gamma=0.01, kernel=rbf, score=0.970, total= 14.5s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.948, total= 18.3s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.943, total= 17.7s\n", + "[CV] C=100, gamma=0.001, kernel=rbf ..................................\n", + "[CV] ...... C=100, gamma=0.001, kernel=rbf, score=0.946, total= 18.1s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.419, total= 29.7s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.421, total= 28.2s\n", + "[CV] C=100, gamma=0.0001, kernel=rbf .................................\n", + "[CV] ..... C=100, gamma=0.0001, kernel=rbf, score=0.420, total= 29.2s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.963, total= 28.3s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n", + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.958, total= 27.6s\n", + "[CV] C=1000, gamma=1, kernel=rbf .....................................\n", + "[CV] ......... C=1000, gamma=1, kernel=rbf, score=0.964, total= 28.6s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.972, total= 15.1s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.964, total= 15.9s\n", + "[CV] C=1000, gamma=0.1, kernel=rbf ...................................\n", + "[CV] ....... C=1000, gamma=0.1, kernel=rbf, score=0.971, total= 15.3s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... 
C=1000, gamma=0.01, kernel=rbf, score=0.973, total= 14.4s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.963, total= 14.1s\n", + "[CV] C=1000, gamma=0.01, kernel=rbf ..................................\n", + "[CV] ...... C=1000, gamma=0.01, kernel=rbf, score=0.970, total= 14.4s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.972, total= 14.1s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.965, total= 14.2s\n", + "[CV] C=1000, gamma=0.001, kernel=rbf .................................\n", + "[CV] ..... C=1000, gamma=0.001, kernel=rbf, score=0.971, total= 14.4s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.948, total= 18.2s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.943, total= 18.5s\n", + "[CV] C=1000, gamma=0.0001, kernel=rbf ................................\n", + "[CV] .... C=1000, gamma=0.0001, kernel=rbf, score=0.946, total= 18.7s\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Parallel(n_jobs=1)]: Done 75 out of 75 | elapsed: 29.5min finished\n" + ] + }, + { + "data": { + "text/plain": [ + "Pipeline(memory=None,\n", + " steps=[('bow',\n", + " CountVectorizer(analyzer=,\n", + " binary=False, decode_error='strict',\n", + " dtype=, encoding='utf-8',\n", + " input='content', lowercase=True, max_df=1.0,\n", + " max_features=None, min_df=1,\n", + " ngram_range=(1, 1), preprocessor=None,\n", + " stop_words=None, strip_accents=None,\n", + " token_pattern='(?u)\\\\b\\\\w\\\\w...\n", + " decision_function_shape='ovr',\n", + " degree=3, gamma='auto_deprecated',\n", + " kernel='rbf', max_iter=-1,\n", + " probability=False,\n", + " random_state=None, shrinking=True,\n", + " tol=0.001, verbose=False),\n", + " iid='warn', n_jobs=None,\n", + " param_grid={'C': [0.1, 1, 10, 100, 1000],\n", + " 'gamma': [1, 0.1, 0.01, 0.001,\n", + " 0.0001],\n", + " 'kernel': ['rbf']},\n", + " pre_dispatch='2*n_jobs', refit=True,\n", + " return_train_score=False, scoring=None,\n", + " verbose=3))],\n", + " verbose=False)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.fit(train['STORY'],train['SECTION'])" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
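In the cross-validation log above, small values of C or gamma collapse to roughly the majority-class rate (scores near 0.363), while C between 100 and 1000 with gamma between 0.01 and 0.1 reaches about 0.97; note that the trailing comment on the 'classifier' step still says Naive Bayes even though that step is now the SVC grid search. A sketch for reading the selected parameters back out of the fitted pipeline (this assumes GridSearchCV's default refit=True, which the code above relies on):

# The grid search lives in the step named 'classifier' inside the fitted pipeline.
grid_result = pipeline.named_steps['classifier']
print(grid_result.best_params_)            # expected to fall in the high-C, mid-gamma region seen above
print(round(grid_result.best_score_, 3))   # mean 3-fold CV accuracy of that setting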
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 3\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 2\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 2\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 2\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "output_svm_quote = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output_svm_quote" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
+       [HTML table omitted: 2748 rows × 1 column of predicted SECTION labels (0-3) for output_svm; identical to the text/plain output below]
" + ], + "text/plain": [ + " SECTION\n", + "0 1\n", + "1 2\n", + "2 1\n", + "3 1\n", + "4 1\n", + "5 1\n", + "6 1\n", + "7 2\n", + "8 1\n", + "9 2\n", + "10 0\n", + "11 3\n", + "12 2\n", + "13 1\n", + "14 2\n", + "15 1\n", + "16 3\n", + "17 2\n", + "18 3\n", + "19 2\n", + "20 2\n", + "21 2\n", + "22 2\n", + "23 0\n", + "24 0\n", + "25 2\n", + "26 2\n", + "27 3\n", + "28 3\n", + "29 0\n", + "... ...\n", + "2718 0\n", + "2719 2\n", + "2720 3\n", + "2721 1\n", + "2722 0\n", + "2723 1\n", + "2724 2\n", + "2725 2\n", + "2726 1\n", + "2727 2\n", + "2728 1\n", + "2729 1\n", + "2730 3\n", + "2731 1\n", + "2732 3\n", + "2733 1\n", + "2734 0\n", + "2735 3\n", + "2736 0\n", + "2737 1\n", + "2738 1\n", + "2739 1\n", + "2740 3\n", + "2741 0\n", + "2742 0\n", + "2743 1\n", + "2744 1\n", + "2745 1\n", + "2746 0\n", + "2747 1\n", + "\n", + "[2748 rows x 1 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = pipeline.predict(test['STORY'])\n", + "output_svm = pd.DataFrame(predictions,columns=['SECTION'])\n", + "output_svm" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "output_svm_quote.to_excel('output_svm_quote.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "output_svm_skim.to_excel('output_svm_skim.xlsx',sheet_name='Sheet1',index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['sadva', 'vavb', 'is', 'what']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "token = ['sadva','vavb','\"',\"'\",'`','is','what']\n", + "[word for word in token if word.lower() not in (stopwords.words('english') and ['\"',\"'\",'`','”','“'])]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/News_category_project/Sample_submission.xlsx b/News_category_project/Sample_submission.xlsx new file mode 100644 index 0000000..2b7ecbb Binary files /dev/null and b/News_category_project/Sample_submission.xlsx differ diff --git a/News_category_project/output.xlsx b/News_category_project/output.xlsx new file mode 100644 index 0000000..e604512 Binary files /dev/null and b/News_category_project/output.xlsx differ diff --git a/News_category_project/output_svm.xlsx b/News_category_project/output_svm.xlsx new file mode 100644 index 0000000..dbc1c37 Binary files /dev/null and b/News_category_project/output_svm.xlsx differ diff --git a/News_category_project/output_svm_quote.xlsx b/News_category_project/output_svm_quote.xlsx new file mode 100644 index 0000000..03c6918 Binary files /dev/null and b/News_category_project/output_svm_quote.xlsx differ diff --git a/News_category_project/output_svm_skim.xlsx b/News_category_project/output_svm_skim.xlsx new file mode 100644 index 0000000..39ab27f Binary files /dev/null and b/News_category_project/output_svm_skim.xlsx 
differ
diff --git a/README.md b/README.md
index 7d84014..1689496 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,3 @@
-# Natural-Language-Processing
\ No newline at end of file
+# Natural-Language-Processing
+
+A project that classifies news articles into categories using NLP has been added.
\ No newline at end of file
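
A note on the prediction cells above: the `pipeline` object passed to `pipeline.predict(test['STORY'])` is defined earlier in the notebook and does not appear in this part of the diff. For orientation only, here is a minimal sketch of the kind of scikit-learn pipeline the output file names (`output_svm.xlsx`, `output_svm_quote.xlsx`) hint at (TF-IDF features feeding a linear SVM); the vectorizer settings and the choice of `LinearSVC` are assumptions, not taken from the notebook.

    # Hypothetical sketch only; the notebook's actual pipeline definition is not shown in this part of the diff.
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.pipeline import Pipeline
    from sklearn.svm import LinearSVC

    pipeline = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words='english')),  # turn each STORY string into a TF-IDF vector
        ('clf', LinearSVC()),                               # linear SVM over the four SECTION labels (0-3)
    ])

    # Typical usage, mirroring the cells above (train/test come from the Excel sheets loaded earlier):
    # pipeline.fit(train['STORY'], train['SECTION'])
    # predictions = pipeline.predict(test['STORY'])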
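
A second note, on the token-filtering cell near the end of the notebook: the expression `stopwords.words('english') and ['"', "'", '`', '”', '“']` evaluates to just the quote list, because `and` between two non-empty lists returns the right-hand operand. That is why `'is'` and `'what'` survive in that cell's output. If the intent was to drop stopwords as well as quote characters, a corrected sketch (assuming the NLTK stopwords corpus has been downloaded) looks like this:

    from nltk.corpus import stopwords  # requires nltk.download('stopwords') once

    # Build one lookup set containing both the NLTK English stopwords and the quote characters.
    drop = set(stopwords.words('english')) | {'"', "'", '`', '”', '“'}

    token = ['sadva', 'vavb', '"', "'", '`', 'is', 'what']
    cleaned = [word for word in token if word.lower() not in drop]
    # cleaned == ['sadva', 'vavb']  ('is' and 'what' are stopwords, the quote marks are punctuation)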