first step py and hs code py run and tested with datasets commit
This commit is contained in:
parent
4c100d4029
commit
7b19a84976
152
.gitignore
vendored
152
.gitignore
vendored
@ -1 +1,151 @@
|
|||||||
*.log
|
# Created by .ignore support plugin (hsz.mobi)
|
||||||
|
### Python template
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
env/
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*,cover
|
||||||
|
.hypothesis/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
target/
|
||||||
|
|
||||||
|
# IPython Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
.python-version
|
||||||
|
|
||||||
|
# celery beat schedule file
|
||||||
|
celerybeat-schedule
|
||||||
|
|
||||||
|
# dotenv
|
||||||
|
.env
|
||||||
|
|
||||||
|
# virtualenv
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
### VirtualEnv template
|
||||||
|
# Virtualenv
|
||||||
|
# http://iamzed.com/2009/05/07/a-primer-on-virtualenv/
|
||||||
|
.Python
|
||||||
|
[Bb]in
|
||||||
|
[Ii]nclude
|
||||||
|
[Ll]ib
|
||||||
|
[Ll]ib64
|
||||||
|
[Ll]ocal
|
||||||
|
[Ss]cripts
|
||||||
|
pyvenv.cfg
|
||||||
|
.venv
|
||||||
|
pip-selfcheck.json
|
||||||
|
### JetBrains template
|
||||||
|
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm
|
||||||
|
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
|
||||||
|
|
||||||
|
# User-specific stuff:
|
||||||
|
.idea/workspace.xml
|
||||||
|
.idea/tasks.xml
|
||||||
|
.idea/dictionaries
|
||||||
|
.idea/vcs.xml
|
||||||
|
.idea/jsLibraryMappings.xml
|
||||||
|
|
||||||
|
# Sensitive or high-churn files:
|
||||||
|
.idea/dataSources.ids
|
||||||
|
.idea/dataSources.xml
|
||||||
|
.idea/dataSources.local.xml
|
||||||
|
.idea/sqlDataSources.xml
|
||||||
|
.idea/dynamic.xml
|
||||||
|
.idea/uiDesigner.xml
|
||||||
|
|
||||||
|
# Gradle:
|
||||||
|
.idea/gradle.xml
|
||||||
|
.idea/libraries
|
||||||
|
|
||||||
|
# Mongo Explorer plugin:
|
||||||
|
.idea/mongoSettings.xml
|
||||||
|
|
||||||
|
.idea/
|
||||||
|
|
||||||
|
## File-based project format:
|
||||||
|
*.iws
|
||||||
|
|
||||||
|
## Plugin-specific files:
|
||||||
|
|
||||||
|
# IntelliJ
|
||||||
|
/out/
|
||||||
|
|
||||||
|
# mpeltonen/sbt-idea plugin
|
||||||
|
.idea_modules/
|
||||||
|
|
||||||
|
# JIRA plugin
|
||||||
|
atlassian-ide-plugin.xml
|
||||||
|
|
||||||
|
# Crashlytics plugin (for Android Studio and IntelliJ)
|
||||||
|
com_crashlytics_export_strings.xml
|
||||||
|
crashlytics.properties
|
||||||
|
crashlytics-build.properties
|
||||||
|
fabric.properties
|
||||||
|
3
.idea/.gitignore
vendored
Normal file
3
.idea/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
# Default ignored files
|
||||||
|
/shelf/
|
||||||
|
/workspace.xml
|
10
.idea/hatespeech-twitterdataset.iml
Normal file
10
.idea/hatespeech-twitterdataset.iml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="inheritedJdk" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
</module>
|
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
6
.idea/inspectionProfiles/profiles_settings.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
4
.idea/misc.xml
Normal file
4
.idea/misc.xml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (hatespeech-twitterdataset)" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
8
.idea/modules.xml
Normal file
8
.idea/modules.xml
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/hatespeech-twitterdataset.iml" filepath="$PROJECT_DIR$/.idea/hatespeech-twitterdataset.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
6
.idea/vcs.xml
Normal file
6
.idea/vcs.xml
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
Can't render this file because it is too large.
|
26954
datasets/twitter_test.csv
Normal file
26954
datasets/twitter_test.csv
Normal file
File diff suppressed because it is too large
Load Diff
72
firststep.py
Normal file
72
firststep.py
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
from nltk.util import pr
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.feature_extraction.text import CountVectorizer
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
import re
|
||||||
|
import nltk
|
||||||
|
stemmer = nltk.SnowballStemmer("english")
|
||||||
|
from nltk.corpus import stopwords
|
||||||
|
import string
|
||||||
|
stopword=set(stopwords.words('english'))
|
||||||
|
data = pd.read_csv("datasets//twitter.csv")
|
||||||
|
|
||||||
|
#Step 2 I will add a new column to this dataset as labels, which will contain the values: Hate Speech, Offensive Language, and No Hate and Offensive
|
||||||
|
|
||||||
|
data["labels"] = data["class"].map({0: "Hate Speech",
|
||||||
|
1: "Offensive Language",
|
||||||
|
2: "No Hate and Offensive"})
|
||||||
|
data = data[["tweet", "labels"]]
|
||||||
|
|
||||||
|
|
||||||
|
#Step 3 Only the tweet and labels columns are kept (selected just above) for the rest of the task of training a hate speech detection model; the clean() function below normalizes the tweet text:
|
||||||
|
|
||||||
|
def clean(text):
    """Normalize one tweet before it is vectorized.

    The value is converted with ``str()`` and lower-cased, then the following
    are removed in order: square-bracketed spans, URLs (``http(s)://...`` and
    ``www. ...``), angle-bracketed tags, ASCII punctuation, newlines, and any
    word containing a digit. The remaining text is split on single spaces,
    stop words are dropped, and each surviving word is stemmed.

    Relies on the module-level ``stopword`` set and ``stemmer`` object.

    Args:
        text: The raw tweet. Any value is accepted because it is passed
            through ``str()`` first.

    Returns:
        str: The stemmed words joined with a space character.
    """
    text = str(text).lower()
    # Raw string literals are used for the patterns so that regex escapes
    # such as \[ , \S , \w and \d reach the regex engine untouched instead of
    # being treated as (invalid) Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    # Splitting on a single space (not arbitrary whitespace) is kept on
    # purpose so the output matches the original tokenization exactly.
    kept_words = [word for word in text.split(' ') if word not in stopword]
    return " ".join(stemmer.stem(word) for word in kept_words)
|
||||||
|
data["tweet"] = data["tweet"].apply(clean)
|
||||||
|
|
||||||
|
#Step 5 The clean() function defined above has now been applied to the texts in the tweet column
|
||||||
|
|
||||||
|
|
||||||
|
#Step 6 Now let’s split the dataset into training and test sets and train a machine learning model for the task of hate speech detection

# Cleaned tweet texts are the inputs; the human-readable labels are the targets.
x = np.array(data["tweet"])
y = np.array(data["labels"])

# Turn each tweet into a vector of token counts; the fitted vectorizer is
# reused below to encode the sample sentence with the same vocabulary.
cv = CountVectorizer()
X = cv.fit_transform(x) # Fit the Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# The vectorize/split/fit sequence appeared twice in a row with identical
# inputs; it is run once here, since the second pass only repeated the work.
clf = DecisionTreeClassifier()
clf.fit(X_train,y_train)
#print(data.head())

#Step 7 Now let’s test this machine learning model to see if it detects hate speech or not

sample = "Let's unite and kill all the people who are protesting against the government"
# The vocabulary was fitted on text that went through clean(), so the sample
# is cleaned the same way before it is vectorized. A separate name is used
# for the result so the `data` DataFrame is not overwritten.
sample_features = cv.transform([clean(sample)]).toarray()
print(clf.predict(sample_features))
|
||||||
|
|
||||||
|
|
@ -5,7 +5,7 @@ from sklearn.feature_extraction.text import CountVectorizer
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
from sklearn.tree import DecisionTreeClassifier
|
from sklearn.tree import DecisionTreeClassifier
|
||||||
|
|
||||||
data = pd.read_csv("twitter.csv")
|
data = pd.read_csv("datasets//twitter.csv")
|
||||||
#print(data.head())
|
#print(data.head())
|
||||||
|
|
||||||
data["labels"] = data["class"].map({0: "Hate Speech", 1: "Offensive Language", 2: "No Hate and Offensive"})
|
data["labels"] = data["class"].map({0: "Hate Speech", 1: "Offensive Language", 2: "No Hate and Offensive"})
|
||||||
|
BIN
notepad/HateSpeechDetecyion.docx
Normal file
BIN
notepad/HateSpeechDetecyion.docx
Normal file
Binary file not shown.
112
notepad/git cmds.txt
Normal file
112
notepad/git cmds.txt
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
git init
|
||||||
|
|
||||||
|
git add -A - add all changes to stage (long form: git add --all)
|
||||||
|
|
||||||
|
git status
|
||||||
|
|
||||||
|
git commit -m "Message" -- add .git to commit
|
||||||
|
|
||||||
|
git diff
|
||||||
|
|
||||||
|
git diff --staged
|
||||||
|
|
||||||
|
git commit -a -m "Message" -- direct commit without staging
|
||||||
|
|
||||||
|
git log
|
||||||
|
%**************************************%
|
||||||
|
rm -rf .git - remove full .git from the dir (plain shell rm, not a git command; "git rm" only removes tracked files)
|
||||||
|
|
||||||
|
git rm <filename> - remove the file
|
||||||
|
|
||||||
|
git mv <filename> <renamefilename>
|
||||||
|
|
||||||
|
%**************************************%
|
||||||
|
Git Skipping
|
||||||
|
git rm --cached <filename> for untrack the file
|
||||||
|
%**************************************%
|
||||||
|
|
||||||
|
%**************************************%
|
||||||
|
Git Log: Viewing & Changing Commits
|
||||||
|
|
||||||
|
Create new Repo.,
|
||||||
|
git clone <gitpath> <foldername>
|
||||||
|
|
||||||
|
git log -- to see all history
|
||||||
|
|
||||||
|
git log --stat
|
||||||
|
|
||||||
|
git log --pretty=oneline
|
||||||
|
git log --pretty=short
|
||||||
|
git log --pretty=full
|
||||||
|
|
||||||
|
git log --since=2.days
|
||||||
|
git log --since=2.weeks
|
||||||
|
git log --since=2.years
|
||||||
|
|
||||||
|
git log --pretty=format:
|
||||||
|
git log --pretty=format:"%h --%ae"
|
||||||
|
More :
|
||||||
|
https://git-scm.com/docs/git-log
|
||||||
|
|
||||||
|
|
||||||
|
if you want to change commit message and update the changes again,
|
||||||
|
git log -p -1
|
||||||
|
git commit --amend
|
||||||
|
press i and write and exit,
|
||||||
|
staging and commit again
|
||||||
|
|
||||||
|
%**************************************%
|
||||||
|
|
||||||
|
|
||||||
|
Stage - git add . or git add -A ->(all)
|
||||||
|
Unstage - git restore --staged <filename>
|
||||||
|
|
||||||
|
|
||||||
|
change git checkout -- .gitignore
|
||||||
|
|
||||||
|
git checkout -f
|
||||||
|
|
||||||
|
without changes get back
|
||||||
|
do not use it much -- it removes staging and discards recent changes
|
||||||
|
|
||||||
|
%**************************************%
|
||||||
|
|
||||||
|
git remote
|
||||||
|
|
||||||
|
git remote add origin <link>
|
||||||
|
|
||||||
|
git checkout -b <branch>
|
||||||
|
|
||||||
|
45) To create new branch
|
||||||
|
$ git checkout -b <new-branch-name>
|
||||||
|
|
||||||
|
46) To switch into different branch
|
||||||
|
$ git checkout <branch-name>
|
||||||
|
|
||||||
|
47) To check all the branches in git
|
||||||
|
$ git branch
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user