GithubHelp home page GithubHelp logo

impact_intelligence_interview's Introduction

Impact Intelligence Assessment

Load R packages to use

library(tidyverse)
library(data.table)
library(readxl)
library(xlsx)
library(knitr)

Read data into R

# Both tables come from the same workbook: sheet 4 is read into topic_an
# and sheet 3 into topic_metrics.
topic_an <- read_excel(
  path = "data/Impact Intelligence - Project Contractor Sample.xlsx",
  sheet = 4
)
topic_metrics <- read_excel(
  path = "data/Impact Intelligence - Project Contractor Sample.xlsx",
  sheet = 3
)

Save the column names of the two sheets

# Convert topic_an to a data.table by reference.
setDT(topic_an)

# Snapshot the original column names of both tables.
# copy() matters here: names() of a data.table can share memory with the
# table itself, so a later by-reference rename through setnames() could
# silently overwrite a vector that was saved without it.
nms_topic <- copy(names(topic_an))
nms_topic_metrics <- copy(names(topic_metrics))

Function to clean names

  • i.e. it removes whitespace and other special characters from the column names, which makes them easier to use in a programming language such as R or Python.
# Standardise the column names of `data_set` so they are easy to type in code.
#
# Names are lower-cased, every whitespace character becomes "_", a trailing
# "." is dropped, and "(", ")", "%" and "/" are removed.
#
# data_set: a data.frame-like object. It is converted to a data.table with
#           setDT() and its columns are renamed with setnames().
# Returns the renamed data.table.
nms_clean <- function(data_set){
  original_names <- names(data_set)

  # Substitutions are applied one after another, in exactly this order.
  patterns <- c("\\s", "\\.$", "\\(|\\)|%", "/")
  replacements <- c("_", "", "", "")

  cleaned_names <- tolower(original_names)
  for (k in seq_along(patterns)) {
    cleaned_names <- gsub(patterns[k], replacements[k], cleaned_names)
  }

  setDT(data_set)
  setnames(data_set, old = original_names, new = cleaned_names)
  data_set
}

Clean names

# Run both tables through nms_clean(), then record the cleaned metric
# column names (used later to map columns back to their original names).
topic_an <- topic_an %>% nms_clean()
topic_metrics <- topic_metrics %>% nms_clean()
nms_topic_metrics_clean <- topic_metrics %>% names()

Some other minor cleaning

## Drop topics whose name contains "irrelevant"
topic_an <- topic_an[!grepl("irrelevant", topic_name)]

## Extract the topic id from the start of the label so that the two tables
## can be merged on it. "^\\d+" takes the whole leading run of digits.
## The previous pattern "^\\d{1,3}[^\\.]" required a non-dot character after
## the digits, so a single-digit label such as "5. ..." produced NA.
topic_an[, topic_id := str_extract(topic_label, "^\\d+")]

## Delete topic_label here so that only one topic label remains in the data
topic_an[, topic_label := NULL]

## Merge the two data sets on the shared topic id
topic_an_ans <- merge(topic_an, topic_metrics, by = "topic_id")

## Share of voice on all topics: each row's segment count divided by the
## segment count summed over every row of topic_metrics, rounded to 4 decimals
topic_an_ans[
  ,
  share_voice := round(segment_count / sum(topic_metrics$segment_count), 4)
]

## Order by share of voice, largest first
setorder(topic_an_ans, -share_voice)

## Keep the top 5. head() simply returns fewer rows when fewer than 5 exist,
## whereas indexing with 1:5 would pad the result with all-NA rows.
topic_an_ans <- head(topic_an_ans, 5)

## Show the resulting table
head(topic_an_ans) %>%
  kable()
topic_id topic_name topic_label segment_count average_sentiment average_engagement overrepresented_keywords share_voice
48 TopicD 48. weight, calorie, workout, meal plan, healthy, lifestyle, mindset, weight loss, lose weight, coach 57 0.7657323 5.068750 [‘meal’, ‘eat’, ‘diet’, ‘body’, ‘weight’, ‘fat’, ‘calorie’, ‘lose’, ‘step’, ‘day’, ‘nutrition’, ‘time’, ‘gain’, ‘workout’, ‘gym’, ‘healthy’, ‘health’, ‘meal plan’, ‘goal’, ‘fitness’] 0.0569
114 TopicK 114. change, environmental impact, impact, climate change, epi, shropshire, environmental, predict, cop26, climatecrisis 50 0.4889069 7.218750 [‘consumer’, ‘change’, ‘choice’, ‘ask’, ‘environmental impact’, ‘action’, ‘brand’, ‘behaviour’, ‘future’, ‘cop26’, ‘impact’, ‘influence’, ‘habit’, ‘shropshire’, ‘epi’, ‘act’, ‘educate’, ‘population’, ‘climate change’, ‘sustainability’] 0.0500
107 TopicL 107. reusable bag, reusable, plastic bag, tote bag, sustainablelive, bsci, tx287, go0367, totebag, carryout 40 0.6687746 17.743697 [‘bag’, ‘reusable bag’, ‘reusable’, ‘shopping’, ‘durable’, ‘tote’, ‘plastic bag’, ‘use’, ‘carry’, ‘grocery store’, ‘cotton’, ‘tote bag’, ‘sustainablelive’, ‘universal’, ‘trolley’, ‘canvas’, ‘shop’, ‘grocery’, ‘trip’, ‘single’] 0.0400
93 TopicB 93. recycling, recycle, soft plastic, recycling bin, recyclable, flexible plastic, cardboard, redcycle, plastic, plasticcycle 40 0.4521038 7.054688 [‘recycle’, ‘recycling’, ‘battery’, ‘bin’, ‘plastic’, ‘soft plastic’, ‘film’, ‘recyclable’, ‘accept’, ‘recycling bin’, ‘flexible plastic’, ‘dispose’, ‘collection’, ‘cardboard’, ‘authority’, ‘bag’, ‘paper’, ‘redcycle’, ‘facility’, ‘drop’] 0.0400
15 TopicE 15. palm oil, palm, rainforest, deforestation, orangutans, mspo, certified sustainable palm oil, sustainablepalmoil, greenhouse gas, rspo 32 0.0972222 15.833333 [‘palm oil’, ‘oil’, ‘palm’, ‘product’, ‘rainforest’, ‘destroy’, ‘vegetable’, ‘deforestation’, ‘use’, ‘habitat’, ‘produce’, ‘crop’, ‘sustainable’, ‘mspo’, ‘orangutans’, ‘tree’, ‘kembali’, ‘certified sustainable palm oil’, ‘nutella’, ‘adelaide’] 0.0320

Rename columns back to their original names

## NOTE(review): names() of a data.table may share memory with the table, so
## this vector may end up reflecting the renames below. Wrap it in copy() if a
## pre-rename snapshot is what is wanted. Confirm intent.
nms_final_dt <- names(topic_an_ans)

## Map the cleaned metric column names back to their original spellings.
## skip_absent = TRUE: cleaned names that are not columns of topic_an_ans
## are skipped instead of raising an error.
setnames(topic_an_ans,
         nms_topic_metrics_clean,
         nms_topic_metrics, skip_absent = TRUE)

## Columns that get a presentation name: old names ...
nms_to_rnm <- c("topic_name", "share_voice",
                "Overrepresented Keywords")

## ... and their replacements, in matching order
nms_to_rnm_to <- c("Topic Name",
                   "Share of Voice on all Topics",
                   "Keywords")

## skip_absent = FALSE: every name in nms_to_rnm must be present
setnames(topic_an_ans,
         nms_to_rnm,
         nms_to_rnm_to, skip_absent = FALSE)

Select required columns

# Columns that make up the final output, in display order
nms_required <- c(
  "Topic Name",
  "Share of Voice on all Topics",
  "Average Sentiment",
  "Average Engagement",
  "Keywords"
)

# with = FALSE makes data.table treat nms_required as a vector of column names
output_df <- topic_an_ans[, nms_required, with = FALSE]

Delete output sheet

  • removing the existing "Output" sheet first makes it possible to write a fresh one to the workbook
wb_path <- "data/Impact Intelligence - Project Contractor Sample.xlsx"
wb <- loadWorkbook(wb_path)

## Remove the existing "Output" sheet so it can be written again.
## The check keeps the script working when the workbook has no "Output"
## sheet yet (e.g. on a first run); the workbook is only re-saved when a
## sheet was actually removed.
if ("Output" %in% names(getSheets(wb))) {
  removeSheet(wb, sheetName = "Output")
  saveWorkbook(wb, wb_path)
}

Append the sheet to your excel workbook

## Append the result to the workbook as a sheet named "Output".
## output_df is passed through as.data.frame() before writing;
## row.names = FALSE keeps the row names out of the sheet.
## (FALSE is spelled out because F is an ordinary, reassignable variable.)
write.xlsx(as.data.frame(output_df),
           file = "data/Impact Intelligence - Project Contractor Sample.xlsx",
           sheetName = "Output",
           append = TRUE,
           row.names = FALSE)

impact_intelligence_interview's People

Contributors

m-mburu avatar

Watchers

 avatar

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.